mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
297 Commits
REL4_2_STA
...
v4.3.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b4ca6851ab | ||
|
|
347948b79f | ||
|
|
83e492d4ef | ||
|
|
1906ea89bd | ||
|
|
eab4fd2795 | ||
|
|
3f1fe9b6c2 | ||
|
|
e672f7e3ee | ||
|
|
fd86160dff | ||
|
|
f19cf62f09 | ||
|
|
8018ba97d6 | ||
|
|
73554c6e16 | ||
|
|
f23a93e12d | ||
|
|
d9947a46e8 | ||
|
|
e3a632e29d | ||
|
|
939cbd0721 | ||
|
|
c45c5abfb8 | ||
|
|
1953ec7459 | ||
|
|
a6eacca6e4 | ||
|
|
948e076ad9 | ||
|
|
a3bd9d33ff | ||
|
|
9dc928a7d5 | ||
|
|
9acf7bdfea | ||
|
|
29acd10f37 | ||
|
|
9df511eee3 | ||
|
|
6441db23ff | ||
|
|
7792de3543 | ||
|
|
94fe3e395e | ||
|
|
ff26173b1e | ||
|
|
4c11a57334 | ||
|
|
1d2d6e3587 | ||
|
|
c03913d32a | ||
|
|
37a41a66f9 | ||
|
|
4c2c8ecbab | ||
|
|
b84b6180ee | ||
|
|
58f55222d9 | ||
|
|
5cbaff8d0a | ||
|
|
a38e229e61 | ||
|
|
272abdd483 | ||
|
|
b4f6043abc | ||
|
|
a7f3f899ff | ||
|
|
3ec43eda36 | ||
|
|
ce8e1cccc4 | ||
|
|
70bfa4c8e1 | ||
|
|
f0d5ad503d | ||
|
|
b9ee57ee0f | ||
|
|
d5d6ed4be7 | ||
|
|
f4655074ae | ||
|
|
67d26ab7e2 | ||
|
|
70a7b45a03 | ||
|
|
4251590833 | ||
|
|
9347d34ce0 | ||
|
|
feb90ee50c | ||
|
|
0a6486bb7f | ||
|
|
39443bbcee | ||
|
|
fc636b1bd2 | ||
|
|
048bad1c88 | ||
|
|
4528eb1796 | ||
|
|
169c9ccd32 | ||
|
|
5f92fbddf2 | ||
|
|
617e466f72 | ||
|
|
435fac297b | ||
|
|
4bc12b4c94 | ||
|
|
91234994e2 | ||
|
|
ee9da30f20 | ||
|
|
2e67bc1341 | ||
|
|
18ab5cab4e | ||
|
|
60bb4e9fc8 | ||
|
|
52bee6b98d | ||
|
|
ecb1f379f5 | ||
|
|
e1cd2c22d4 | ||
|
|
1dea6b76d9 | ||
|
|
702f90fc9d | ||
|
|
c4d1eec6f3 | ||
|
|
b241c606c0 | ||
|
|
45c896d716 | ||
|
|
514595ea10 | ||
|
|
531194fa27 | ||
|
|
2aa67c992c | ||
|
|
37892afcfc | ||
|
|
e4e5e35552 | ||
|
|
b320c1f0ae | ||
|
|
280654bed6 | ||
|
|
ae675059c0 | ||
|
|
454ebabe89 | ||
|
|
d1d6ef8d12 | ||
|
|
5d6eab74f6 | ||
|
|
59b7453bbf | ||
|
|
bde8c7e29c | ||
|
|
bc6584a90d | ||
|
|
074d79b44f | ||
|
|
2eeb288573 | ||
|
|
48a2274b11 | ||
|
|
19bcfa7264 | ||
|
|
486877c3d5 | ||
|
|
9753bcc8c3 | ||
|
|
bd35b450da | ||
|
|
1f256d4d73 | ||
|
|
1524e2449f | ||
|
|
0cd2bd2e91 | ||
|
|
98b78df16c | ||
|
|
b946dce2f0 | ||
|
|
39234afcbf | ||
|
|
23569a19b1 | ||
|
|
c650fd3412 | ||
|
|
c30e65b3f2 | ||
|
|
07097575b1 | ||
|
|
71d151ca87 | ||
|
|
5abec2bb97 | ||
|
|
de70fd42dc | ||
|
|
99550b91bd | ||
|
|
70190c37c4 | ||
|
|
f3fc4e5afb | ||
|
|
629c552348 | ||
|
|
85a97c933f | ||
|
|
3a5a4388c7 | ||
|
|
9338a9e233 | ||
|
|
7fad2ed2c8 | ||
|
|
9305953bd2 | ||
|
|
aeb9639ed9 | ||
|
|
bc9e725d05 | ||
|
|
905e108f8f | ||
|
|
f2362a06fa | ||
|
|
7b85cb9f12 | ||
|
|
790bec21dd | ||
|
|
a0dc673439 | ||
|
|
25019d1cc5 | ||
|
|
d00cb767a6 | ||
|
|
8e0d28d8dc | ||
|
|
e146fb4fc3 | ||
|
|
8773543e10 | ||
|
|
a4cd4ee553 | ||
|
|
a61dd8a750 | ||
|
|
2c84716e66 | ||
|
|
f1667a7e98 | ||
|
|
b91900f831 | ||
|
|
aa1e64ec11 | ||
|
|
5d6037303b | ||
|
|
8aaf6571a0 | ||
|
|
9433f80364 | ||
|
|
aee13aee52 | ||
|
|
f0a0be0248 | ||
|
|
c4332d9a52 | ||
|
|
c7b325e2a4 | ||
|
|
b89941f218 | ||
|
|
2b3b1faa20 | ||
|
|
b9cd321aed | ||
|
|
984ce7420b | ||
|
|
464ec6bec3 | ||
|
|
3bbbf6daa9 | ||
|
|
cd3312496e | ||
|
|
cce8b76171 | ||
|
|
2a529e7e8b | ||
|
|
f62b3b2868 | ||
|
|
701944c194 | ||
|
|
d8048060a2 | ||
|
|
31f25856a2 | ||
|
|
92c73b68a0 | ||
|
|
90909e2e42 | ||
|
|
b036870c83 | ||
|
|
321eb844e4 | ||
|
|
2c9700586c | ||
|
|
f9a1861ded | ||
|
|
59ed86c01a | ||
|
|
f24b30327c | ||
|
|
48381a5b4e | ||
|
|
20b79f998c | ||
|
|
a41e7bb726 | ||
|
|
b9ba97a36d | ||
|
|
d8aa472c5f | ||
|
|
9273e7af73 | ||
|
|
f04f2af8aa | ||
|
|
bdb4f66a9d | ||
|
|
c402b08791 | ||
|
|
64bb034d34 | ||
|
|
ea54aaa290 | ||
|
|
b34c331eba | ||
|
|
19e0b6a1b6 | ||
|
|
9349171b55 | ||
|
|
d4ee4cc14c | ||
|
|
d7420d7274 | ||
|
|
70e4243a1d | ||
|
|
b6264b77c4 | ||
|
|
9e7cb6d01c | ||
|
|
0435bda115 | ||
|
|
a5aa47c1dd | ||
|
|
7654dd615b | ||
|
|
c83e9870fe | ||
|
|
8b13d14294 | ||
|
|
ba13172b3a | ||
|
|
32b81e7d49 | ||
|
|
cbfef17a1d | ||
|
|
a48d408e4e | ||
|
|
e5f50e7b99 | ||
|
|
aeea02b598 | ||
|
|
59eca2be30 | ||
|
|
dfe57d2406 | ||
|
|
061932d023 | ||
|
|
3f5762e03a | ||
|
|
42fa9a2a88 | ||
|
|
f23065e041 | ||
|
|
efe4a9c344 | ||
|
|
0970789b1d | ||
|
|
07b79286b5 | ||
|
|
c3d284e097 | ||
|
|
a9e09d436a | ||
|
|
965984a510 | ||
|
|
1980deb480 | ||
|
|
b6fe91ebcd | ||
|
|
44cbb44500 | ||
|
|
99161c38d2 | ||
|
|
57d3ee768c | ||
|
|
7dce3ed234 | ||
|
|
58efb0f158 | ||
|
|
d261768541 | ||
|
|
aa8547a219 | ||
|
|
9f04a846ec | ||
|
|
ff0e480fdd | ||
|
|
8881b69c06 | ||
|
|
0b3a310802 | ||
|
|
4523137bfc | ||
|
|
666f5cf851 | ||
|
|
e89938e132 | ||
|
|
d97905f6fd | ||
|
|
bed66edfd9 | ||
|
|
ba7ef9e643 | ||
|
|
10be941298 | ||
|
|
75379eab2e | ||
|
|
d4e993a240 | ||
|
|
695a45f9ed | ||
|
|
028c874f81 | ||
|
|
b3c2831bd3 | ||
|
|
e191a32eac | ||
|
|
c66c8ebc98 | ||
|
|
3389491151 | ||
|
|
81eb9d99e7 | ||
|
|
1156f27979 | ||
|
|
b5b9aacc8a | ||
|
|
b89b3c0961 | ||
|
|
9cf5bf3f93 | ||
|
|
9a5bd0d489 | ||
|
|
40408a1734 | ||
|
|
40410e43ab | ||
|
|
3c25d5a03a | ||
|
|
7e21ceb158 | ||
|
|
313aa3c5d7 | ||
|
|
10d46f7e85 | ||
|
|
9e90fcd584 | ||
|
|
c53782cda3 | ||
|
|
66b40ffc68 | ||
|
|
a6a2be2239 | ||
|
|
bdcc4d9e83 | ||
|
|
9f587efb74 | ||
|
|
2aacd29e60 | ||
|
|
311f7e561e | ||
|
|
b498db87aa | ||
|
|
74c44a7178 | ||
|
|
5ff3744895 | ||
|
|
793d83b22c | ||
|
|
0f4e04e61e | ||
|
|
80a280cbf4 | ||
|
|
b223cb4cee | ||
|
|
9d1f5c0de3 | ||
|
|
784c9c4793 | ||
|
|
0caec90d81 | ||
|
|
1458f6e6aa | ||
|
|
a2d38c6084 | ||
|
|
5f1bf0fb8f | ||
|
|
7d99b96717 | ||
|
|
3b10750a7f | ||
|
|
af0a60b8eb | ||
|
|
b419c5fec7 | ||
|
|
2cfcc33a64 | ||
|
|
273db444b2 | ||
|
|
2bf3eeb931 | ||
|
|
c3bc5585d9 | ||
|
|
b84f217710 | ||
|
|
90c49c0c28 | ||
|
|
41c1550788 | ||
|
|
c336e384ab | ||
|
|
bc1956dee9 | ||
|
|
a459c60145 | ||
|
|
65721bbbcd | ||
|
|
96895ba8a8 | ||
|
|
e0d6d906e7 | ||
|
|
dc8ffd30c6 | ||
|
|
24392fa11b | ||
|
|
06b5239ada | ||
|
|
56173d94a9 | ||
|
|
578f11003c | ||
|
|
36bd7cdc9f | ||
|
|
62ac56c3f5 | ||
|
|
c79852cce0 | ||
|
|
3907a545b0 | ||
|
|
d1d057a184 | ||
|
|
b70e3b48c8 | ||
|
|
ab6c3d9b6e | ||
|
|
6999dbb52a |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -47,6 +47,9 @@ lib*.pc
|
|||||||
# other
|
# other
|
||||||
/.lineno
|
/.lineno
|
||||||
*.dSYM
|
*.dSYM
|
||||||
|
*.orig
|
||||||
|
*.rej
|
||||||
|
|
||||||
# generated binaries
|
# generated binaries
|
||||||
repmgr
|
repmgr
|
||||||
repmgrd
|
repmgrd
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ License and Contributions
|
|||||||
=========================
|
=========================
|
||||||
|
|
||||||
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
||||||
Copyright 2010-2018, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
Copyright 2010-2019, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||||
details.
|
details.
|
||||||
|
|
||||||
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
||||||
@@ -24,7 +24,7 @@ Code style
|
|||||||
Code in repmgr should be formatted to the same standards as the main PostgreSQL
|
Code in repmgr should be formatted to the same standards as the main PostgreSQL
|
||||||
project. For more details see:
|
project. For more details see:
|
||||||
|
|
||||||
https://www.postgresql.org/docs/current/static/source-format.html
|
https://www.postgresql.org/docs/current/source-format.html
|
||||||
|
|
||||||
Contributors should reformat their code similarly before submitting code to
|
Contributors should reformat their code similarly before submitting code to
|
||||||
the project, in order to minimize merge conflicts with other work.
|
the project, in order to minimize merge conflicts with other work.
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2010-2018, 2ndQuadrant Limited
|
Copyright (c) 2010-2019, 2ndQuadrant Limited
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
|||||||
4
FAQ.md
4
FAQ.md
@@ -1,8 +1,10 @@
|
|||||||
FAQ - Frequently Asked Questions about repmgr
|
FAQ - Frequently Asked Questions about repmgr
|
||||||
=============================================
|
=============================================
|
||||||
|
|
||||||
The repmgr 4 FAQ is located here: [repmgr FAQ (Frequently Asked Questions)](https://repmgr.org/docs/4.0/appendix-faq.html "repmgr FAQ")
|
The repmgr 4 FAQ is located here: [repmgr FAQ (Frequently Asked Questions)](https://repmgr.org/docs/current/appendix-faq.html "repmgr FAQ")
|
||||||
|
|
||||||
The repmgr 3.x FAQ can be found here:
|
The repmgr 3.x FAQ can be found here:
|
||||||
|
|
||||||
https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/FAQ.md
|
https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/FAQ.md
|
||||||
|
|
||||||
|
Note that repmgr 3.x is no longer supported.
|
||||||
|
|||||||
34
HISTORY
34
HISTORY
@@ -1,12 +1,42 @@
|
|||||||
4.2.0 2018-??-??
|
4.3 2019-??
|
||||||
|
repmgr: add "daemon (start|stop)" command; GitHub #528 (Ian)
|
||||||
|
repmgr: add --version-number command line option (Ian)
|
||||||
|
repmgr: add --compact option to "cluster show"; GitHub #521 (Ian)
|
||||||
|
repmgr: cluster show - differentiate between unreachable nodes
|
||||||
|
and nodes which are running but rejecting connections (Ian)
|
||||||
|
repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian)
|
||||||
|
repmgr: add "node check --data-directory-config"; GitHub #523 (Ian)
|
||||||
|
repmgr: prevent potential race condition in "standby switchover"
|
||||||
|
when checking received WAL location; GitHub #518 (Ian)
|
||||||
|
repmgr: ensure "standby switchover" verifies repmgr can read the
|
||||||
|
data directory on the demotion candidate; GitHub #523 (Ian)
|
||||||
|
repmgr: ensure "standby switchover" verifies replication connection
|
||||||
|
exists; GitHub #519 (Ian)
|
||||||
|
repmgr: add sanity check for correct extension version (Ian)
|
||||||
|
repmgr: ensure "witness register --dry-run" does not attempt to read node
|
||||||
|
tables if repmgr extension not installed; GitHub #513 (Ian)
|
||||||
|
repmgr: ensure "standby register" fails when --upstream-node-id is the
|
||||||
|
same as the local node ID (Ian)
|
||||||
|
repmgrd: check binary and extension major versions match; GitHub #515 (Ian)
|
||||||
|
repmgrd: on a cascaded standby, don't fail over if "failover=manual";
|
||||||
|
GitHub #531 (Ian)
|
||||||
|
repmgrd: don't consider nodes where repmgrd is not running as promotion
|
||||||
|
candidates (Ian)
|
||||||
|
repmgrd: add option "connection_check_type" (Ian)
|
||||||
|
repmgrd: improve witness monitoring when primary node not available (Ian)
|
||||||
|
repmgrd: handle situation where a primary has unexpectedly appeared
|
||||||
|
during failover; GitHub #420 (Ian)
|
||||||
|
|
||||||
|
4.2 2018-10-24
|
||||||
repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
|
repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
|
||||||
GitHub #504 (Ian)
|
GitHub #504 (Ian)
|
||||||
repmgr: add "--node-id" option to "repmgr cluster cleanup"; GitHub #493 (Ian)
|
repmgr: add "--node-id" option to "repmgr cluster cleanup"; GitHub #493 (Ian)
|
||||||
repmgr: report unreachable nodes when running "repmgr cluster (matrix|crosscheck);
|
repmgr: report unreachable nodes when running "repmgr cluster (matrix|crosscheck);
|
||||||
GitHub #246 (Ian)
|
GitHub #246 (Ian)
|
||||||
repmgr: add configuration file parameter "repmgr_bindir"; GitHub #246 (Ian)
|
repmgr: add configuration file parameter "repmgr_bindir"; GitHub #246 (Ian)
|
||||||
repmgr: fix "Missing replication slots" label in "node check"; GitHub #507 (Ian)
|
repmgr: fix "Missing replication slots" label in "node check"; GitHub #507 (Ian)
|
||||||
repmgrd: fix parsing of -d/--daemonize option (Ian)
|
repmgrd: fix parsing of -d/--daemonize option (Ian)
|
||||||
|
repmgrd: support "pausing" of repmgrd (Ian)
|
||||||
|
|
||||||
4.1.1 2018-09-05
|
4.1.1 2018-09-05
|
||||||
logging: explicitly log the text of failed queries as ERRORs to
|
logging: explicitly log the text of failed queries as ERRORs to
|
||||||
|
|||||||
41
Makefile.in
41
Makefile.in
@@ -15,7 +15,9 @@ DATA = \
|
|||||||
repmgr--4.0--4.1.sql \
|
repmgr--4.0--4.1.sql \
|
||||||
repmgr--4.1.sql \
|
repmgr--4.1.sql \
|
||||||
repmgr--4.1--4.2.sql \
|
repmgr--4.1--4.2.sql \
|
||||||
repmgr--4.2.sql
|
repmgr--4.2.sql \
|
||||||
|
repmgr--4.2--4.3.sql \
|
||||||
|
repmgr--4.3.sql
|
||||||
|
|
||||||
REGRESS = repmgr_extension
|
REGRESS = repmgr_extension
|
||||||
|
|
||||||
@@ -30,21 +32,26 @@ all: \
|
|||||||
PG_CPPFLAGS = -std=gnu89 -I$(includedir_internal) -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS)
|
PG_CPPFLAGS = -std=gnu89 -I$(includedir_internal) -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS)
|
||||||
SHLIB_LINK = $(libpq)
|
SHLIB_LINK = $(libpq)
|
||||||
|
|
||||||
HEADERS = $(wildcard *.h)
|
|
||||||
|
|
||||||
OBJS = \
|
OBJS = \
|
||||||
repmgr.o
|
repmgr.o
|
||||||
|
|
||||||
include Makefile.global
|
include Makefile.global
|
||||||
|
|
||||||
|
ifeq ($(vpath_build),yes)
|
||||||
|
HEADERS = $(wildcard *.h)
|
||||||
|
else
|
||||||
|
HEADERS_built = $(wildcard *.h)
|
||||||
|
endif
|
||||||
|
|
||||||
$(info Building against PostgreSQL $(MAJORVERSION))
|
$(info Building against PostgreSQL $(MAJORVERSION))
|
||||||
|
|
||||||
REPMGR_CLIENT_OBJS = repmgr-client.o \
|
REPMGR_CLIENT_OBJS = repmgr-client.o \
|
||||||
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
|
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
|
||||||
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \
|
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \
|
||||||
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
|
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o
|
||||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o
|
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o
|
||||||
DATE=$(shell date "+%Y-%m-%d")
|
DATE=$(shell date "+%Y-%m-%d")
|
||||||
|
|
||||||
repmgr_version.h: repmgr_version.h.in
|
repmgr_version.h: repmgr_version.h.in
|
||||||
@@ -79,29 +86,15 @@ clean: additional-clean
|
|||||||
maintainer-clean: additional-maintainer-clean
|
maintainer-clean: additional-maintainer-clean
|
||||||
|
|
||||||
additional-clean:
|
additional-clean:
|
||||||
rm -f repmgr-client.o
|
rm -f *.o
|
||||||
rm -f repmgr-action-primary.o
|
|
||||||
rm -f repmgr-action-standby.o
|
|
||||||
rm -f repmgr-action-witness.o
|
|
||||||
rm -f repmgr-action-bdr.o
|
|
||||||
rm -f repmgr-action-node.o
|
|
||||||
rm -f repmgr-action-cluster.o
|
|
||||||
rm -f repmgr-action-daemon.o
|
|
||||||
rm -f repmgrd.o
|
|
||||||
rm -f repmgrd-physical.o
|
|
||||||
rm -f repmgrd-bdr.o
|
|
||||||
rm -f compat.o
|
|
||||||
rm -f configfile.o
|
|
||||||
rm -f controldata.o
|
|
||||||
rm -f dbutils.o
|
|
||||||
rm -f dirutil.o
|
|
||||||
rm -f log.o
|
|
||||||
rm -f strutil.o
|
|
||||||
|
|
||||||
maintainer-additional-clean: clean
|
additional-maintainer-clean: clean
|
||||||
rm -f configure
|
$(MAKE) -C doc maintainer-clean
|
||||||
rm -f config.status config.log
|
rm -f config.status config.log
|
||||||
|
rm -f config.h
|
||||||
|
rm -f repmgr_version.h
|
||||||
rm -f Makefile
|
rm -f Makefile
|
||||||
|
rm -f Makefile.global
|
||||||
@rm -rf autom4te.cache/
|
@rm -rf autom4te.cache/
|
||||||
|
|
||||||
ifeq ($(MAJORVERSION),$(filter $(MAJORVERSION),9.3 9.4))
|
ifeq ($(MAJORVERSION),$(filter $(MAJORVERSION),9.3 9.4))
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ operations.
|
|||||||
`repmgr 4` is a complete rewrite of the existing `repmgr` codebase, allowing
|
`repmgr 4` is a complete rewrite of the existing `repmgr` codebase, allowing
|
||||||
the use of all of the latest features in PostgreSQL replication.
|
the use of all of the latest features in PostgreSQL replication.
|
||||||
|
|
||||||
PostgreSQL 10, 9.6 and 9.5 are fully supported.
|
PostgreSQL 11, 10, 9.6 and 9.5 are fully supported.
|
||||||
PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
||||||
|
|
||||||
`repmgr` is distributed under the GNU GPL 3 and maintained by 2ndQuadrant.
|
`repmgr` is distributed under the GNU GPL 3 and maintained by 2ndQuadrant.
|
||||||
@@ -19,7 +19,7 @@ PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
|||||||
|
|
||||||
`repmgr 4` supports monitoring of a two-node BDR 2.0 cluster on PostgreSQL 9.6
|
`repmgr 4` supports monitoring of a two-node BDR 2.0 cluster on PostgreSQL 9.6
|
||||||
only. Note that BDR 2.0 is not publicly available; please contact 2ndQuadrant
|
only. Note that BDR 2.0 is not publicly available; please contact 2ndQuadrant
|
||||||
for details. `repmgr 4` will support future public BDR releases.
|
for details.
|
||||||
|
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
@@ -27,7 +27,7 @@ Documentation
|
|||||||
|
|
||||||
The main `repmgr` documentation is available here:
|
The main `repmgr` documentation is available here:
|
||||||
|
|
||||||
> [repmgr 4 documentation](https://repmgr.org/docs/4.0/index.html)
|
> [repmgr 4 documentation](https://repmgr.org/docs/4.2/index.html)
|
||||||
|
|
||||||
The `README` file for `repmgr` 3.x is available here:
|
The `README` file for `repmgr` 3.x is available here:
|
||||||
|
|
||||||
|
|||||||
35
compat.c
35
compat.c
@@ -6,7 +6,7 @@
|
|||||||
* supported PostgreSQL versions. They're unlikely to change but
|
* supported PostgreSQL versions. They're unlikely to change but
|
||||||
* it would be worth keeping an eye on them for any fixes/improvements.
|
* it would be worth keeping an eye on them for any fixes/improvements.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
@@ -98,9 +98,42 @@ appendShellString(PQExpBuffer buf, const char *str)
|
|||||||
|
|
||||||
if (*p == '\'')
|
if (*p == '\'')
|
||||||
appendPQExpBufferStr(buf, "'\"'\"'");
|
appendPQExpBufferStr(buf, "'\"'\"'");
|
||||||
|
else if (*p == '&')
|
||||||
|
appendPQExpBufferStr(buf, "\\&");
|
||||||
else
|
else
|
||||||
appendPQExpBufferChar(buf, *p);
|
appendPQExpBufferChar(buf, *p);
|
||||||
}
|
}
|
||||||
|
|
||||||
appendPQExpBufferChar(buf, '\'');
|
appendPQExpBufferChar(buf, '\'');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Adapted from: src/fe_utils/string_utils.c
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
appendRemoteShellString(PQExpBuffer buf, const char *str)
|
||||||
|
{
|
||||||
|
const char *p;
|
||||||
|
|
||||||
|
appendPQExpBufferStr(buf, "\\'");
|
||||||
|
|
||||||
|
for (p = str; *p; p++)
|
||||||
|
{
|
||||||
|
if (*p == '\n' || *p == '\r')
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
_("shell command argument contains a newline or carriage return: \"%s\"\n"),
|
||||||
|
str);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*p == '\'')
|
||||||
|
appendPQExpBufferStr(buf, "'\"'\"'");
|
||||||
|
else if (*p == '&')
|
||||||
|
appendPQExpBufferStr(buf, "\\&");
|
||||||
|
else
|
||||||
|
appendPQExpBufferChar(buf, *p);
|
||||||
|
}
|
||||||
|
|
||||||
|
appendPQExpBufferStr(buf, "\\'");
|
||||||
|
}
|
||||||
|
|||||||
4
compat.h
4
compat.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* compat.h
|
* compat.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
@@ -27,4 +27,6 @@ extern void appendConnStrVal(PQExpBuffer buf, const char *str);
|
|||||||
|
|
||||||
extern void appendShellString(PQExpBuffer buf, const char *str);
|
extern void appendShellString(PQExpBuffer buf, const char *str);
|
||||||
|
|
||||||
|
extern void appendRemoteShellString(PQExpBuffer buf, const char *str);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
216
configfile.c
216
configfile.c
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* config.c - parse repmgr.conf and other configuration-related functionality
|
* config.c - parse repmgr.conf and other configuration-related functionality
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -123,9 +123,9 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
|
|||||||
|
|
||||||
if (stat(config_file_path, &stat_config) != 0)
|
if (stat(config_file_path, &stat_config) != 0)
|
||||||
{
|
{
|
||||||
log_error(_("provided configuration file \"%s\" not found: %s"),
|
log_error(_("provided configuration file \"%s\" not found"),
|
||||||
config_file,
|
config_file);
|
||||||
strerror(errno));
|
log_detail("%s", strerror(errno));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -335,6 +335,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
*/
|
*/
|
||||||
options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
|
options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
|
||||||
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
||||||
|
options->wal_receive_check_timeout = DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT;
|
||||||
|
|
||||||
/*-----------------
|
/*-----------------
|
||||||
* repmgrd settings
|
* repmgrd settings
|
||||||
@@ -357,6 +358,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
||||||
options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */
|
options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */
|
||||||
memset(options->repmgrd_pid_file, 0, sizeof(options->repmgrd_pid_file));
|
memset(options->repmgrd_pid_file, 0, sizeof(options->repmgrd_pid_file));
|
||||||
|
options->standby_disconnect_on_failover = false;
|
||||||
|
options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT;
|
||||||
|
options->connection_check_type = CHECK_PING;
|
||||||
|
options->primary_visibility_consensus = false;
|
||||||
|
memset(options->failover_validation_command, 0, sizeof(options->failover_validation_command));
|
||||||
|
options->election_rerun_interval = DEFAULT_ELECTION_RERUN_INTERVAL;
|
||||||
|
|
||||||
/*-------------
|
/*-------------
|
||||||
* witness settings
|
* witness settings
|
||||||
@@ -371,17 +378,24 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->bdr_local_monitoring_only = false;
|
options->bdr_local_monitoring_only = false;
|
||||||
options->bdr_recovery_timeout = DEFAULT_BDR_RECOVERY_TIMEOUT;
|
options->bdr_recovery_timeout = DEFAULT_BDR_RECOVERY_TIMEOUT;
|
||||||
|
|
||||||
/*-----------------
|
/*-------------------------
|
||||||
* service settings
|
* service command settings
|
||||||
*-----------------
|
*-------------------------
|
||||||
*/
|
*/
|
||||||
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
||||||
memset(options->service_stop_command, 0, sizeof(options->service_stop_command));
|
|
||||||
memset(options->service_start_command, 0, sizeof(options->service_start_command));
|
memset(options->service_start_command, 0, sizeof(options->service_start_command));
|
||||||
|
memset(options->service_stop_command, 0, sizeof(options->service_stop_command));
|
||||||
memset(options->service_restart_command, 0, sizeof(options->service_restart_command));
|
memset(options->service_restart_command, 0, sizeof(options->service_restart_command));
|
||||||
memset(options->service_reload_command, 0, sizeof(options->service_reload_command));
|
memset(options->service_reload_command, 0, sizeof(options->service_reload_command));
|
||||||
memset(options->service_promote_command, 0, sizeof(options->service_promote_command));
|
memset(options->service_promote_command, 0, sizeof(options->service_promote_command));
|
||||||
|
|
||||||
|
/*---------------------------------
|
||||||
|
* repmgrd service command settings
|
||||||
|
*---------------------------------
|
||||||
|
*/
|
||||||
|
memset(options->repmgrd_service_start_command, 0, sizeof(options->repmgrd_service_start_command));
|
||||||
|
memset(options->repmgrd_service_stop_command, 0, sizeof(options->repmgrd_service_stop_command));
|
||||||
|
|
||||||
/*----------------------------
|
/*----------------------------
|
||||||
* event notification settings
|
* event notification settings
|
||||||
*----------------------------
|
*----------------------------
|
||||||
@@ -466,11 +480,18 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
/* Copy into correct entry in parameters struct */
|
/* Copy into correct entry in parameters struct */
|
||||||
if (strcmp(name, "node_id") == 0)
|
if (strcmp(name, "node_id") == 0)
|
||||||
{
|
{
|
||||||
options->node_id = repmgr_atoi(value, name, error_list, 1);
|
options->node_id = repmgr_atoi(value, name, error_list, MIN_NODE_ID);
|
||||||
node_id_found = true;
|
node_id_found = true;
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "node_name") == 0)
|
else if (strcmp(name, "node_name") == 0)
|
||||||
strncpy(options->node_name, value, MAXLEN);
|
{
|
||||||
|
if (strlen(value) < sizeof(options->node_name))
|
||||||
|
strncpy(options->node_name, value, sizeof(options->node_name));
|
||||||
|
else
|
||||||
|
item_list_append_format(error_list,
|
||||||
|
_("value for \"node_name\" must contain fewer than %lu characters"),
|
||||||
|
sizeof(options->node_name));
|
||||||
|
}
|
||||||
else if (strcmp(name, "conninfo") == 0)
|
else if (strcmp(name, "conninfo") == 0)
|
||||||
strncpy(options->conninfo, value, MAXLEN);
|
strncpy(options->conninfo, value, MAXLEN);
|
||||||
else if (strcmp(name, "data_directory") == 0)
|
else if (strcmp(name, "data_directory") == 0)
|
||||||
@@ -480,11 +501,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
|
|
||||||
else if (strcmp(name, "replication_user") == 0)
|
else if (strcmp(name, "replication_user") == 0)
|
||||||
{
|
{
|
||||||
if (strlen(value) < NAMEDATALEN)
|
if (strlen(value) < sizeof(options->replication_user))
|
||||||
strncpy(options->replication_user, value, NAMEDATALEN);
|
strncpy(options->replication_user, value, sizeof(options->replication_user));
|
||||||
else
|
else
|
||||||
item_list_append(error_list,
|
item_list_append_format(error_list,
|
||||||
_("value for \"replication_user\" must contain fewer than " STR(NAMEDATALEN) " characters"));
|
_("value for \"replication_user\" must contain fewer than %lu characters"),
|
||||||
|
sizeof(options->replication_user));
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "pg_bindir") == 0)
|
else if (strcmp(name, "pg_bindir") == 0)
|
||||||
strncpy(options->pg_bindir, value, MAXPGPATH);
|
strncpy(options->pg_bindir, value, MAXPGPATH);
|
||||||
@@ -550,6 +572,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
|
options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
||||||
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
else if (strcmp(name, "wal_receive_check_timeout") == 0)
|
||||||
|
options->wal_receive_check_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
|
||||||
/* node rejoin settings */
|
/* node rejoin settings */
|
||||||
else if (strcmp(name, "node_rejoin_timeout") == 0)
|
else if (strcmp(name, "node_rejoin_timeout") == 0)
|
||||||
@@ -585,11 +609,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
else if (strcmp(name, "priority") == 0)
|
else if (strcmp(name, "priority") == 0)
|
||||||
options->priority = repmgr_atoi(value, name, error_list, 0);
|
options->priority = repmgr_atoi(value, name, error_list, 0);
|
||||||
else if (strcmp(name, "location") == 0)
|
else if (strcmp(name, "location") == 0)
|
||||||
strncpy(options->location, value, MAXLEN);
|
strncpy(options->location, value, sizeof(options->location));
|
||||||
else if (strcmp(name, "promote_command") == 0)
|
else if (strcmp(name, "promote_command") == 0)
|
||||||
strncpy(options->promote_command, value, MAXLEN);
|
strncpy(options->promote_command, value, sizeof(options->promote_command));
|
||||||
else if (strcmp(name, "follow_command") == 0)
|
else if (strcmp(name, "follow_command") == 0)
|
||||||
strncpy(options->follow_command, value, MAXLEN);
|
strncpy(options->follow_command, value, sizeof(options->follow_command));
|
||||||
else if (strcmp(name, "reconnect_attempts") == 0)
|
else if (strcmp(name, "reconnect_attempts") == 0)
|
||||||
options->reconnect_attempts = repmgr_atoi(value, name, error_list, 0);
|
options->reconnect_attempts = repmgr_atoi(value, name, error_list, 0);
|
||||||
else if (strcmp(name, "reconnect_interval") == 0)
|
else if (strcmp(name, "reconnect_interval") == 0)
|
||||||
@@ -608,6 +632,36 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->repmgrd_standby_startup_timeout = repmgr_atoi(value, name, error_list, 0);
|
options->repmgrd_standby_startup_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
else if (strcmp(name, "repmgrd_pid_file") == 0)
|
else if (strcmp(name, "repmgrd_pid_file") == 0)
|
||||||
strncpy(options->repmgrd_pid_file, value, MAXPGPATH);
|
strncpy(options->repmgrd_pid_file, value, MAXPGPATH);
|
||||||
|
else if (strcmp(name, "standby_disconnect_on_failover") == 0)
|
||||||
|
options->standby_disconnect_on_failover = parse_bool(value, name, error_list);
|
||||||
|
else if (strcmp(name, "sibling_nodes_disconnect_timeout") == 0)
|
||||||
|
options->sibling_nodes_disconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
else if (strcmp(name, "connection_check_type") == 0)
|
||||||
|
{
|
||||||
|
if (strcasecmp(value, "ping") == 0)
|
||||||
|
{
|
||||||
|
options->connection_check_type = CHECK_PING;
|
||||||
|
}
|
||||||
|
else if (strcasecmp(value, "connection") == 0)
|
||||||
|
{
|
||||||
|
options->connection_check_type = CHECK_CONNECTION;
|
||||||
|
}
|
||||||
|
else if (strcasecmp(value, "query") == 0)
|
||||||
|
{
|
||||||
|
options->connection_check_type = CHECK_QUERY;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
item_list_append(error_list,
|
||||||
|
_("value for \"connection_check_type\" must be \"ping\", \"connection\" or \"query\"\n"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (strcmp(name, "primary_visibility_consensus") == 0)
|
||||||
|
options->primary_visibility_consensus = parse_bool(value, name, error_list);
|
||||||
|
else if (strcmp(name, "failover_validation_command") == 0)
|
||||||
|
strncpy(options->failover_validation_command, value, sizeof(options->failover_validation_command));
|
||||||
|
else if (strcmp(name, "election_rerun_interval") == 0)
|
||||||
|
options->election_rerun_interval = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
|
||||||
/* witness settings */
|
/* witness settings */
|
||||||
else if (strcmp(name, "witness_sync_interval") == 0)
|
else if (strcmp(name, "witness_sync_interval") == 0)
|
||||||
@@ -621,41 +675,48 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
|
|
||||||
/* service settings */
|
/* service settings */
|
||||||
else if (strcmp(name, "pg_ctl_options") == 0)
|
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||||
strncpy(options->pg_ctl_options, value, MAXLEN);
|
strncpy(options->pg_ctl_options, value, sizeof(options->pg_ctl_options));
|
||||||
else if (strcmp(name, "service_stop_command") == 0)
|
|
||||||
strncpy(options->service_stop_command, value, MAXLEN);
|
|
||||||
else if (strcmp(name, "service_start_command") == 0)
|
else if (strcmp(name, "service_start_command") == 0)
|
||||||
strncpy(options->service_start_command, value, MAXLEN);
|
strncpy(options->service_start_command, value, sizeof(options->service_start_command));
|
||||||
|
else if (strcmp(name, "service_stop_command") == 0)
|
||||||
|
strncpy(options->service_stop_command, value, sizeof(options->service_stop_command));
|
||||||
else if (strcmp(name, "service_restart_command") == 0)
|
else if (strcmp(name, "service_restart_command") == 0)
|
||||||
strncpy(options->service_restart_command, value, MAXLEN);
|
strncpy(options->service_restart_command, value, sizeof(options->service_restart_command));
|
||||||
else if (strcmp(name, "service_reload_command") == 0)
|
else if (strcmp(name, "service_reload_command") == 0)
|
||||||
strncpy(options->service_reload_command, value, MAXLEN);
|
strncpy(options->service_reload_command, value, sizeof(options->service_reload_command));
|
||||||
else if (strcmp(name, "service_promote_command") == 0)
|
else if (strcmp(name, "service_promote_command") == 0)
|
||||||
strncpy(options->service_promote_command, value, MAXLEN);
|
strncpy(options->service_promote_command, value, sizeof(options->service_promote_command));
|
||||||
|
|
||||||
|
/* repmgrd service settings */
|
||||||
|
else if (strcmp(name, "repmgrd_service_start_command") == 0)
|
||||||
|
strncpy(options->repmgrd_service_start_command, value, sizeof(options->repmgrd_service_start_command));
|
||||||
|
else if (strcmp(name, "repmgrd_service_stop_command") == 0)
|
||||||
|
strncpy(options->repmgrd_service_stop_command, value, sizeof(options->repmgrd_service_stop_command));
|
||||||
|
|
||||||
|
|
||||||
/* event notification settings */
|
/* event notification settings */
|
||||||
else if (strcmp(name, "event_notification_command") == 0)
|
else if (strcmp(name, "event_notification_command") == 0)
|
||||||
strncpy(options->event_notification_command, value, MAXLEN);
|
strncpy(options->event_notification_command, value, sizeof(options->event_notification_command));
|
||||||
else if (strcmp(name, "event_notifications") == 0)
|
else if (strcmp(name, "event_notifications") == 0)
|
||||||
{
|
{
|
||||||
/* store unparsed value for comparison when reloading config */
|
/* store unparsed value for comparison when reloading config */
|
||||||
strncpy(options->event_notifications_orig, value, MAXLEN);
|
strncpy(options->event_notifications_orig, value, sizeof(options->event_notifications_orig));
|
||||||
parse_event_notifications_list(options, value);
|
parse_event_notifications_list(options, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* barman settings */
|
/* barman settings */
|
||||||
else if (strcmp(name, "barman_host") == 0)
|
else if (strcmp(name, "barman_host") == 0)
|
||||||
strncpy(options->barman_host, value, MAXLEN);
|
strncpy(options->barman_host, value, sizeof(options->barman_host));
|
||||||
else if (strcmp(name, "barman_server") == 0)
|
else if (strcmp(name, "barman_server") == 0)
|
||||||
strncpy(options->barman_server, value, MAXLEN);
|
strncpy(options->barman_server, value, sizeof(options->barman_server));
|
||||||
else if (strcmp(name, "barman_config") == 0)
|
else if (strcmp(name, "barman_config") == 0)
|
||||||
strncpy(options->barman_config, value, MAXLEN);
|
strncpy(options->barman_config, value, sizeof(options->barman_config));
|
||||||
|
|
||||||
/* rsync/ssh settings */
|
/* rsync/ssh settings */
|
||||||
else if (strcmp(name, "rsync_options") == 0)
|
else if (strcmp(name, "rsync_options") == 0)
|
||||||
strncpy(options->rsync_options, value, MAXLEN);
|
strncpy(options->rsync_options, value, sizeof(options->rsync_options));
|
||||||
else if (strcmp(name, "ssh_options") == 0)
|
else if (strcmp(name, "ssh_options") == 0)
|
||||||
strncpy(options->ssh_options, value, MAXLEN);
|
strncpy(options->ssh_options, value, sizeof(options->ssh_options));
|
||||||
|
|
||||||
/* undocumented settings for testing */
|
/* undocumented settings for testing */
|
||||||
else if (strcmp(name, "promote_delay") == 0)
|
else if (strcmp(name, "promote_delay") == 0)
|
||||||
@@ -816,13 +877,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
if (options->archive_ready_warning >= options->archive_ready_critical)
|
if (options->archive_ready_warning >= options->archive_ready_critical)
|
||||||
{
|
{
|
||||||
item_list_append(error_list,
|
item_list_append(error_list,
|
||||||
_("\archive_ready_critical\" must be greater than \"archive_ready_warning\""));
|
_("\"archive_ready_critical\" must be greater than \"archive_ready_warning\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options->replication_lag_warning >= options->replication_lag_critical)
|
if (options->replication_lag_warning >= options->replication_lag_critical)
|
||||||
{
|
{
|
||||||
item_list_append(error_list,
|
item_list_append(error_list,
|
||||||
_("\replication_lag_critical\" must be greater than \"replication_lag_warning\""));
|
_("\"replication_lag_critical\" must be greater than \"replication_lag_warning\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
|
if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
|
||||||
@@ -1032,15 +1093,19 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
|||||||
* loop is started up; it therefore only needs to reload options required
|
* loop is started up; it therefore only needs to reload options required
|
||||||
* by repmgrd, which are as follows:
|
* by repmgrd, which are as follows:
|
||||||
*
|
*
|
||||||
* changeable options:
|
* changeable options (keep the list in "doc/repmgrd-configuration.sgml" in sync
|
||||||
|
* with these):
|
||||||
|
*
|
||||||
* - async_query_timeout
|
* - async_query_timeout
|
||||||
* - bdr_local_monitoring_only
|
* - bdr_local_monitoring_only
|
||||||
* - bdr_recovery_timeout
|
* - bdr_recovery_timeout
|
||||||
|
* - connection_check_type
|
||||||
* - conninfo
|
* - conninfo
|
||||||
* - degraded_monitoring_timeout
|
* - degraded_monitoring_timeout
|
||||||
* - event_notification_command
|
* - event_notification_command
|
||||||
* - event_notifications
|
* - event_notifications
|
||||||
* - failover
|
* - failover
|
||||||
|
* - failover_validation_command
|
||||||
* - follow_command
|
* - follow_command
|
||||||
* - log_facility
|
* - log_facility
|
||||||
* - log_file
|
* - log_file
|
||||||
@@ -1048,12 +1113,19 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
|||||||
* - log_status_interval
|
* - log_status_interval
|
||||||
* - monitor_interval_secs
|
* - monitor_interval_secs
|
||||||
* - monitoring_history
|
* - monitoring_history
|
||||||
|
* - primary_notification_timeout
|
||||||
|
* - primary_visibility_consensus
|
||||||
* - promote_command
|
* - promote_command
|
||||||
* - promote_delay
|
|
||||||
* - reconnect_attempts
|
* - reconnect_attempts
|
||||||
* - reconnect_interval
|
* - reconnect_interval
|
||||||
* - repmgrd_standby_startup_timeout
|
* - repmgrd_standby_startup_timeout
|
||||||
* - retry_promote_interval_secs
|
* - retry_promote_interval_secs
|
||||||
|
* - sibling_nodes_disconnect_timeout
|
||||||
|
* - standby_disconnect_on_failover
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Not publicly documented:
|
||||||
|
* - promote_delay
|
||||||
*
|
*
|
||||||
* non-changeable options (repmgrd references these from the "repmgr.nodes"
|
* non-changeable options (repmgrd references these from the "repmgr.nodes"
|
||||||
* table, not the configuration file)
|
* table, not the configuration file)
|
||||||
@@ -1132,13 +1204,12 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strncmp(new_options.node_name, orig_options->node_name, MAXLEN) != 0)
|
if (strncmp(new_options.node_name, orig_options->node_name, sizeof(orig_options->node_name)) != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
|
log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No configuration problems detected - copy any changed values
|
* No configuration problems detected - copy any changed values
|
||||||
*
|
*
|
||||||
@@ -1188,8 +1259,8 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
{
|
{
|
||||||
strncpy(orig_options->conninfo, new_options.conninfo, MAXLEN);
|
strncpy(orig_options->conninfo, new_options.conninfo, MAXLEN);
|
||||||
log_info(_("\"conninfo\" is now \"%s\""), new_options.conninfo);
|
log_info(_("\"conninfo\" is now \"%s\""), new_options.conninfo);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1267,7 +1338,6 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
config_changed = true;
|
config_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* promote_command */
|
/* promote_command */
|
||||||
if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0)
|
if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
@@ -1313,6 +1383,51 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
config_changed = true;
|
config_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* standby_disconnect_on_failover */
|
||||||
|
if (orig_options->standby_disconnect_on_failover != new_options.standby_disconnect_on_failover)
|
||||||
|
{
|
||||||
|
orig_options->standby_disconnect_on_failover = new_options.standby_disconnect_on_failover;
|
||||||
|
log_info(_("\"standby_disconnect_on_failover\" is now \"%s\""),
|
||||||
|
new_options.standby_disconnect_on_failover == true ? "TRUE" : "FALSE");
|
||||||
|
config_changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* sibling_nodes_disconnect_timeout */
|
||||||
|
if (orig_options->sibling_nodes_disconnect_timeout != new_options.sibling_nodes_disconnect_timeout)
|
||||||
|
{
|
||||||
|
orig_options->sibling_nodes_disconnect_timeout = new_options.sibling_nodes_disconnect_timeout;
|
||||||
|
log_info(_("\"sibling_nodes_disconnect_timeout\" is now \"%i\""),
|
||||||
|
new_options.sibling_nodes_disconnect_timeout);
|
||||||
|
config_changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* connection_check_type */
|
||||||
|
if (orig_options->connection_check_type != new_options.connection_check_type)
|
||||||
|
{
|
||||||
|
orig_options->connection_check_type = new_options.connection_check_type;
|
||||||
|
log_info(_("\"connection_check_type\" is now \"%s\""),
|
||||||
|
print_connection_check_type(new_options.connection_check_type));
|
||||||
|
config_changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* primary_visibility_consensus */
|
||||||
|
if (orig_options->primary_visibility_consensus != new_options.primary_visibility_consensus)
|
||||||
|
{
|
||||||
|
orig_options->primary_visibility_consensus = new_options.primary_visibility_consensus;
|
||||||
|
log_info(_("\"primary_visibility_consensus\" is now \"%s\""),
|
||||||
|
new_options.primary_visibility_consensus == true ? "TRUE" : "FALSE");
|
||||||
|
config_changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* failover_validation_command */
|
||||||
|
if (strncmp(orig_options->failover_validation_command, new_options.failover_validation_command, MAXPGPATH) != 0)
|
||||||
|
{
|
||||||
|
strncpy(orig_options->failover_validation_command, new_options.failover_validation_command, MAXPGPATH);
|
||||||
|
log_info(_("\"failover_validation_command\" is now \"%s\""), new_options.failover_validation_command);
|
||||||
|
|
||||||
|
config_changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Handle changes to logging configuration
|
* Handle changes to logging configuration
|
||||||
*/
|
*/
|
||||||
@@ -1524,13 +1639,16 @@ repmgr_atoi(const char *value, const char *config_item, ItemList *error_list, in
|
|||||||
*
|
*
|
||||||
* TODO: accept "any unambiguous prefix of one of these" as per postgresql.conf:
|
* TODO: accept "any unambiguous prefix of one of these" as per postgresql.conf:
|
||||||
*
|
*
|
||||||
* https://www.postgresql.org/docs/current/static/config-setting.html
|
* https://www.postgresql.org/docs/current/config-setting.html
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
parse_bool(const char *s, const char *config_item, ItemList *error_list)
|
parse_bool(const char *s, const char *config_item, ItemList *error_list)
|
||||||
{
|
{
|
||||||
PQExpBufferData errors;
|
PQExpBufferData errors;
|
||||||
|
|
||||||
|
if (s == NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
if (strcasecmp(s, "0") == 0)
|
if (strcasecmp(s, "0") == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@@ -1907,3 +2025,21 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti
|
|||||||
|
|
||||||
return backup_options_ok;
|
return backup_options_ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char *
|
||||||
|
print_connection_check_type(ConnectionCheckType type)
|
||||||
|
{
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case CHECK_PING:
|
||||||
|
return "ping";
|
||||||
|
case CHECK_QUERY:
|
||||||
|
return "query";
|
||||||
|
case CHECK_CONNECTION:
|
||||||
|
return "connection";
|
||||||
|
}
|
||||||
|
|
||||||
|
/* should never reach here */
|
||||||
|
return "UNKNOWN";
|
||||||
|
}
|
||||||
|
|||||||
45
configfile.h
45
configfile.h
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* configfile.h
|
* configfile.h
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
@@ -37,6 +37,13 @@ typedef enum
|
|||||||
FAILOVER_AUTOMATIC
|
FAILOVER_AUTOMATIC
|
||||||
} failover_mode_opt;
|
} failover_mode_opt;
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
CHECK_PING,
|
||||||
|
CHECK_QUERY,
|
||||||
|
CHECK_CONNECTION
|
||||||
|
} ConnectionCheckType;
|
||||||
|
|
||||||
typedef struct EventNotificationListCell
|
typedef struct EventNotificationListCell
|
||||||
{
|
{
|
||||||
struct EventNotificationListCell *next;
|
struct EventNotificationListCell *next;
|
||||||
@@ -69,7 +76,7 @@ typedef struct
|
|||||||
{
|
{
|
||||||
/* node information */
|
/* node information */
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
char conninfo[MAXLEN];
|
char conninfo[MAXLEN];
|
||||||
char replication_user[NAMEDATALEN];
|
char replication_user[NAMEDATALEN];
|
||||||
char data_directory[MAXPGPATH];
|
char data_directory[MAXPGPATH];
|
||||||
@@ -106,6 +113,7 @@ typedef struct
|
|||||||
/* standby switchover settings */
|
/* standby switchover settings */
|
||||||
int shutdown_check_timeout;
|
int shutdown_check_timeout;
|
||||||
int standby_reconnect_timeout;
|
int standby_reconnect_timeout;
|
||||||
|
int wal_receive_check_timeout;
|
||||||
|
|
||||||
/* node rejoin settings */
|
/* node rejoin settings */
|
||||||
int node_rejoin_timeout;
|
int node_rejoin_timeout;
|
||||||
@@ -134,6 +142,12 @@ typedef struct
|
|||||||
int primary_notification_timeout;
|
int primary_notification_timeout;
|
||||||
int repmgrd_standby_startup_timeout;
|
int repmgrd_standby_startup_timeout;
|
||||||
char repmgrd_pid_file[MAXPGPATH];
|
char repmgrd_pid_file[MAXPGPATH];
|
||||||
|
bool standby_disconnect_on_failover;
|
||||||
|
int sibling_nodes_disconnect_timeout;
|
||||||
|
ConnectionCheckType connection_check_type;
|
||||||
|
bool primary_visibility_consensus;
|
||||||
|
char failover_validation_command[MAXPGPATH];
|
||||||
|
int election_rerun_interval;
|
||||||
|
|
||||||
/* BDR settings */
|
/* BDR settings */
|
||||||
bool bdr_local_monitoring_only;
|
bool bdr_local_monitoring_only;
|
||||||
@@ -141,14 +155,18 @@ typedef struct
|
|||||||
|
|
||||||
/* service settings */
|
/* service settings */
|
||||||
char pg_ctl_options[MAXLEN];
|
char pg_ctl_options[MAXLEN];
|
||||||
char service_stop_command[MAXLEN];
|
char service_start_command[MAXPGPATH];
|
||||||
char service_start_command[MAXLEN];
|
char service_stop_command[MAXPGPATH];
|
||||||
char service_restart_command[MAXLEN];
|
char service_restart_command[MAXPGPATH];
|
||||||
char service_reload_command[MAXLEN];
|
char service_reload_command[MAXPGPATH];
|
||||||
char service_promote_command[MAXLEN];
|
char service_promote_command[MAXPGPATH];
|
||||||
|
|
||||||
|
/* repmgrd service settings */
|
||||||
|
char repmgrd_service_start_command[MAXPGPATH];
|
||||||
|
char repmgrd_service_stop_command[MAXPGPATH];
|
||||||
|
|
||||||
/* event notification settings */
|
/* event notification settings */
|
||||||
char event_notification_command[MAXLEN];
|
char event_notification_command[MAXPGPATH];
|
||||||
char event_notifications_orig[MAXLEN];
|
char event_notifications_orig[MAXLEN];
|
||||||
EventNotificationList event_notifications;
|
EventNotificationList event_notifications;
|
||||||
|
|
||||||
@@ -174,7 +192,7 @@ typedef struct
|
|||||||
/* node information */ \
|
/* node information */ \
|
||||||
UNKNOWN_NODE_ID, "", "", "", "", "", "", "", REPLICATION_TYPE_PHYSICAL, \
|
UNKNOWN_NODE_ID, "", "", "", "", "", "", "", REPLICATION_TYPE_PHYSICAL, \
|
||||||
/* log settings */ \
|
/* log settings */ \
|
||||||
"", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
|
"", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
|
||||||
/* standby clone settings */ \
|
/* standby clone settings */ \
|
||||||
false, "", "", { NULL, NULL }, "", false, "", false, "", \
|
false, "", "", { NULL, NULL }, "", false, "", false, "", \
|
||||||
/* standby promote settings */ \
|
/* standby promote settings */ \
|
||||||
@@ -185,6 +203,7 @@ typedef struct
|
|||||||
/* standby switchover settings */ \
|
/* standby switchover settings */ \
|
||||||
DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
|
DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
|
||||||
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
||||||
|
DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT, \
|
||||||
/* node rejoin settings */ \
|
/* node rejoin settings */ \
|
||||||
DEFAULT_NODE_REJOIN_TIMEOUT, \
|
DEFAULT_NODE_REJOIN_TIMEOUT, \
|
||||||
/* node check settings */ \
|
/* node check settings */ \
|
||||||
@@ -199,12 +218,15 @@ typedef struct
|
|||||||
DEFAULT_RECONNECTION_INTERVAL, \
|
DEFAULT_RECONNECTION_INTERVAL, \
|
||||||
false, -1, \
|
false, -1, \
|
||||||
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
||||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||||
-1, "", \
|
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, \
|
||||||
|
CHECK_PING, true, "", DEFAULT_ELECTION_RERUN_INTERVAL, \
|
||||||
/* BDR settings */ \
|
/* BDR settings */ \
|
||||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
||||||
/* service settings */ \
|
/* service settings */ \
|
||||||
"", "", "", "", "", "", \
|
"", "", "", "", "", "", \
|
||||||
|
/* repmgrd service settings */ \
|
||||||
|
"", "", \
|
||||||
/* event notification settings */ \
|
/* event notification settings */ \
|
||||||
"", "", { NULL, NULL }, \
|
"", "", { NULL, NULL }, \
|
||||||
/* barman settings */ \
|
/* barman settings */ \
|
||||||
@@ -307,5 +329,6 @@ void free_parsed_argv(char ***argv_array);
|
|||||||
/* called by repmgr-client and repmgrd */
|
/* called by repmgr-client and repmgrd */
|
||||||
void exit_with_cli_errors(ItemList *error_list, const char *repmgr_command);
|
void exit_with_cli_errors(ItemList *error_list, const char *repmgr_command);
|
||||||
void print_item_list(ItemList *item_list);
|
void print_item_list(ItemList *item_list);
|
||||||
|
const char *print_connection_check_type(ConnectionCheckType type);
|
||||||
|
|
||||||
#endif /* _REPMGR_CONFIGFILE_H_ */
|
#endif /* _REPMGR_CONFIGFILE_H_ */
|
||||||
|
|||||||
38
configure
vendored
38
configure
vendored
@@ -1,8 +1,8 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for repmgr 4.2.
|
# Generated by GNU Autoconf 2.69 for repmgr 4.3.
|
||||||
#
|
#
|
||||||
# Report bugs to <pgsql-bugs@postgresql.org>.
|
# Report bugs to <repmgr@googlegroups.com>.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
# This configure script is free software; the Free Software Foundation
|
# This configure script is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy, distribute and modify it.
|
# gives unlimited permission to copy, distribute and modify it.
|
||||||
#
|
#
|
||||||
# Copyright (c) 2010-2018, 2ndQuadrant Ltd.
|
# Copyright (c) 2010-2019, 2ndQuadrant Ltd.
|
||||||
## -------------------- ##
|
## -------------------- ##
|
||||||
## M4sh Initialization. ##
|
## M4sh Initialization. ##
|
||||||
## -------------------- ##
|
## -------------------- ##
|
||||||
@@ -269,7 +269,7 @@ fi
|
|||||||
$as_echo "$0: be upgraded to zsh 4.3.4 or later."
|
$as_echo "$0: be upgraded to zsh 4.3.4 or later."
|
||||||
else
|
else
|
||||||
$as_echo "$0: Please tell bug-autoconf@gnu.org and
|
$as_echo "$0: Please tell bug-autoconf@gnu.org and
|
||||||
$0: pgsql-bugs@postgresql.org about your system, including
|
$0: repmgr@googlegroups.com about your system, including
|
||||||
$0: any error possibly output before this message. Then
|
$0: any error possibly output before this message. Then
|
||||||
$0: install a modern shell, or manually run the script
|
$0: install a modern shell, or manually run the script
|
||||||
$0: under such a shell if you do have one."
|
$0: under such a shell if you do have one."
|
||||||
@@ -582,10 +582,10 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='repmgr'
|
PACKAGE_NAME='repmgr'
|
||||||
PACKAGE_TARNAME='repmgr'
|
PACKAGE_TARNAME='repmgr'
|
||||||
PACKAGE_VERSION='4.2'
|
PACKAGE_VERSION='4.3'
|
||||||
PACKAGE_STRING='repmgr 4.2'
|
PACKAGE_STRING='repmgr 4.3'
|
||||||
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
|
PACKAGE_BUGREPORT='repmgr@googlegroups.com'
|
||||||
PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
|
PACKAGE_URL='https://repmgr.org/'
|
||||||
|
|
||||||
ac_subst_vars='LTLIBOBJS
|
ac_subst_vars='LTLIBOBJS
|
||||||
LIBOBJS
|
LIBOBJS
|
||||||
@@ -1178,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures repmgr 4.2 to adapt to many kinds of systems.
|
\`configure' configures repmgr 4.3 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1239,7 +1239,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of repmgr 4.2:";;
|
short | recursive ) echo "Configuration of repmgr 4.3:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1249,8 +1249,8 @@ Some influential environment variables:
|
|||||||
Use these variables to override the choices made by `configure' or to help
|
Use these variables to override the choices made by `configure' or to help
|
||||||
it to find libraries and programs with nonstandard names/locations.
|
it to find libraries and programs with nonstandard names/locations.
|
||||||
|
|
||||||
Report bugs to <pgsql-bugs@postgresql.org>.
|
Report bugs to <repmgr@googlegroups.com>.
|
||||||
repmgr home page: <https://2ndquadrant.com/en/resources/repmgr/>.
|
repmgr home page: <https://repmgr.org/>.
|
||||||
_ACEOF
|
_ACEOF
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
fi
|
fi
|
||||||
@@ -1313,14 +1313,14 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
repmgr configure 4.2
|
repmgr configure 4.3
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
This configure script is free software; the Free Software Foundation
|
This configure script is free software; the Free Software Foundation
|
||||||
gives unlimited permission to copy, distribute and modify it.
|
gives unlimited permission to copy, distribute and modify it.
|
||||||
|
|
||||||
Copyright (c) 2010-2018, 2ndQuadrant Ltd.
|
Copyright (c) 2010-2019, 2ndQuadrant Ltd.
|
||||||
_ACEOF
|
_ACEOF
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
@@ -1332,7 +1332,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by repmgr $as_me 4.2, which was
|
It was created by repmgr $as_me 4.3, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2359,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by repmgr $as_me 4.2, which was
|
This file was extended by repmgr $as_me 4.3, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -2415,14 +2415,14 @@ $config_files
|
|||||||
Configuration headers:
|
Configuration headers:
|
||||||
$config_headers
|
$config_headers
|
||||||
|
|
||||||
Report bugs to <pgsql-bugs@postgresql.org>.
|
Report bugs to <repmgr@googlegroups.com>.
|
||||||
repmgr home page: <https://2ndquadrant.com/en/resources/repmgr/>."
|
repmgr home page: <https://repmgr.org/>."
|
||||||
|
|
||||||
_ACEOF
|
_ACEOF
|
||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
repmgr config.status 4.2
|
repmgr config.status 4.3
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
AC_INIT([repmgr], [4.2], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
AC_INIT([repmgr], [4.3], [repmgr@googlegroups.com], [repmgr], [https://repmgr.org/])
|
||||||
|
|
||||||
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
AC_COPYRIGHT([Copyright (c) 2010-2019, 2ndQuadrant Ltd.])
|
||||||
|
|
||||||
AC_CONFIG_HEADER(config.h)
|
AC_CONFIG_HEADER(config.h)
|
||||||
|
|
||||||
|
|||||||
175
controldata.c
175
controldata.c
@@ -1,6 +1,12 @@
|
|||||||
/*
|
/*
|
||||||
* controldata.c
|
* controldata.c - functions for reading the pg_control file
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
*
|
||||||
|
* The functions provided here enable repmgr to read a pg_control file
|
||||||
|
* in a version-independent way, even if the PostgreSQL instance is not
|
||||||
|
* running. For that reason we can't rely on the pg_control_*() functions
|
||||||
|
* provided in PostgreSQL 9.6 and later.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
@@ -30,6 +36,53 @@
|
|||||||
|
|
||||||
static ControlFileInfo *get_controlfile(const char *DataDir);
|
static ControlFileInfo *get_controlfile(const char *DataDir);
|
||||||
|
|
||||||
|
int
|
||||||
|
get_pg_version(const char *data_directory, char *version_string)
|
||||||
|
{
|
||||||
|
char PgVersionPath[MAXPGPATH] = "";
|
||||||
|
FILE *fp = NULL;
|
||||||
|
char *endptr = NULL;
|
||||||
|
char file_version_string[MAX_VERSION_STRING] = "";
|
||||||
|
long file_major, file_minor;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
snprintf(PgVersionPath, MAXPGPATH, "%s/PG_VERSION", data_directory);
|
||||||
|
|
||||||
|
fp = fopen(PgVersionPath, "r");
|
||||||
|
|
||||||
|
if (fp == NULL)
|
||||||
|
{
|
||||||
|
log_warning(_("could not open file \"%s\" for reading"),
|
||||||
|
PgVersionPath);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
|
return UNKNOWN_SERVER_VERSION_NUM;
|
||||||
|
}
|
||||||
|
|
||||||
|
file_version_string[0] = '\0';
|
||||||
|
|
||||||
|
ret = fscanf(fp, "%23s", file_version_string);
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
if (ret != 1 || endptr == file_version_string)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to determine major version number from PG_VERSION"));
|
||||||
|
|
||||||
|
return UNKNOWN_SERVER_VERSION_NUM;
|
||||||
|
}
|
||||||
|
|
||||||
|
file_major = strtol(file_version_string, &endptr, 10);
|
||||||
|
file_minor = 0;
|
||||||
|
|
||||||
|
if (*endptr == '.')
|
||||||
|
file_minor = strtol(endptr + 1, NULL, 10);
|
||||||
|
|
||||||
|
if (version_string != NULL)
|
||||||
|
strncpy(version_string, file_version_string, MAX_VERSION_STRING);
|
||||||
|
|
||||||
|
return ((int) file_major * 10000) + ((int) file_minor * 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uint64
|
uint64
|
||||||
get_system_identifier(const char *data_directory)
|
get_system_identifier(const char *data_directory)
|
||||||
{
|
{
|
||||||
@@ -44,6 +97,7 @@ get_system_identifier(const char *data_directory)
|
|||||||
return system_identifier;
|
return system_identifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
DBState
|
DBState
|
||||||
get_db_state(const char *data_directory)
|
get_db_state(const char *data_directory)
|
||||||
{
|
{
|
||||||
@@ -60,7 +114,7 @@ get_db_state(const char *data_directory)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern XLogRecPtr
|
XLogRecPtr
|
||||||
get_latest_checkpoint_location(const char *data_directory)
|
get_latest_checkpoint_location(const char *data_directory)
|
||||||
{
|
{
|
||||||
ControlFileInfo *control_file_info = NULL;
|
ControlFileInfo *control_file_info = NULL;
|
||||||
@@ -112,10 +166,59 @@ describe_db_state(DBState state)
|
|||||||
case DB_IN_PRODUCTION:
|
case DB_IN_PRODUCTION:
|
||||||
return _("in production");
|
return _("in production");
|
||||||
}
|
}
|
||||||
|
|
||||||
return _("unrecognized status code");
|
return _("unrecognized status code");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TimeLineID
|
||||||
|
get_timeline(const char *data_directory)
|
||||||
|
{
|
||||||
|
ControlFileInfo *control_file_info = NULL;
|
||||||
|
TimeLineID timeline = -1;
|
||||||
|
|
||||||
|
control_file_info = get_controlfile(data_directory);
|
||||||
|
|
||||||
|
timeline = (int) control_file_info->timeline;
|
||||||
|
|
||||||
|
pfree(control_file_info);
|
||||||
|
|
||||||
|
return timeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TimeLineID
|
||||||
|
get_min_recovery_end_timeline(const char *data_directory)
|
||||||
|
{
|
||||||
|
ControlFileInfo *control_file_info = NULL;
|
||||||
|
TimeLineID timeline = -1;
|
||||||
|
|
||||||
|
control_file_info = get_controlfile(data_directory);
|
||||||
|
|
||||||
|
timeline = (int) control_file_info->minRecoveryPointTLI;
|
||||||
|
|
||||||
|
pfree(control_file_info);
|
||||||
|
|
||||||
|
return timeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
XLogRecPtr
|
||||||
|
get_min_recovery_location(const char *data_directory)
|
||||||
|
{
|
||||||
|
ControlFileInfo *control_file_info = NULL;
|
||||||
|
XLogRecPtr minRecoveryPoint = InvalidXLogRecPtr;
|
||||||
|
|
||||||
|
control_file_info = get_controlfile(data_directory);
|
||||||
|
|
||||||
|
minRecoveryPoint = control_file_info->minRecoveryPoint;
|
||||||
|
|
||||||
|
pfree(control_file_info);
|
||||||
|
|
||||||
|
return minRecoveryPoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We maintain our own version of get_controlfile() as we need cross-version
|
* We maintain our own version of get_controlfile() as we need cross-version
|
||||||
* compatibility, and also don't care if the file isn't readable.
|
* compatibility, and also don't care if the file isn't readable.
|
||||||
@@ -123,14 +226,10 @@ describe_db_state(DBState state)
|
|||||||
static ControlFileInfo *
|
static ControlFileInfo *
|
||||||
get_controlfile(const char *DataDir)
|
get_controlfile(const char *DataDir)
|
||||||
{
|
{
|
||||||
|
char file_version_string[MAX_VERSION_STRING] = "";
|
||||||
ControlFileInfo *control_file_info;
|
ControlFileInfo *control_file_info;
|
||||||
FILE *fp = NULL;
|
int fd, version_num;
|
||||||
int fd, ret, version_num;
|
|
||||||
char PgVersionPath[MAXPGPATH] = "";
|
|
||||||
char ControlFilePath[MAXPGPATH] = "";
|
char ControlFilePath[MAXPGPATH] = "";
|
||||||
char file_version_string[64] = "";
|
|
||||||
long file_major, file_minor;
|
|
||||||
char *endptr = NULL;
|
|
||||||
void *ControlFileDataPtr = NULL;
|
void *ControlFileDataPtr = NULL;
|
||||||
int expected_size = 0;
|
int expected_size = 0;
|
||||||
|
|
||||||
@@ -142,50 +241,32 @@ get_controlfile(const char *DataDir)
|
|||||||
control_file_info->state = DB_SHUTDOWNED;
|
control_file_info->state = DB_SHUTDOWNED;
|
||||||
control_file_info->checkPoint = InvalidXLogRecPtr;
|
control_file_info->checkPoint = InvalidXLogRecPtr;
|
||||||
control_file_info->data_checksum_version = -1;
|
control_file_info->data_checksum_version = -1;
|
||||||
|
control_file_info->timeline = -1;
|
||||||
|
control_file_info->minRecoveryPointTLI = -1;
|
||||||
|
control_file_info->minRecoveryPoint = InvalidXLogRecPtr;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read PG_VERSION, as we'll need to determine which struct to read
|
* Read PG_VERSION, as we'll need to determine which struct to read
|
||||||
* the control file contents into
|
* the control file contents into
|
||||||
*/
|
*/
|
||||||
snprintf(PgVersionPath, MAXPGPATH, "%s/PG_VERSION", DataDir);
|
|
||||||
|
|
||||||
fp = fopen(PgVersionPath, "r");
|
version_num = get_pg_version(DataDir, file_version_string);
|
||||||
|
|
||||||
if (fp == NULL)
|
if (version_num == UNKNOWN_SERVER_VERSION_NUM)
|
||||||
{
|
{
|
||||||
log_warning(_("could not open file \"%s\" for reading"),
|
log_warning(_("unable to determine server version number from PG_VERSION"));
|
||||||
PgVersionPath);
|
|
||||||
log_detail("%s", strerror(errno));
|
|
||||||
return control_file_info;
|
return control_file_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
file_version_string[0] = '\0';
|
if (version_num < MIN_SUPPORTED_VERSION_NUM)
|
||||||
|
|
||||||
ret = fscanf(fp, "%63s", file_version_string);
|
|
||||||
fclose(fp);
|
|
||||||
|
|
||||||
if (ret != 1 || endptr == file_version_string)
|
|
||||||
{
|
{
|
||||||
log_warning(_("unable to determine major version number from PG_VERSION"));
|
log_warning(_("data directory appears to be initialised for %s"),
|
||||||
|
file_version_string);
|
||||||
|
log_detail(_("minimum supported PostgreSQL version is %s"),
|
||||||
|
MIN_SUPPORTED_VERSION);
|
||||||
return control_file_info;
|
return control_file_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
file_major = strtol(file_version_string, &endptr, 10);
|
|
||||||
file_minor = 0;
|
|
||||||
|
|
||||||
if (*endptr == '.')
|
|
||||||
file_minor = strtol(endptr + 1, NULL, 10);
|
|
||||||
|
|
||||||
version_num = ((int) file_major * 10000) + ((int) file_minor * 100);
|
|
||||||
|
|
||||||
if (version_num < 90300)
|
|
||||||
{
|
|
||||||
log_warning(_("Data directory appears to be initialised for %s"), file_version_string);
|
|
||||||
return control_file_info;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
|
snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
|
||||||
|
|
||||||
if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1)
|
if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1)
|
||||||
@@ -220,6 +301,8 @@ get_controlfile(const char *DataDir)
|
|||||||
ControlFilePath);
|
ControlFilePath);
|
||||||
log_detail("%s", strerror(errno));
|
log_detail("%s", strerror(errno));
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
|
||||||
return control_file_info;
|
return control_file_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -234,6 +317,9 @@ get_controlfile(const char *DataDir)
|
|||||||
control_file_info->state = ptr->state;
|
control_file_info->state = ptr->state;
|
||||||
control_file_info->checkPoint = ptr->checkPoint;
|
control_file_info->checkPoint = ptr->checkPoint;
|
||||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||||
|
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||||
|
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||||
|
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||||
}
|
}
|
||||||
else if (version_num >= 90500)
|
else if (version_num >= 90500)
|
||||||
{
|
{
|
||||||
@@ -242,6 +328,9 @@ get_controlfile(const char *DataDir)
|
|||||||
control_file_info->state = ptr->state;
|
control_file_info->state = ptr->state;
|
||||||
control_file_info->checkPoint = ptr->checkPoint;
|
control_file_info->checkPoint = ptr->checkPoint;
|
||||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||||
|
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||||
|
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||||
|
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||||
}
|
}
|
||||||
else if (version_num >= 90400)
|
else if (version_num >= 90400)
|
||||||
{
|
{
|
||||||
@@ -250,6 +339,9 @@ get_controlfile(const char *DataDir)
|
|||||||
control_file_info->state = ptr->state;
|
control_file_info->state = ptr->state;
|
||||||
control_file_info->checkPoint = ptr->checkPoint;
|
control_file_info->checkPoint = ptr->checkPoint;
|
||||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||||
|
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||||
|
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||||
|
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||||
}
|
}
|
||||||
else if (version_num >= 90300)
|
else if (version_num >= 90300)
|
||||||
{
|
{
|
||||||
@@ -258,6 +350,9 @@ get_controlfile(const char *DataDir)
|
|||||||
control_file_info->state = ptr->state;
|
control_file_info->state = ptr->state;
|
||||||
control_file_info->checkPoint = ptr->checkPoint;
|
control_file_info->checkPoint = ptr->checkPoint;
|
||||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||||
|
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||||
|
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||||
|
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
pfree(ControlFileDataPtr);
|
pfree(ControlFileDataPtr);
|
||||||
@@ -265,9 +360,7 @@ get_controlfile(const char *DataDir)
|
|||||||
/*
|
/*
|
||||||
* We don't check the CRC here as we're potentially checking a pg_control
|
* We don't check the CRC here as we're potentially checking a pg_control
|
||||||
* file from a different PostgreSQL version to the one repmgr was compiled
|
* file from a different PostgreSQL version to the one repmgr was compiled
|
||||||
* against. However we're only interested in the first few fields, which
|
* against.
|
||||||
* should be constant across supported versions
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return control_file_info;
|
return control_file_info;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* controldata.h
|
* controldata.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
@@ -12,6 +12,7 @@
|
|||||||
#include "postgres_fe.h"
|
#include "postgres_fe.h"
|
||||||
#include "catalog/pg_control.h"
|
#include "catalog/pg_control.h"
|
||||||
|
|
||||||
|
#define MAX_VERSION_STRING 24
|
||||||
/*
|
/*
|
||||||
* A simplified representation of pg_control containing only those fields
|
* A simplified representation of pg_control containing only those fields
|
||||||
* required by repmgr.
|
* required by repmgr.
|
||||||
@@ -23,6 +24,9 @@ typedef struct
|
|||||||
DBState state;
|
DBState state;
|
||||||
XLogRecPtr checkPoint;
|
XLogRecPtr checkPoint;
|
||||||
uint32 data_checksum_version;
|
uint32 data_checksum_version;
|
||||||
|
TimeLineID timeline;
|
||||||
|
TimeLineID minRecoveryPointTLI;
|
||||||
|
XLogRecPtr minRecoveryPoint;
|
||||||
} ControlFileInfo;
|
} ControlFileInfo;
|
||||||
|
|
||||||
|
|
||||||
@@ -134,13 +138,11 @@ typedef struct ControlFileData93
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Following fields added since 9.3:
|
* Following field added since 9.3:
|
||||||
*
|
*
|
||||||
* int max_worker_processes;
|
* int max_worker_processes;
|
||||||
* int max_prepared_xacts;
|
|
||||||
* int max_locks_per_xact;
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef struct ControlFileData94
|
typedef struct ControlFileData94
|
||||||
{
|
{
|
||||||
uint64 system_identifier;
|
uint64 system_identifier;
|
||||||
@@ -331,11 +333,14 @@ typedef struct ControlFileData11
|
|||||||
} ControlFileData11;
|
} ControlFileData11;
|
||||||
|
|
||||||
|
|
||||||
|
extern int get_pg_version(const char *data_directory, char *version_string);
|
||||||
extern DBState get_db_state(const char *data_directory);
|
extern DBState get_db_state(const char *data_directory);
|
||||||
extern const char *describe_db_state(DBState state);
|
extern const char *describe_db_state(DBState state);
|
||||||
extern int get_data_checksum_version(const char *data_directory);
|
extern int get_data_checksum_version(const char *data_directory);
|
||||||
extern uint64 get_system_identifier(const char *data_directory);
|
extern uint64 get_system_identifier(const char *data_directory);
|
||||||
extern XLogRecPtr get_latest_checkpoint_location(const char *data_directory);
|
extern XLogRecPtr get_latest_checkpoint_location(const char *data_directory);
|
||||||
|
extern TimeLineID get_timeline(const char *data_directory);
|
||||||
|
extern TimeLineID get_min_recovery_end_timeline(const char *data_directory);
|
||||||
|
extern XLogRecPtr get_min_recovery_location(const char *data_directory);
|
||||||
|
|
||||||
#endif /* _CONTROLDATA_H_ */
|
#endif /* _CONTROLDATA_H_ */
|
||||||
|
|||||||
99
dbutils.h
99
dbutils.h
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* dbutils.h
|
* dbutils.h
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -20,6 +20,7 @@
|
|||||||
#ifndef _REPMGR_DBUTILS_H_
|
#ifndef _REPMGR_DBUTILS_H_
|
||||||
#define _REPMGR_DBUTILS_H_
|
#define _REPMGR_DBUTILS_H_
|
||||||
|
|
||||||
|
#include "access/timeline.h"
|
||||||
#include "access/xlogdefs.h"
|
#include "access/xlogdefs.h"
|
||||||
#include "pqexpbuffer.h"
|
#include "pqexpbuffer.h"
|
||||||
#include "portability/instr_time.h"
|
#include "portability/instr_time.h"
|
||||||
@@ -47,6 +48,7 @@ typedef enum
|
|||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
REPMGR_INSTALLED = 0,
|
REPMGR_INSTALLED = 0,
|
||||||
|
REPMGR_OLD_VERSION_INSTALLED,
|
||||||
REPMGR_AVAILABLE,
|
REPMGR_AVAILABLE,
|
||||||
REPMGR_UNAVAILABLE,
|
REPMGR_UNAVAILABLE,
|
||||||
REPMGR_UNKNOWN
|
REPMGR_UNKNOWN
|
||||||
@@ -78,7 +80,8 @@ typedef enum
|
|||||||
NODE_STATUS_UP,
|
NODE_STATUS_UP,
|
||||||
NODE_STATUS_SHUTTING_DOWN,
|
NODE_STATUS_SHUTTING_DOWN,
|
||||||
NODE_STATUS_DOWN,
|
NODE_STATUS_DOWN,
|
||||||
NODE_STATUS_UNCLEAN_SHUTDOWN
|
NODE_STATUS_UNCLEAN_SHUTDOWN,
|
||||||
|
NODE_STATUS_REJECTED
|
||||||
} NodeStatus;
|
} NodeStatus;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
@@ -104,6 +107,24 @@ typedef enum
|
|||||||
} BackupState;
|
} BackupState;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Struct to store extension version information
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct s_extension_versions {
|
||||||
|
char default_version[8];
|
||||||
|
int default_version_num;
|
||||||
|
char installed_version[8];
|
||||||
|
int installed_version_num;
|
||||||
|
} t_extension_versions;
|
||||||
|
|
||||||
|
#define T_EXTENSION_VERSIONS_INITIALIZER { \
|
||||||
|
"", \
|
||||||
|
UNKNOWN_SERVER_VERSION_NUM, \
|
||||||
|
"", \
|
||||||
|
UNKNOWN_SERVER_VERSION_NUM \
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Struct to store node information
|
* Struct to store node information
|
||||||
*/
|
*/
|
||||||
@@ -113,8 +134,8 @@ typedef struct s_node_info
|
|||||||
int node_id;
|
int node_id;
|
||||||
int upstream_node_id;
|
int upstream_node_id;
|
||||||
t_server_type type;
|
t_server_type type;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
char upstream_node_name[MAXLEN];
|
char upstream_node_name[NAMEDATALEN];
|
||||||
char conninfo[MAXLEN];
|
char conninfo[MAXLEN];
|
||||||
char repluser[NAMEDATALEN];
|
char repluser[NAMEDATALEN];
|
||||||
char location[MAXLEN];
|
char location[MAXLEN];
|
||||||
@@ -163,7 +184,7 @@ typedef struct s_node_info
|
|||||||
MS_NORMAL, \
|
MS_NORMAL, \
|
||||||
NULL, \
|
NULL, \
|
||||||
/* for ad-hoc use e.g. when working with a list of nodes */ \
|
/* for ad-hoc use e.g. when working with a list of nodes */ \
|
||||||
"", true, true \
|
"", true, true, \
|
||||||
/* various statistics */ \
|
/* various statistics */ \
|
||||||
-1, -1, -1, -1, -1, -1 \
|
-1, -1, -1, -1, -1, -1 \
|
||||||
}
|
}
|
||||||
@@ -281,22 +302,16 @@ typedef struct BdrNodeInfoList
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
char current_timestamp[MAXLEN];
|
char current_timestamp[MAXLEN];
|
||||||
uint64 last_wal_receive_lsn;
|
bool in_recovery;
|
||||||
uint64 last_wal_replay_lsn;
|
XLogRecPtr last_wal_receive_lsn;
|
||||||
|
XLogRecPtr last_wal_replay_lsn;
|
||||||
char last_xact_replay_timestamp[MAXLEN];
|
char last_xact_replay_timestamp[MAXLEN];
|
||||||
int replication_lag_time;
|
int replication_lag_time;
|
||||||
bool receiving_streamed_wal;
|
bool receiving_streamed_wal;
|
||||||
|
bool wal_replay_paused;
|
||||||
|
int upstream_last_seen;
|
||||||
} ReplInfo;
|
} ReplInfo;
|
||||||
|
|
||||||
#define T_REPLINFO_INTIALIZER { \
|
|
||||||
"", \
|
|
||||||
InvalidXLogRecPtr, \
|
|
||||||
InvalidXLogRecPtr, \
|
|
||||||
"", \
|
|
||||||
0 \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
char filepath[MAXPGPATH];
|
char filepath[MAXPGPATH];
|
||||||
@@ -336,16 +351,16 @@ typedef struct RepmgrdInfo {
|
|||||||
char pid_file[MAXLEN];
|
char pid_file[MAXLEN];
|
||||||
bool pg_running;
|
bool pg_running;
|
||||||
char pg_running_text[MAXLEN];
|
char pg_running_text[MAXLEN];
|
||||||
|
RecoveryType recovery_type;
|
||||||
bool running;
|
bool running;
|
||||||
char repmgrd_running[MAXLEN];
|
char repmgrd_running[MAXLEN];
|
||||||
bool paused;
|
bool paused;
|
||||||
|
bool wal_paused_pending_wal;
|
||||||
|
int upstream_last_seen;
|
||||||
|
char upstream_last_seen_text[MAXLEN];
|
||||||
} RepmgrdInfo;
|
} RepmgrdInfo;
|
||||||
|
|
||||||
|
|
||||||
/* global variables */
|
|
||||||
|
|
||||||
extern int server_version_num;
|
|
||||||
|
|
||||||
/* macros */
|
/* macros */
|
||||||
|
|
||||||
#define is_streaming_replication(x) (x == PRIMARY || x == STANDBY)
|
#define is_streaming_replication(x) (x == PRIMARY || x == STANDBY)
|
||||||
@@ -361,15 +376,13 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
|||||||
bool atobool(const char *value);
|
bool atobool(const char *value);
|
||||||
|
|
||||||
/* connection functions */
|
/* connection functions */
|
||||||
PGconn *establish_db_connection(const char *conninfo,
|
PGconn *establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
PGconn *establish_db_connection_quiet(const char *conninfo);
|
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||||
|
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||||
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
PGconn *establish_primary_db_connection(PGconn *conn,
|
PGconn *establish_primary_db_connection(PGconn *conn,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
|
|
||||||
PGconn *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
PGconn *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||||
PGconn *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
PGconn *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||||
|
|
||||||
@@ -388,6 +401,7 @@ void param_set_ine(t_conninfo_param_list *param_list, const char *param, const
|
|||||||
char *param_get(t_conninfo_param_list *param_list, const char *param);
|
char *param_get(t_conninfo_param_list *param_list, const char *param);
|
||||||
bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char **errmsg, bool ignore_local_params);
|
bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char **errmsg, bool ignore_local_params);
|
||||||
char *param_list_to_string(t_conninfo_param_list *param_list);
|
char *param_list_to_string(t_conninfo_param_list *param_list);
|
||||||
|
char *normalize_conninfo_string(const char *conninfo_str);
|
||||||
bool has_passfile(void);
|
bool has_passfile(void);
|
||||||
|
|
||||||
|
|
||||||
@@ -395,7 +409,6 @@ bool has_passfile(void);
|
|||||||
bool begin_transaction(PGconn *conn);
|
bool begin_transaction(PGconn *conn);
|
||||||
bool commit_transaction(PGconn *conn);
|
bool commit_transaction(PGconn *conn);
|
||||||
bool rollback_transaction(PGconn *conn);
|
bool rollback_transaction(PGconn *conn);
|
||||||
bool check_cluster_schema(PGconn *conn);
|
|
||||||
|
|
||||||
/* GUC manipulation functions */
|
/* GUC manipulation functions */
|
||||||
bool set_config(PGconn *conn, const char *config_param, const char *config_value);
|
bool set_config(PGconn *conn, const char *config_param, const char *config_value);
|
||||||
@@ -403,36 +416,47 @@ bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
|||||||
int guc_set(PGconn *conn, const char *parameter, const char *op, const char *value);
|
int guc_set(PGconn *conn, const char *parameter, const char *op, const char *value);
|
||||||
int guc_set_typed(PGconn *conn, const char *parameter, const char *op, const char *value, const char *datatype);
|
int guc_set_typed(PGconn *conn, const char *parameter, const char *op, const char *value, const char *datatype);
|
||||||
bool get_pg_setting(PGconn *conn, const char *setting, char *output);
|
bool get_pg_setting(PGconn *conn, const char *setting, char *output);
|
||||||
|
bool alter_system_int(PGconn *conn, const char *name, int value);
|
||||||
|
bool pg_reload_conf(PGconn *conn);
|
||||||
|
|
||||||
/* server information functions */
|
/* server information functions */
|
||||||
bool get_cluster_size(PGconn *conn, char *size);
|
bool get_cluster_size(PGconn *conn, char *size);
|
||||||
int get_server_version(PGconn *conn, char *server_version);
|
int get_server_version(PGconn *conn, char *server_version_buf);
|
||||||
|
|
||||||
RecoveryType get_recovery_type(PGconn *conn);
|
RecoveryType get_recovery_type(PGconn *conn);
|
||||||
int get_primary_node_id(PGconn *conn);
|
int get_primary_node_id(PGconn *conn);
|
||||||
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
||||||
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
||||||
|
TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
|
||||||
|
|
||||||
|
/* repmgrd shared memory functions */
|
||||||
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
||||||
int repmgrd_get_local_node_id(PGconn *conn);
|
int repmgrd_get_local_node_id(PGconn *conn);
|
||||||
|
bool repmgrd_check_local_node_id(PGconn *conn);
|
||||||
BackupState server_in_exclusive_backup_mode(PGconn *conn);
|
BackupState server_in_exclusive_backup_mode(PGconn *conn);
|
||||||
void repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile);
|
void repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile);
|
||||||
pid_t repmgrd_get_pid(PGconn *conn);
|
pid_t repmgrd_get_pid(PGconn *conn);
|
||||||
bool repmgrd_is_running(PGconn *conn);
|
bool repmgrd_is_running(PGconn *conn);
|
||||||
bool repmgrd_is_paused(PGconn *conn);
|
bool repmgrd_is_paused(PGconn *conn);
|
||||||
bool repmgrd_pause(PGconn *conn, bool pause);
|
bool repmgrd_pause(PGconn *conn, bool pause);
|
||||||
|
pid_t get_wal_receiver_pid(PGconn *conn);
|
||||||
|
|
||||||
/* extension functions */
|
/* extension functions */
|
||||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
ExtensionStatus get_repmgr_extension_status(PGconn *conn, t_extension_versions *extversions);
|
||||||
|
|
||||||
/* node management functions */
|
/* node management functions */
|
||||||
void checkpoint(PGconn *conn);
|
void checkpoint(PGconn *conn);
|
||||||
bool vacuum_table(PGconn *conn, const char *table);
|
bool vacuum_table(PGconn *conn, const char *table);
|
||||||
|
bool promote_standby(PGconn *conn, bool wait, int wait_seconds);
|
||||||
|
bool resume_wal_replay(PGconn *conn);
|
||||||
|
|
||||||
/* node record functions */
|
/* node record functions */
|
||||||
t_server_type parse_node_type(const char *type);
|
t_server_type parse_node_type(const char *type);
|
||||||
const char *get_node_type_string(t_server_type type);
|
const char *get_node_type_string(t_server_type type);
|
||||||
|
|
||||||
RecordStatus get_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
RecordStatus get_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
||||||
|
RecordStatus refresh_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
||||||
|
|
||||||
RecordStatus get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info);
|
RecordStatus get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info);
|
||||||
|
|
||||||
RecordStatus get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info);
|
RecordStatus get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info);
|
||||||
@@ -479,7 +503,7 @@ PGresult *get_event_records(PGconn *conn, int node_id, const char *node_name,
|
|||||||
|
|
||||||
/* replication slot functions */
|
/* replication slot functions */
|
||||||
void create_slot_name(char *slot_name, int node_id);
|
void create_slot_name(char *slot_name, int node_id);
|
||||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
|
bool create_replication_slot(PGconn *conn, char *slot_name, PQExpBufferData *error_msg);
|
||||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||||
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||||
int get_free_replication_slot_count(PGconn *conn);
|
int get_free_replication_slot_count(PGconn *conn);
|
||||||
@@ -490,12 +514,14 @@ bool get_tablespace_name_by_location(PGconn *conn, const char *location, char *
|
|||||||
|
|
||||||
/* asynchronous query functions */
|
/* asynchronous query functions */
|
||||||
bool cancel_query(PGconn *conn, int timeout);
|
bool cancel_query(PGconn *conn, int timeout);
|
||||||
int wait_connection_availability(PGconn *conn, long long timeout);
|
int wait_connection_availability(PGconn *conn, int timeout);
|
||||||
|
|
||||||
/* node availability functions */
|
/* node availability functions */
|
||||||
bool is_server_available(const char *conninfo);
|
bool is_server_available(const char *conninfo);
|
||||||
|
bool is_server_available_quiet(const char *conninfo);
|
||||||
bool is_server_available_params(t_conninfo_param_list *param_list);
|
bool is_server_available_params(t_conninfo_param_list *param_list);
|
||||||
ExecStatusType connection_ping(PGconn *conn);
|
ExecStatusType connection_ping(PGconn *conn);
|
||||||
|
ExecStatusType connection_ping_reconnect(PGconn *conn);
|
||||||
|
|
||||||
/* monitoring functions */
|
/* monitoring functions */
|
||||||
void
|
void
|
||||||
@@ -526,12 +552,17 @@ bool get_new_primary(PGconn *conn, int *primary_node_id);
|
|||||||
void reset_voting_status(PGconn *conn);
|
void reset_voting_status(PGconn *conn);
|
||||||
|
|
||||||
/* replication status functions */
|
/* replication status functions */
|
||||||
XLogRecPtr get_current_wal_lsn(PGconn *conn);
|
XLogRecPtr get_primary_current_lsn(PGconn *conn);
|
||||||
|
XLogRecPtr get_node_current_lsn(PGconn *conn);
|
||||||
XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
||||||
bool get_replication_info(PGconn *conn, ReplInfo *replication_info);
|
void init_replication_info(ReplInfo *replication_info);
|
||||||
|
bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info);
|
||||||
int get_replication_lag_seconds(PGconn *conn);
|
int get_replication_lag_seconds(PGconn *conn);
|
||||||
void get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *node_info);
|
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
||||||
bool is_downstream_node_attached(PGconn *conn, char *node_name);
|
bool is_downstream_node_attached(PGconn *conn, char *node_name);
|
||||||
|
void set_upstream_last_seen(PGconn *conn);
|
||||||
|
int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
||||||
|
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
||||||
|
|
||||||
/* BDR functions */
|
/* BDR functions */
|
||||||
int get_bdr_version_num(void);
|
int get_bdr_version_num(void);
|
||||||
|
|||||||
35
dirutil.c
35
dirutil.c
@@ -3,7 +3,7 @@
|
|||||||
* dirmod.c
|
* dirmod.c
|
||||||
* directory handling functions
|
* directory handling functions
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -50,7 +50,7 @@ typedef long pgpid_t;
|
|||||||
* and tablespace directories.
|
* and tablespace directories.
|
||||||
*/
|
*/
|
||||||
DataDirState
|
DataDirState
|
||||||
check_dir(char *path)
|
check_dir(const char *path)
|
||||||
{
|
{
|
||||||
DIR *chkdir = NULL;
|
DIR *chkdir = NULL;
|
||||||
struct dirent *file = NULL;
|
struct dirent *file = NULL;
|
||||||
@@ -91,12 +91,17 @@ check_dir(char *path)
|
|||||||
* Create directory with error log message when failing
|
* Create directory with error log message when failing
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
create_dir(char *path)
|
create_dir(const char *path)
|
||||||
{
|
{
|
||||||
if (mkdir_p(path, 0700) == 0)
|
char create_dir_path[MAXPGPATH];
|
||||||
|
|
||||||
|
/* mkdir_p() may modify the supplied path */
|
||||||
|
strncpy(create_dir_path, path, MAXPGPATH);
|
||||||
|
|
||||||
|
if (mkdir_p(create_dir_path, 0700) == 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
log_error(_("unable to create directory \"%s\""), path);
|
log_error(_("unable to create directory \"%s\""), create_dir_path);
|
||||||
log_detail("%s", strerror(errno));
|
log_detail("%s", strerror(errno));
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@@ -104,13 +109,12 @@ create_dir(char *path)
|
|||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
set_dir_permissions(char *path)
|
set_dir_permissions(const char *path)
|
||||||
{
|
{
|
||||||
return (chmod(path, 0700) != 0) ? false : true;
|
return (chmod(path, 0700) != 0) ? false : true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* function from initdb.c */
|
/* function from initdb.c */
|
||||||
/* source adapted from FreeBSD /src/bin/mkdir/mkdir.c */
|
/* source adapted from FreeBSD /src/bin/mkdir/mkdir.c */
|
||||||
|
|
||||||
@@ -198,9 +202,9 @@ mkdir_p(char *path, mode_t omode)
|
|||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
is_pg_dir(char *path)
|
is_pg_dir(const char *path)
|
||||||
{
|
{
|
||||||
char dirpath[MAXPGPATH];
|
char dirpath[MAXPGPATH] = "";
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
|
|
||||||
/* test pgdata */
|
/* test pgdata */
|
||||||
@@ -223,7 +227,7 @@ is_pg_dir(char *path)
|
|||||||
* any further useful progress can be made.
|
* any further useful progress can be made.
|
||||||
*/
|
*/
|
||||||
PgDirState
|
PgDirState
|
||||||
is_pg_running(char *path)
|
is_pg_running(const char *path)
|
||||||
{
|
{
|
||||||
long pid;
|
long pid;
|
||||||
FILE *pidf;
|
FILE *pidf;
|
||||||
@@ -272,6 +276,8 @@ is_pg_running(char *path)
|
|||||||
log_warning(_("invalid data in PostgreSQL PID file \"%s\""), path);
|
log_warning(_("invalid data in PostgreSQL PID file \"%s\""), path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fclose(pidf);
|
||||||
|
|
||||||
return PG_DIR_NOT_RUNNING;
|
return PG_DIR_NOT_RUNNING;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -291,7 +297,7 @@ is_pg_running(char *path)
|
|||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
create_pg_dir(char *path, bool force)
|
create_pg_dir(const char *path, bool force)
|
||||||
{
|
{
|
||||||
/* Check this directory can be used as a PGDATA dir */
|
/* Check this directory can be used as a PGDATA dir */
|
||||||
switch (check_dir(path))
|
switch (check_dir(path))
|
||||||
@@ -347,8 +353,9 @@ create_pg_dir(char *path, bool force)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case DIR_ERROR:
|
case DIR_ERROR:
|
||||||
log_error(_("could not access directory \"%s\": %s"),
|
log_error(_("could not access directory \"%s\"")
|
||||||
path, strerror(errno));
|
, path);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -358,7 +365,7 @@ create_pg_dir(char *path, bool force)
|
|||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
rmdir_recursive(char *path)
|
rmdir_recursive(const char *path)
|
||||||
{
|
{
|
||||||
return nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
return nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||||
}
|
}
|
||||||
|
|||||||
16
dirutil.h
16
dirutil.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* dirutil.h
|
* dirutil.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -35,13 +35,13 @@ typedef enum
|
|||||||
} PgDirState;
|
} PgDirState;
|
||||||
|
|
||||||
extern int mkdir_p(char *path, mode_t omode);
|
extern int mkdir_p(char *path, mode_t omode);
|
||||||
extern bool set_dir_permissions(char *path);
|
extern bool set_dir_permissions(const char *path);
|
||||||
|
|
||||||
extern DataDirState check_dir(char *path);
|
extern DataDirState check_dir(const char *path);
|
||||||
extern bool create_dir(char *path);
|
extern bool create_dir(const char *path);
|
||||||
extern bool is_pg_dir(char *path);
|
extern bool is_pg_dir(const char *path);
|
||||||
extern PgDirState is_pg_running(char *path);
|
extern PgDirState is_pg_running(const char *path);
|
||||||
extern bool create_pg_dir(char *path, bool force);
|
extern bool create_pg_dir(const char *path, bool force);
|
||||||
extern int rmdir_recursive(char *path);
|
extern int rmdir_recursive(const char *path);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ clean:
|
|||||||
|
|
||||||
maintainer-clean:
|
maintainer-clean:
|
||||||
rm -rf html
|
rm -rf html
|
||||||
rm -rf Makefile
|
rm -f Makefile
|
||||||
|
|
||||||
zip: html
|
zip: html
|
||||||
cp -r html repmgr-docs-$(REPMGR_VERSION)
|
cp -r html repmgr-docs-$(REPMGR_VERSION)
|
||||||
|
|||||||
@@ -21,13 +21,17 @@
|
|||||||
in PostgreSQL 9.3, as well as improved automated failover support
|
in PostgreSQL 9.3, as well as improved automated failover support
|
||||||
via <application>repmgrd</application>, and is not compatible with PostgreSQL 9.2
|
via <application>repmgrd</application>, and is not compatible with PostgreSQL 9.2
|
||||||
and earlier. We recommend upgrading to &repmgr; 4, as the &repmgr; 3.x
|
and earlier. We recommend upgrading to &repmgr; 4, as the &repmgr; 3.x
|
||||||
series will no longer be actively maintained.
|
series is no longer maintained.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 2.x supports PostgreSQL 9.0 ~ 9.3. While it is compatible
|
&repmgr; 2.x supports PostgreSQL 9.0 ~ 9.3. While it is compatible
|
||||||
with PostgreSQL 9.3, we recommend using repmgr 4.x. &repmgr; 2.x is
|
with PostgreSQL 9.3, we recommend using repmgr 4.x. &repmgr; 2.x is
|
||||||
no longer maintained.
|
no longer maintained.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
See also <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
|
||||||
|
and <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
|
||||||
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="faq-replication-slots-advantage" xreflabel="Advantages of replication slots">
|
<sect2 id="faq-replication-slots-advantage" xreflabel="Advantages of replication slots">
|
||||||
@@ -35,15 +39,25 @@
|
|||||||
<para>
|
<para>
|
||||||
Replication slots, introduced in PostgreSQL 9.4, ensure that the
|
Replication slots, introduced in PostgreSQL 9.4, ensure that the
|
||||||
primary server will retain WAL files until they have been consumed
|
primary server will retain WAL files until they have been consumed
|
||||||
by all standby servers. This makes WAL file management much easier,
|
by all standby servers. This means standby servers should never
|
||||||
and if used &repmgr; will no longer insist on a fixed minimum number
|
fail due to not being able to retrieve required WAL files from the
|
||||||
(default: 5000) of WAL files being retained.
|
primary.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
However this does mean that if a standby is no longer connected to the
|
However this does mean that if a standby is no longer connected to the
|
||||||
primary, the presence of the replication slot will cause WAL files
|
primary, the presence of the replication slot will cause WAL files
|
||||||
to be retained indefinitely.
|
to be retained indefinitely, and eventually lead to disk space
|
||||||
|
exhaustion.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
2ndQuadrant's recommended configuration is to configure
|
||||||
|
<ulink url="https://www.pgbarman.org/">Barman</ulink> as a fallback
|
||||||
|
source of WAL files, rather than maintain replication slots for
|
||||||
|
each standby. See also: <link linkend="cloning-from-barman-restore-command">Using Barman as a WAL file source</link>.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="faq-replication-slots-number" xreflabel="Number of replication slots">
|
<sect2 id="faq-replication-slots-number" xreflabel="Number of replication slots">
|
||||||
@@ -62,7 +76,7 @@
|
|||||||
<para>
|
<para>
|
||||||
Before PostgreSQL 10, hash indexes were not WAL logged and are therefore not suitable
|
Before PostgreSQL 10, hash indexes were not WAL logged and are therefore not suitable
|
||||||
for use in streaming replication in PostgreSQL 9.6 and earlier. See the
|
for use in streaming replication in PostgreSQL 9.6 and earlier. See the
|
||||||
<ulink url="https://www.postgresql.org/docs/9.6/static/sql-createindex.html#AEN80279">PostgreSQL documentation</ulink>
|
<ulink url="https://www.postgresql.org/docs/9.6/sql-createindex.html#AEN80279">PostgreSQL documentation</ulink>
|
||||||
for details.
|
for details.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
@@ -82,12 +96,11 @@
|
|||||||
<para>
|
<para>
|
||||||
For <emphasis>major</emphasis> version upgrades (e.g. from PostgreSQL 9.6 to PostgreSQL 10),
|
For <emphasis>major</emphasis> version upgrades (e.g. from PostgreSQL 9.6 to PostgreSQL 10),
|
||||||
the traditional approach is to "reseed" a cluster by upgrading a single
|
the traditional approach is to "reseed" a cluster by upgrading a single
|
||||||
node with <ulink url="https://www.postgresql.org/docs/current/static/pgupgrade.html">pg_upgrade</ulink>
|
node with <ulink url="https://www.postgresql.org/docs/current/pgupgrade.html">pg_upgrade</ulink>
|
||||||
and recloning standbys from this.
|
and recloning standbys from this.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
To minimize downtime during major upgrades, for more recent PostgreSQL
|
To minimize downtime during major upgrades from PostgreSQL 9.4 and later,
|
||||||
versions (PostgreSQL 9.4 and later),
|
|
||||||
<ulink url="https://www.2ndquadrant.com/en/resources/pglogical/">pglogical</ulink>
|
<ulink url="https://www.2ndquadrant.com/en/resources/pglogical/">pglogical</ulink>
|
||||||
can be used to set up a parallel cluster using the newer PostgreSQL version,
|
can be used to set up a parallel cluster using the newer PostgreSQL version,
|
||||||
which can be kept in sync with the existing production cluster until the
|
which can be kept in sync with the existing production cluster until the
|
||||||
@@ -116,6 +129,74 @@
|
|||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="faq-repmgr-required-for-replication">
|
||||||
|
<title>Is &repmgr; required for streaming replication?</title>
|
||||||
|
<para>
|
||||||
|
No.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
&repmgr; (together with <application>repmgrd</application>) assists with
|
||||||
|
<emphasis>managing</emphasis> replication. It does not actually perform replication, which
|
||||||
|
is part of the core PostgreSQL functionality.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="faq-what-if-repmgr-uninstalled">
|
||||||
|
<title>Will replication stop working if &repmgr; is uninstalled?</title>
|
||||||
|
<para>
|
||||||
|
No. See preceding question.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="faq-version-mix">
|
||||||
|
<title>Does it matter if different &repmgr; versions are present in the replication cluster?</title>
|
||||||
|
<para>
|
||||||
|
Yes. If different "major" &repmgr; versions (e.g. 3.3.x and 4.1.x) are present,
|
||||||
|
&repmgr; (in particular <application>repmgrd</application>)
|
||||||
|
may not run, or run properly, or in the worst case (if different <application>repmgrd</application>
|
||||||
|
versions are running and there are differences in the failover implementation) break
|
||||||
|
your replication cluster.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If different "minor" &repmgr; versions (e.g. 4.1.1 and 4.1.6) are installed,
|
||||||
|
&repmgr; will function, but we strongly recommend always running the same version
|
||||||
|
to ensure there are no unexpected surprises, e.g. a newer version behaving slightly
|
||||||
|
differently to the older version.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
See also <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="faq-upgrade-repmgr">
|
||||||
|
<title>Should I upgrade &repmgr;?</title>
|
||||||
|
<para>
|
||||||
|
Yes.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
We don't release new versions for fun, you know. Upgrading may require a little effort,
|
||||||
|
but running an older &repmgr; version with bugs which have since been fixed may end up
|
||||||
|
costing you more effort. The same applies to PostgreSQL itself.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="faq-repmgr-conf-data-directory">
|
||||||
|
<title>Why do I need to specify the data directory location in repmgr.conf?</title>
|
||||||
|
<para>
|
||||||
|
In some circumstances &repmgr; may need to access a PostgreSQL data
|
||||||
|
directory while the PostgreSQL server is not running, e.g. to confirm
|
||||||
|
it shut down cleanly during a <link linkend="performing-switchover">switchover</link>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Additionally, this provides support when using &repmgr; on PostgreSQL 9.6 and
|
||||||
|
earlier, where the <literal>repmgr</literal> user is not a superuser; in that
|
||||||
|
case the <literal>repmgr</literal> user will not be able to access the
|
||||||
|
<literal>data_directory</literal> configuration setting, access to which is restricted
|
||||||
|
to superusers. (In PostgreSQL 10 and later, non-superusers can be added to the
|
||||||
|
group <option>pg_read_all_settings</option> which will enable them to read this setting).
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="faq-repmgr" xreflabel="repmgr">
|
<sect1 id="faq-repmgr" xreflabel="repmgr">
|
||||||
|
|||||||
@@ -64,7 +64,7 @@
|
|||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>Repository documentation:</entry>
|
<entry>Repository documentation:</entry>
|
||||||
<entry><ulink url="https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
|
<entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
|
||||||
</row>
|
</row>
|
||||||
</tbody>
|
</tbody>
|
||||||
</tgroup>
|
</tgroup>
|
||||||
@@ -276,7 +276,7 @@
|
|||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>Repository documentation:</entry>
|
<entry>Repository documentation:</entry>
|
||||||
<entry><ulink url="https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN">https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN</ulink></entry>
|
<entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN</ulink></entry>
|
||||||
</row>
|
</row>
|
||||||
</tbody>
|
</tbody>
|
||||||
</tgroup>
|
</tgroup>
|
||||||
@@ -293,7 +293,7 @@
|
|||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>Repository documentation:</entry>
|
<entry>Repository documentation:</entry>
|
||||||
<entry><ulink url="https://wiki.postgresql.org/wiki/Apt)">https://wiki.postgresql.org/wiki/Apt)</ulink></entry>
|
<entry><ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</ulink></entry>
|
||||||
</row>
|
</row>
|
||||||
</tbody>
|
</tbody>
|
||||||
</tgroup>
|
</tgroup>
|
||||||
@@ -456,14 +456,21 @@ repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
|
|||||||
|
|
||||||
<sect1 id="packages-old-versions" xreflabel="Installing old package versions">
|
<sect1 id="packages-old-versions" xreflabel="Installing old package versions">
|
||||||
<title>Installing old package versions</title>
|
<title>Installing old package versions</title>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>old packages</primary>
|
<primary>old packages</primary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>packages</primary>
|
<primary>packages</primary>
|
||||||
<secondary>old versions</secondary>
|
<secondary>old versions</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>installation</primary>
|
||||||
|
<secondary>old package versions</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<sect2 id="packages-old-versions-debian" xreflabel="old Debian package versions">
|
<sect2 id="packages-old-versions-debian" xreflabel="old Debian package versions">
|
||||||
<title>Debian/Ubuntu</title>
|
<title>Debian/Ubuntu</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -15,11 +15,307 @@
|
|||||||
See also: <xref linkend="upgrading-repmgr">
|
See also: <xref linkend="upgrading-repmgr">
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<sect1 id="release-4.3">
|
||||||
|
<title>Release 4.3</title>
|
||||||
|
<para><emphasis>Mar ???, 2019</emphasis></para>
|
||||||
|
<para>
|
||||||
|
&repmgr; 4.3 is a major release.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
On Debian-based systems, including Ubuntu, if using <application>repmgrd</application>
|
||||||
|
please ensure that in the file <filename>/etc/init.d/repmgrd</filename>, the parameter
|
||||||
|
<varname>REPMGRD_OPTS</varname> contains "<literal>--daemonize=false</literal>", e.g.:
|
||||||
|
<programlisting>
|
||||||
|
# additional options
|
||||||
|
REPMGRD_OPTS="--daemonize=false"</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For further details, see <link linkend="repmgrd-configuration-debian-ubuntu">repmgrd configuration on Debian/Ubuntu</link>.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>repmgr enhancements</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>:
|
||||||
|
option <option>--upstream-node-id</option> can now be used to specify another standby
|
||||||
|
to follow.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>:
|
||||||
|
verify that it is actually possible to follow another node.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>:
|
||||||
|
verify that it is actually possible to attach the node to the current primary.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
New commands <link linkend="repmgr-daemon-start"><command>repmgr daemon start</command></link> and
|
||||||
|
<link linkend="repmgr-daemon-stop"><command>repmgr daemon stop</command></link>:
|
||||||
|
these provide a standardized way of starting and stopping <application>repmgrd</application>.
|
||||||
|
GitHub #528.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
These commands require the configuration file settings
|
||||||
|
<varname>repmgrd_service_start_command</varname> and <varname>repmgrd_service_stop_command</varname>
|
||||||
|
in <filename>repmgr.conf</filename> to be set.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>
|
||||||
|
additionally displays the node priority and the interval (in seconds) since the
|
||||||
|
<application>repmgrd</application> instance last verified its upstream node was available.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Add <option>--compact</option> option to <command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command> (GitHub #521).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This makes it easier to copy the output into emails, chats etc. as a compact table.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>:
|
||||||
|
differentiate between unreachable nodes and nodes which are running but rejecting connections.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This makes it possible to see whether a node is unreachable at network level,
|
||||||
|
or if it is running but rejecting connections for some reason.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Add <option>--dry-run</option> to <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command> (GitHub #522).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command>repmgr --version-number</command> outputs the "raw"
|
||||||
|
repmgr version number (e.g. <literal>40300</literal>). This is intended
|
||||||
|
for use by scripts etc. requiring an easily parseable representation
|
||||||
|
of the &repmgr; version.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-node-check"><command>repmgr node check --data-directory-config</command></link>
|
||||||
|
option added; this is to confirm &repmgr; is correctly configured. GitHub #523.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Add check to <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>
|
||||||
|
to ensure the data directory on the demotion candidate is configured correctly in <filename>repmgr.conf</filename>.
|
||||||
|
This is to ensure that &repmgr;, when remotely executed on the demotion candidate, can correctly verify
|
||||||
|
that PostgreSQL on the demotion candidate was shut down cleanly. GitHub #523.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>repmgrd enhancements</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> will no longer consider nodes where <application>repmgrd</application>
|
||||||
|
is not running as promotion candidates.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Previously, if <application>repmgrd</application> was not running on a node, but
|
||||||
|
that node qualified as the promotion candidate, it would never be promoted due to
|
||||||
|
the absence of a running <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Add option <option>connection_check_type</option> to enable selection of the method
|
||||||
|
<application>repmgrd</application> uses to determine whether the upstream node is available.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Possible values are <literal>ping</literal> (default; uses <command>PQping()</command> to
|
||||||
|
determine server availability), <literal>connection</literal> (attempts to make a new connection to
|
||||||
|
the upstream node), and <literal>query</literal> (determines server availability
|
||||||
|
by executing an SQL statement on the node via the existing connection).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
New configuration option <link linkend="repmgrd-failover-validation"><option>failover_validation_command</option></link>
|
||||||
|
to allow an external mechanism to validate the failover decision made by <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
New configuration option <link linkend="repmgrd-standby-disconnection-on-failover"><option>standby_disconnect_on_failover</option></link>
|
||||||
|
to force standbys to disconnect their WAL receivers before making a failover decision.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
In a failover situation, <application>repmgrd</application> will not attempt to promote a
|
||||||
|
node if another primary has already appeared (e.g. by being promoted manually).
|
||||||
|
GitHub #420.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Bug fixes</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: when executing <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>,
|
||||||
|
prevent escaping issues with connection URIs when executing <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>
|
||||||
|
on the demotion candidate. GitHub #525.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: when executing <command><link linkend="repmgr-witness-register">repmgr witness register</link></command>,
|
||||||
|
check the node connected to is actually the primary (i.e. not the witness server). GitHub #528.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: when executing <link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>,
|
||||||
|
recheck primary/upstream connection(s) after the data copy operation is complete, as these may
|
||||||
|
have gone away.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||||
|
avoid a potential race condition when comparing received WAL on the standby to the primary's shutdown location,
|
||||||
|
as the standby's walreceiver may not have yet flushed all received WAL to disk. GitHub #518.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||||
|
verify the standby (promotion candidate) is currently attached to the primary (demotion candidate). GitHub #519.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application>: on a cascaded standby, don't fail over if
|
||||||
|
<literal>failover=manual</literal>. GitHub #531.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>:
|
||||||
|
fix display of node IDs with multiple digits.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
ensure <command><link linkend="repmgr-primary-unregister">repmgr primary unregister</link></command>
|
||||||
|
behaves correctly when executed on a witness server. GitHub #548.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
ensure <command><link linkend="repmgr-standby-register">repmgr standby register</link></command>
|
||||||
|
fails when <option>--upstream-node-id</option> is the same as the local node ID.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command><link linkend="repmgr-node-check">repmgr node check</link></command>
|
||||||
|
will only consider physical replication slots, as the purpose
|
||||||
|
of slot checks is to warn about potential issues with
|
||||||
|
streaming replication standbys which are no longer attached.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="release-4.2">
|
<sect1 id="release-4.2">
|
||||||
<title>Release 4.2</title>
|
<title>Release 4.2</title>
|
||||||
<para><emphasis>???, 2018</emphasis></para>
|
<para><emphasis>Wed October 24, 2018</emphasis></para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
&repmgr; 4.2 is a major release, with the main new feature being the
|
||||||
|
ability to <link linkend="repmgrd-pausing">pause repmgrd</link>, e.g. during planned maintenance
|
||||||
|
operations. Various other usability enhancements and a couple of bug fixes are also included;
|
||||||
|
see notes below for details.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
A restart of the PostgreSQL server <emphasis>is</emphasis> required
|
||||||
|
for this release. For detailed upgrade instructions, see
|
||||||
|
<link linkend="upgrading-major-version">Upgrading a major version release</link>.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
On Debian-based systems, including Ubuntu, if using <application>repmgrd</application>
|
||||||
|
please ensure that in the file <filename>/etc/init.d/repmgrd</filename>, the parameter
|
||||||
|
<varname>REPMGRD_OPTS</varname> contains "<literal>--daemonize=false</literal>", e.g.:
|
||||||
|
<programlisting>
|
||||||
|
# additional options
|
||||||
|
REPMGRD_OPTS="--daemonize=false"</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For further details, see <link linkend="repmgrd-configuration-debian-ubuntu">repmgrd daemon configuration on Debian/Ubuntu</link>.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>Configuration file changes</title>
|
<title>Configuration file changes</title>
|
||||||
@@ -96,10 +392,43 @@
|
|||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>repmgrd enhancements</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> can now be "paused", i.e. instructed
|
||||||
|
not to take any action such as a failover, even if the prerequisites for such an
|
||||||
|
action are detected.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This removes the need to stop <application>repmgrd</application> on all nodes when
|
||||||
|
performing a planned operation such as a switchover.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For further details, see <link linkend="repmgrd-pausing">Pausing repmgrd</link>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>Bug fixes</title>
|
<title>Bug fixes</title>
|
||||||
<para>
|
<para>
|
||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: fix "Missing replication slots" label in
|
||||||
|
<command><link linkend="repmgr-node-check">repmgr node check</link></command>. (GitHub #507)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application>: fix parsing of <option>-d/--daemonize</option> option.
|
<application>repmgrd</application>: fix parsing of <option>-d/--daemonize</option> option.
|
||||||
@@ -1228,7 +1557,7 @@
|
|||||||
<emphasis>easier upgrades</emphasis>: &repmgr; is now implemented as a native
|
<emphasis>easier upgrades</emphasis>: &repmgr; is now implemented as a native
|
||||||
PostgreSQL extension, which means future upgrades can be carried out by
|
PostgreSQL extension, which means future upgrades can be carried out by
|
||||||
installing the upgraded package and issuing
|
installing the upgraded package and issuing
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/sql-alterextension.html">ALTER EXTENSION repmgr UPDATE</ulink>.
|
<ulink url="https://www.postgresql.org/docs/current/sql-alterextension.html">ALTER EXTENSION repmgr UPDATE</ulink>.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
|||||||
96
doc/appendix-support.sgml
Normal file
96
doc/appendix-support.sgml
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
<appendix id="appendix-support" xreflabel="repmgr support">
|
||||||
|
<indexterm>
|
||||||
|
<primary>support</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>&repmgr; support</title>
|
||||||
|
<para>
|
||||||
|
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides 24x7
|
||||||
|
production support for &repmgr; and other PostgreSQL
|
||||||
|
products, including configuration assistance, installation
|
||||||
|
verification and training for running a robust replication cluster.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For further details see: <ulink url="https://2ndquadrant.com/en/support/">https://2ndquadrant.com/en/support/</ulink>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
A mailing list/forum is provided via Google groups to discuss contributions or issues: <ulink url="https://groups.google.com/group/repmgr">https://groups.google.com/group/repmgr</ulink>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Please report bugs and other issues to: <ulink url="https://github.com/2ndQuadrant/repmgr">https://github.com/2ndQuadrant/repmgr</ulink>.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
Please read the <link linkend="appendix-support-reporting-issues">following section</link> before submitting questions or issue reports.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
|
<sect1 id="appendix-support-reporting-issues" xreflabel="Reporting Issues">
|
||||||
|
<indexterm>
|
||||||
|
<primary>support</primary>
|
||||||
|
<secondary>reporting issues</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Reporting Issues</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
When asking questions or reporting issues, it is extremely helpful if the following information is included:
|
||||||
|
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
&repmgr; version
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
How was &repmgr; installed? From source? From packages? If
|
||||||
|
so from which repository?
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<filename>repmgr.conf</filename> files (suitably anonymized if necessary)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
Contents of the <literal>repmgr.nodes</literal> table (suitably anonymized if necessary)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
PostgreSQL version
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If issues are encountered with a &repmgr; client command, please provide
|
||||||
|
the output of that command executed with the options
|
||||||
|
<option>-LDEBUG --verbose</option>, which will ensure &repmgr; emits
|
||||||
|
the maximum level of logging output.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If issues are encountered with <application>repmgrd</application>,
|
||||||
|
please provide relevant extracts from the &repmgr; log files
|
||||||
|
and if possible the PostgreSQL log itself. Please ensure these
|
||||||
|
logs do not contain any confidential data.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In all cases it is <emphasis>extremely</emphasis> useful to receive
|
||||||
|
information on how to reliably reproduce an issue with as much detail as
|
||||||
|
possible.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
</appendix>
|
||||||
@@ -4,5 +4,5 @@ BDR failover with repmgrd
|
|||||||
This document has been integrated into the main `repmgr` documentation
|
This document has been integrated into the main `repmgr` documentation
|
||||||
and is now located here:
|
and is now located here:
|
||||||
|
|
||||||
> [BDR failover with repmgrd](https://repmgr.org/docs/4.0/repmgrd-bdr.html)
|
> [BDR failover with repmgrd](https://repmgr.org/docs/current/repmgrd-bdr.html)
|
||||||
|
|
||||||
|
|||||||
@@ -4,4 +4,4 @@ Changes in repmgr 4
|
|||||||
This document has been integrated into the main `repmgr` documentation
|
This document has been integrated into the main `repmgr` documentation
|
||||||
and is now located here:
|
and is now located here:
|
||||||
|
|
||||||
> [Release notes](https://repmgr.org/docs/4.0/release-4.0.html)
|
> [Release notes](https://repmgr.org/docs/current/release-4.0.html)
|
||||||
|
|||||||
@@ -243,8 +243,8 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
<simpara>
|
<simpara>
|
||||||
As an alternative we recommend using 2ndQuadrant's <ulink url="https://www.pgbarman.org/">Barman</ulink>,
|
As an alternative we recommend using 2ndQuadrant's <ulink url="https://www.pgbarman.org/">Barman</ulink>,
|
||||||
which offloads WAL management to a separate server, negating the need to use replication
|
which offloads WAL management to a separate server, removing the requirement to use a replication
|
||||||
slots to reserve WAL. See section <xref linkend="cloning-from-barman">
|
slot for each individual standby to reserve WAL. See section <xref linkend="cloning-from-barman">
|
||||||
for more details on using &repmgr; together with Barman.
|
for more details on using &repmgr; together with Barman.
|
||||||
</simpara>
|
</simpara>
|
||||||
</tip>
|
</tip>
|
||||||
@@ -262,7 +262,7 @@
|
|||||||
meaning replication changes "cascade" down through a hierarchy of servers. This
|
meaning replication changes "cascade" down through a hierarchy of servers. This
|
||||||
can be used to reduce load on the primary and minimize bandwidth usage between
|
can be used to reduce load on the primary and minimize bandwidth usage between
|
||||||
sites. For more details, see the
|
sites. For more details, see the
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/warm-standby.html#CASCADING-REPLICATION">
|
<ulink url="https://www.postgresql.org/docs/current/warm-standby.html#CASCADING-REPLICATION">
|
||||||
PostgreSQL cascading replication documentation</ulink>.
|
PostgreSQL cascading replication documentation</ulink>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
@@ -391,7 +391,7 @@
|
|||||||
a symlink will automatically be created from the main data directory.
|
a symlink will automatically be created from the main data directory.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
See the <ulink url="https://www.postgresql.org/docs/current/static/app-pgbasebackup.html">PostgreSQL pg_basebackup documentation</ulink>
|
See the <ulink url="https://www.postgresql.org/docs/current/app-pgbasebackup.html">PostgreSQL pg_basebackup documentation</ulink>
|
||||||
for more details of available options.
|
for more details of available options.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
@@ -413,7 +413,7 @@
|
|||||||
user's <filename>~/.pgpass</filename> file. It's also possible to store the password in the
|
user's <filename>~/.pgpass</filename> file. It's also possible to store the password in the
|
||||||
environment variable <varname>PGPASSWORD</varname>, however this is not recommended for
|
environment variable <varname>PGPASSWORD</varname>, however this is not recommended for
|
||||||
security reasons. For more details see the
|
security reasons. For more details see the
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/libpq-pgpass.html">PostgreSQL password file documentation</ulink>.
|
<ulink url="https://www.postgresql.org/docs/current/libpq-pgpass.html">PostgreSQL password file documentation</ulink>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
|
|||||||
@@ -39,6 +39,10 @@
|
|||||||
called <varname>standby1</varname> (for example), things will be confusing
|
called <varname>standby1</varname> (for example), things will be confusing
|
||||||
to say the least.
|
to say the least.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
The string's maximum length is 63 characters and it should
|
||||||
|
contain only printable ASCII characters.
|
||||||
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
@@ -56,7 +60,7 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
For details on conninfo strings, see section <ulink
|
For details on conninfo strings, see section <ulink
|
||||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING">Connection Strings</>
|
url="https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING">Connection Strings</>
|
||||||
in the PostgreSQL documentation.
|
in the PostgreSQL documentation.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
@@ -64,7 +68,7 @@
|
|||||||
<varname>connect_timeout</varname> in the <varname>conninfo</varname>
|
<varname>connect_timeout</varname> in the <varname>conninfo</varname>
|
||||||
string to determine the length of time which elapses before a network
|
string to determine the length of time which elapses before a network
|
||||||
connection attempt is abandoned; for details see <ulink
|
connection attempt is abandoned; for details see <ulink
|
||||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT">
|
url="https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT">
|
||||||
the PostgreSQL documentation</>.
|
the PostgreSQL documentation</>.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|||||||
@@ -17,15 +17,15 @@
|
|||||||
<link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
|
<link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> to control the PostgreSQL
|
By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> utility to control the PostgreSQL
|
||||||
server. However this can lead to various problems, particularly when PostgreSQL has been
|
server. However this can lead to various problems, particularly when PostgreSQL has been
|
||||||
installed from packages, and expecially so if <application>systemd</application> is in use.
|
installed from packages, and especially so if <application>systemd</application> is in use.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
|
If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
|
||||||
See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
|
See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
|
||||||
entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
|
entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
|
||||||
</para>
|
</para>
|
||||||
@@ -74,12 +74,12 @@
|
|||||||
|
|
||||||
<para>
|
<para>
|
||||||
To confirm which command &repmgr; will execute for each action, use
|
To confirm which command &repmgr; will execute for each action, use
|
||||||
<command>repmgr node service --list --action=...</command>, e.g.:
|
<command><link linkend="repmgr-node-service">repmgr node service --list-actions --action=...</link></command>, e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
repmgr -f /etc/repmgr.conf node service --list --action=stop
|
repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
|
||||||
repmgr -f /etc/repmgr.conf node service --list --action=start
|
repmgr -f /etc/repmgr.conf node service --list-actions --action=start
|
||||||
repmgr -f /etc/repmgr.conf node service --list --action=restart
|
repmgr -f /etc/repmgr.conf node service --list-actions --action=restart
|
||||||
repmgr -f /etc/repmgr.conf node service --list --action=reload</programlisting>
|
repmgr -f /etc/repmgr.conf node service --list-actions --action=reload</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
<sect1 id="configuration-file" xreflabel="configuration file location">
|
<sect1 id="configuration-file" xreflabel="configuration file">
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>repmgr.conf</primary>
|
<primary>repmgr.conf</primary>
|
||||||
<secondary>location</secondary>
|
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>configuration</primary>
|
<primary>configuration</primary>
|
||||||
<secondary>repmgr.conf location</secondary>
|
<secondary>repmgr.conf</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<title>Configuration file location</title>
|
<title>Configuration file</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<application>repmgr</application> and <application>repmgrd</application>
|
<application>repmgr</application> and <application>repmgrd</application>
|
||||||
use a common configuration file, by default called
|
use a common configuration file, by default called
|
||||||
@@ -21,6 +21,55 @@
|
|||||||
for more details.
|
for more details.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<sect2 id="configuration-file-format" xreflabel="configuration file format">
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr.conf</primary>
|
||||||
|
<secondary>format</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Configuration file format</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<filename>repmgr.conf</filename> is a plain text file with one parameter/value
|
||||||
|
combination per line.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Whitespace is insignificant (except within a quoted parameter value) and blank lines are ignored.
|
||||||
|
Hash marks (<literal>#</literal>) designate the remainder of the line as a comment.
|
||||||
|
Parameter values that are not simple identifiers or numbers should be single-quoted.
|
||||||
|
Note that single quotes cannot be embedded in a parameter value.
|
||||||
|
</para>
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
&repmgr; will interpret double-quotes as being part of a string value; only use single quotes
|
||||||
|
to quote parameter values.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Example of a valid <filename>repmgr.conf</filename> file:
|
||||||
|
<programlisting>
|
||||||
|
# repmgr.conf
|
||||||
|
|
||||||
|
node_id=1
|
||||||
|
node_name= node1
|
||||||
|
conninfo ='host=node1 dbname=repmgr user=repmgr connect_timeout=2'
|
||||||
|
data_directory = /var/lib/pgsql/11/data</programlisting>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<sect2 id="configuration-file-location" xreflabel="configuration file location">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr.conf</primary>
|
||||||
|
<secondary>location</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Configuration file location</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The configuration file will be searched for in the following locations:
|
The configuration file will be searched for in the following locations:
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
@@ -50,7 +99,7 @@
|
|||||||
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
||||||
an error will be raised if it is not found or not readable, and no attempt will be made to
|
an error will be raised if it is not found or not readable, and no attempt will be made to
|
||||||
check default locations; this is to prevent <application>repmgr</application> unexpectedly
|
check default locations; this is to prevent <application>repmgr</application> unexpectedly
|
||||||
reading the wrong configuraton file.
|
reading the wrong configuration file.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
@@ -65,5 +114,7 @@
|
|||||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||||
<filename>/path/to/repmgr.conf</filename>).
|
<filename>/path/to/repmgr.conf</filename>).
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
</sect1>
|
|
||||||
|
</sect2>
|
||||||
|
</sect1>
|
||||||
|
|||||||
@@ -1,6 +1,292 @@
|
|||||||
<chapter id="configuration" xreflabel="Configuration">
|
<chapter id="configuration" xreflabel="Configuration">
|
||||||
<title>repmgr configuration</title>
|
<title>repmgr configuration</title>
|
||||||
|
|
||||||
|
<sect1 id="configuration-prerequisites" xreflabel="Prerequisites for configuration">
|
||||||
|
<indexterm>
|
||||||
|
<primary>configuration</primary>
|
||||||
|
<secondary>prerequisites</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>configuration</primary>
|
||||||
|
<secondary>ssh</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Prerequisites for configuration</title>
|
||||||
|
<para>
|
||||||
|
The following software must be installed on both servers:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara><application>PostgreSQL</application></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<application>repmgr</application>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
At network level, connections to the PostgreSQL port (default: <literal>5432</literal>)
|
||||||
|
must be possible between all nodes.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Passwordless <command>SSH</command> connectivity between all servers in the replication cluster
|
||||||
|
is not required, but is necessary in the following cases:
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
|
||||||
|
data directory (as is the case with e.g. <link linkend="packages-debian-ubuntu">Debian packages</link>);
|
||||||
|
in this case <command>rsync</command> must also be installed on all servers.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
|
||||||
|
and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<tip>
|
||||||
|
<simpara>
|
||||||
|
Consider setting <varname>ConnectTimeout</varname> to a low value in your SSH configuration.
|
||||||
|
This will make it faster to detect any SSH connection errors.
|
||||||
|
</simpara>
|
||||||
|
</tip>
|
||||||
|
|
||||||
|
<sect2 id="configuration-postgresql" xreflabel="PostgreSQL configuration">
|
||||||
|
<indexterm>
|
||||||
|
<primary>configuration</primary>
|
||||||
|
<secondary>PostgreSQL</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>PostgreSQL configuration</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>PostgreSQL configuration for &repmgr;</title>
|
||||||
|
<para>
|
||||||
|
The following PostgreSQL configuration parameters may need to be changed in order
|
||||||
|
for &repmgr; (and replication itself) to function correctly.
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>hot_standby</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>hot_standby</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<option>hot_standby</option> must always be set to <literal>on</literal>, as &repmgr; needs
|
||||||
|
to be able to connect to each server it manages.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that <option>hot_standby</option> defaults to <literal>on</literal> from PostgreSQL 10
|
||||||
|
and later; in PostgreSQL 9.6 and earlier, the default was <literal>off</literal>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-HOT-STANDBY">hot_standby</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>wal_level</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>wal_level</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<option>wal_level</option> must be one of <option>replica</option> or <option>logical</option>
|
||||||
|
(PostgreSQL 9.5 and earlier: one of <option>hot_standby</option> or <option>logical</option>).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL">wal_level</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>max_wal_senders</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>max_wal_senders</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<option>max_wal_senders</option> must be set to a value of <literal>2</literal> or greater.
|
||||||
|
In general you will need one WAL sender for each standby which will attach to the PostgreSQL
|
||||||
|
instance; additionally &repmgr; will require two free WAL senders in order to clone further
|
||||||
|
standbys.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<option>max_wal_senders</option> should be set to an appropriate value on all PostgreSQL
|
||||||
|
instances in the replication cluster which may potentially become a primary server or
|
||||||
|
(in cascading replication) the upstream server of a standby.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-WAL-SENDERS">max_wal_senders</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>max_replication_slots</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>max_replication_slots</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
If you are intending to use replication slots, <option>max_replication_slots</option>
|
||||||
|
must be set to a non-zero value.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<option>max_replication_slots</option> should be set to an appropriate value on all PostgreSQL
|
||||||
|
instances in the replication cluster which may potentially become a primary server or
|
||||||
|
(in cascading replication) the upstream server of a standby.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-REPLICATION-SLOTS">max_replication_slots</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>wal_log_hints</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>wal_log_hints</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>If you are intending to use <application>pg_rewind</application>,
|
||||||
|
and the cluster was not initialised using data checksums, you may want to consider enabling
|
||||||
|
<option>wal_log_hints</option>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LOG-HINTS">wal_log_hints</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>archive_mode</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>archive_mode</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
We suggest setting <option>archive_mode</option> to <literal>on</literal> (and
|
||||||
|
<option>archive_command</option> to <literal>/bin/true</literal>; see below)
|
||||||
|
even if you are currently not planning to use WAL file archiving.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This will make it simpler to set up WAL file archiving if it is ever required,
|
||||||
|
as changes to <option>archive_mode</option> require a full PostgreSQL server
|
||||||
|
restart, while <option>archive_command</option> changes can be applied via a normal
|
||||||
|
configuration reload.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
However, &repmgr; itself does not require WAL file archiving.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-MODE">archive_mode</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>archive_command</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>archive_command</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
If you have set <option>archive_mode</option> to <literal>on</literal> but are not currently planning
|
||||||
|
to use WAL file archiving, set <option>archive_command</option> to a command which does nothing but returns
|
||||||
|
<literal>true</literal>, such as <command>/bin/true</command>. See above for details.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND">archive_command</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>wal_keep_segments</primary>
|
||||||
|
<secondary>PostgreSQL configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>wal_keep_segments</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Normally there is no need to set <option>wal_keep_segments</option> (default: <literal>0</literal>), as it
|
||||||
|
is <emphasis>not</emphasis> a reliable way of ensuring that all required WAL segments are available to standbys.
|
||||||
|
Replication slots and/or an archiving solution such as Barman are recommended to ensure standbys have a reliable
|
||||||
|
source of WAL segments at all times.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The only reason ever to set <option>wal_keep_segments</option> is if
|
||||||
|
you have configured <option>pg_basebackup_options</option>
|
||||||
|
in <filename>repmgr.conf</filename> to include the setting <literal>--wal-method=fetch</literal>
|
||||||
|
(PostgreSQL 9.6 and earlier: <literal>--xlog-method=fetch</literal>)
|
||||||
|
<emphasis>and</emphasis> you have <emphasis>not</emphasis> set <option>restore_command</option>
|
||||||
|
in <filename>repmgr.conf</filename> to fetch WAL files from a reliable source such as Barman,
|
||||||
|
in which case you'll need to set <option>wal_keep_segments</option>
|
||||||
|
to a sufficiently high number to ensure that all WAL files required by the standby
|
||||||
|
are retained. However we do not recommend managing replication in this way.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-WAL-KEEP-SEGMENTS">wal_keep_segments</ulink>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
<para>
|
||||||
|
See also the <link linkend="quickstart-postgresql-configuration">PostgreSQL configuration</link> section in the
|
||||||
|
<link linkend="quickstart">Quick-start guide</link>.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
&configuration-file;
|
&configuration-file;
|
||||||
&configuration-file-required-settings;
|
&configuration-file-required-settings;
|
||||||
&configuration-file-log-settings;
|
&configuration-file-log-settings;
|
||||||
|
|||||||
@@ -1,93 +0,0 @@
|
|||||||
<chapter id="using-witness-server">
|
|
||||||
<indexterm>
|
|
||||||
<primary>witness server</primary>
|
|
||||||
<seealso>Using a witness server with repmgrd</seealso>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
|
|
||||||
<title>Using a witness server</title>
|
|
||||||
<para>
|
|
||||||
A <xref linkend="witness-server"> is a normal PostgreSQL instance which
|
|
||||||
is not part of the streaming replication cluster; its purpose is, if a
|
|
||||||
failover situation occurs, to provide proof that the primary server
|
|
||||||
itself is unavailable.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
A typical use case for a witness server is a two-node streaming replication
|
|
||||||
setup, where the primary and standby are in different locations (data centres).
|
|
||||||
By creating a witness server in the same location (data centre) as the primary,
|
|
||||||
if the primary becomes unavailable it's possible for the standby to decide whether
|
|
||||||
it can promote itself without risking a "split brain" scenario: if it can't see either the
|
|
||||||
witness or the primary server, it's likely there's a network-level interruption
|
|
||||||
and it should not promote itself. If it can see the witness but not the primary,
|
|
||||||
this proves there is no network interruption and the primary itself is unavailable,
|
|
||||||
and it can therefore promote itself (and ideally take action to fence the
|
|
||||||
former primary).
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
<emphasis>Never</emphasis> install a witness server on the same physical host
|
|
||||||
as another node in the replication cluster managed by &repmgr; - it's essential
|
|
||||||
the witness is not affected in any way by failure of another node.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
For more complex replication scenarios, e.g. with multiple datacentres, it may
|
|
||||||
be preferable to use location-based failover, which ensures that only nodes
|
|
||||||
in the same location as the primary will ever be promotion candidates;
|
|
||||||
see <xref linkend="repmgrd-network-split"> for more details.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
A witness server will only be useful if <application>repmgrd</application>
|
|
||||||
is in use.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<sect1 id="creating-witness-server">
|
|
||||||
<title>Creating a witness server</title>
|
|
||||||
<para>
|
|
||||||
To create a witness server, set up a normal PostgreSQL instance on a server
|
|
||||||
in the same physical location as the cluster's primary server.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This instance should <emphasis>not</emphasis> be on the same physical host as the primary server,
|
|
||||||
as otherwise if the primary server fails due to hardware issues, the witness
|
|
||||||
server will be lost too.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
&repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
|
|
||||||
command, which would automatically create a PostgreSQL instance. However
|
|
||||||
this often resulted in an unsatisfactory, hard-to-customise instance.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
The witness server should be configured in the same way as a normal
|
|
||||||
&repmgr; node; see section <xref linkend="configuration">.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Register the witness server with <xref linkend="repmgr-witness-register">.
|
|
||||||
This will create the &repmgr; extension on the witness server, and make
|
|
||||||
a copy of the &repmgr; metadata.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
As the witness server is not part of the replication cluster, further
|
|
||||||
changes to the &repmgr; metadata will be synchronised by
|
|
||||||
<application>repmgrd</application>.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
Once the witness server has been configured, <application>repmgrd</application>
|
|
||||||
should be started; for more details see <xref linkend="repmgrd-witness-server">.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
|
||||||
@@ -88,7 +88,7 @@
|
|||||||
|
|
||||||
<para>
|
<para>
|
||||||
The values provided for <literal>%t</literal> and <literal>%d</literal>
|
The values provided for <literal>%t</literal> and <literal>%d</literal>
|
||||||
will probably contain spaces, so should be quoted in the provided command
|
may contain spaces, so should be quoted in the provided command
|
||||||
configuration, e.g.:
|
configuration, e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||||
|
|||||||
@@ -45,20 +45,14 @@
|
|||||||
<!ENTITY promoting-standby SYSTEM "promoting-standby.sgml">
|
<!ENTITY promoting-standby SYSTEM "promoting-standby.sgml">
|
||||||
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.sgml">
|
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.sgml">
|
||||||
<!ENTITY switchover SYSTEM "switchover.sgml">
|
<!ENTITY switchover SYSTEM "switchover.sgml">
|
||||||
<!ENTITY configuring-witness-server SYSTEM "configuring-witness-server.sgml">
|
|
||||||
|
|
||||||
<!ENTITY event-notifications SYSTEM "event-notifications.sgml">
|
<!ENTITY event-notifications SYSTEM "event-notifications.sgml">
|
||||||
<!ENTITY upgrading-repmgr SYSTEM "upgrading-repmgr.sgml">
|
<!ENTITY upgrading-repmgr SYSTEM "upgrading-repmgr.sgml">
|
||||||
|
|
||||||
|
<!ENTITY repmgrd-overview SYSTEM "repmgrd-overview.sgml">
|
||||||
<!ENTITY repmgrd-automatic-failover SYSTEM "repmgrd-automatic-failover.sgml">
|
<!ENTITY repmgrd-automatic-failover SYSTEM "repmgrd-automatic-failover.sgml">
|
||||||
<!ENTITY repmgrd-configuration SYSTEM "repmgrd-configuration.sgml">
|
<!ENTITY repmgrd-configuration SYSTEM "repmgrd-configuration.sgml">
|
||||||
<!ENTITY repmgrd-demonstration SYSTEM "repmgrd-demonstration.sgml">
|
<!ENTITY repmgrd-operation SYSTEM "repmgrd-operation.sgml">
|
||||||
<!ENTITY repmgrd-monitoring SYSTEM "repmgrd-monitoring.sgml">
|
|
||||||
<!ENTITY repmgrd-degraded-monitoring SYSTEM "repmgrd-degraded-monitoring.sgml">
|
|
||||||
<!ENTITY repmgrd-cascading-replication SYSTEM "repmgrd-cascading-replication.sgml">
|
|
||||||
<!ENTITY repmgrd-network-split SYSTEM "repmgrd-network-split.sgml">
|
|
||||||
<!ENTITY repmgrd-witness-server SYSTEM "repmgrd-witness-server.sgml">
|
|
||||||
<!ENTITY repmgrd-pausing SYSTEM "repmgrd-pausing.sgml">
|
|
||||||
<!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">
|
<!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">
|
||||||
|
|
||||||
<!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
|
<!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
|
||||||
@@ -74,12 +68,15 @@
|
|||||||
<!ENTITY repmgr-node-status SYSTEM "repmgr-node-status.sgml">
|
<!ENTITY repmgr-node-status SYSTEM "repmgr-node-status.sgml">
|
||||||
<!ENTITY repmgr-node-check SYSTEM "repmgr-node-check.sgml">
|
<!ENTITY repmgr-node-check SYSTEM "repmgr-node-check.sgml">
|
||||||
<!ENTITY repmgr-node-rejoin SYSTEM "repmgr-node-rejoin.sgml">
|
<!ENTITY repmgr-node-rejoin SYSTEM "repmgr-node-rejoin.sgml">
|
||||||
|
<!ENTITY repmgr-node-service SYSTEM "repmgr-node-service.sgml">
|
||||||
<!ENTITY repmgr-cluster-show SYSTEM "repmgr-cluster-show.sgml">
|
<!ENTITY repmgr-cluster-show SYSTEM "repmgr-cluster-show.sgml">
|
||||||
<!ENTITY repmgr-cluster-matrix SYSTEM "repmgr-cluster-matrix.sgml">
|
<!ENTITY repmgr-cluster-matrix SYSTEM "repmgr-cluster-matrix.sgml">
|
||||||
<!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
|
<!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
|
||||||
<!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
|
<!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
|
||||||
<!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">
|
<!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">
|
||||||
<!ENTITY repmgr-daemon-status SYSTEM "repmgr-daemon-status.sgml">
|
<!ENTITY repmgr-daemon-status SYSTEM "repmgr-daemon-status.sgml">
|
||||||
|
<!ENTITY repmgr-daemon-start SYSTEM "repmgr-daemon-start.sgml">
|
||||||
|
<!ENTITY repmgr-daemon-stop SYSTEM "repmgr-daemon-stop.sgml">
|
||||||
<!ENTITY repmgr-daemon-pause SYSTEM "repmgr-daemon-pause.sgml">
|
<!ENTITY repmgr-daemon-pause SYSTEM "repmgr-daemon-pause.sgml">
|
||||||
<!ENTITY repmgr-daemon-unpause SYSTEM "repmgr-daemon-unpause.sgml">
|
<!ENTITY repmgr-daemon-unpause SYSTEM "repmgr-daemon-unpause.sgml">
|
||||||
|
|
||||||
@@ -87,6 +84,7 @@
|
|||||||
<!ENTITY appendix-faq SYSTEM "appendix-faq.sgml">
|
<!ENTITY appendix-faq SYSTEM "appendix-faq.sgml">
|
||||||
<!ENTITY appendix-signatures SYSTEM "appendix-signatures.sgml">
|
<!ENTITY appendix-signatures SYSTEM "appendix-signatures.sgml">
|
||||||
<!ENTITY appendix-packages SYSTEM "appendix-packages.sgml">
|
<!ENTITY appendix-packages SYSTEM "appendix-packages.sgml">
|
||||||
|
<!ENTITY appendix-support SYSTEM "appendix-support.sgml">
|
||||||
|
|
||||||
<!ENTITY bookindex SYSTEM "bookindex.sgml">
|
<!ENTITY bookindex SYSTEM "bookindex.sgml">
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
end of the preceding section (<xref linkend="promoting-standby">),
|
end of the preceding section (<xref linkend="promoting-standby">),
|
||||||
execute this:
|
execute this:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf repmgr standby follow
|
$ repmgr -f /etc/repmgr.conf standby follow
|
||||||
INFO: changing node 3's primary to node 2
|
INFO: changing node 3's primary to node 2
|
||||||
NOTICE: restarting server using "pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/postgresql/data' restart"
|
NOTICE: restarting server using "pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/postgresql/data' restart"
|
||||||
waiting for server to shut down......... done
|
waiting for server to shut down......... done
|
||||||
|
|||||||
@@ -1,5 +1,11 @@
|
|||||||
<sect1 id="installation-packages" xreflabel="Installing from packages">
|
<sect1 id="installation-packages" xreflabel="Installing from packages">
|
||||||
<title>Installing &repmgr; from packages</title>
|
<title>Installing &repmgr; from packages</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>installation</primary>
|
||||||
|
<secondary>from packages</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
We recommend installing &repmgr; using the available packages for your
|
We recommend installing &repmgr; using the available packages for your
|
||||||
system.
|
system.
|
||||||
@@ -29,9 +35,10 @@
|
|||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; packages are designed to be compatible with the community-provided PostgreSQL packages.
|
&repmgr; RPM packages are designed to be compatible with the community-provided PostgreSQL packages
|
||||||
|
and 2ndQuadrant's <ulink url="https://www.2ndquadrant.com/en/resources/2ndqpostgres/">2ndQPostgres</ulink>.
|
||||||
They may not work with vendor-specific packages such as those provided by RedHat for RHEL
|
They may not work with vendor-specific packages such as those provided by RedHat for RHEL
|
||||||
customers, as the filesystem layout may be different to the community RPMs.
|
customers, as the PostgreSQL filesystem layout may be different to the community RPMs.
|
||||||
Please contact your support vendor for assistance.
|
Please contact your support vendor for assistance.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
@@ -47,54 +54,76 @@
|
|||||||
<title>2ndQuadrant public RPM yum repository</title>
|
<title>2ndQuadrant public RPM yum repository</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Beginning with <ulink url="https://repmgr.org/docs/4.1/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
|
||||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
||||||
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
|
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
|
||||||
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
General instructions for using this repository can be found on its
|
General instructions for using this repository can be found on its
|
||||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||||
for installing &repmgr; follow below.
|
for installing &repmgr; follow below.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
<emphasis>Installation</emphasis>
|
<emphasis>Installation</emphasis>
|
||||||
|
|
||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Locate the repository RPM for your PostgreSQL version from the list at:
|
Locate the repository RPM for your PostgreSQL version from the list at:
|
||||||
<ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink>
|
<ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink>
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Install the repository definition for your distribution and PostgreSQL version
|
Install the repository definition for your distribution and PostgreSQL version
|
||||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
|
(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
For example, for PostgreSQL 10 on CentOS, execute:
|
For example, for PostgreSQL 10 on CentOS, execute:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
curl https://dl.2ndquadrant.com/default/release/get/10/rpm | sudo bash</programlisting>
|
curl https://dl.2ndquadrant.com/default/release/get/10/rpm | sudo bash</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
|
||||||
Verify that the repository is installed with:
|
<para>
|
||||||
<programlisting>
|
For PostgreSQL 9.6 on CentOS, execute:
|
||||||
|
<programlisting>
|
||||||
|
curl https://dl.2ndquadrant.com/default/release/get/9.6/rpm | sudo bash</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Verify that the repository is installed with:
|
||||||
|
<programlisting>
|
||||||
sudo yum repolist</programlisting>
|
sudo yum repolist</programlisting>
|
||||||
The output should contain two entries like this:
|
The output should contain two entries like this:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
2ndquadrant-dl-default-release-pg10/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 4
|
2ndquadrant-dl-default-release-pg10/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 4
|
||||||
2ndquadrant-dl-default-release-pg10-debug/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 - Debug 3</programlisting>
|
2ndquadrant-dl-default-release-pg10-debug/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 - Debug 3</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ yum install repmgr10</programlisting>
|
sudo yum install repmgr10</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
For packages for PostgreSQL 9.6 and earlier, the package name does not contain
|
||||||
|
a period between major and minor version numbers, e.g.
|
||||||
|
<literal>repmgr96</literal>.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
To determine the names of available packages, execute:
|
||||||
|
<programlisting>
|
||||||
|
yum search repmgr</programlisting>
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
@@ -137,7 +166,17 @@ $ yum install repmgr10</programlisting>
|
|||||||
<programlisting>
|
<programlisting>
|
||||||
[root@localhost ~]# yum install repmgr10-4.0.3-1.rhel7</programlisting>
|
[root@localhost ~]# yum install repmgr10-4.0.3-1.rhel7</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<emphasis>Installing old packages</emphasis>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
See appendix <link linkend="packages-old-versions-rhel-centos">Installing old package versions</link>
|
||||||
|
for details on how to retrieve older package versions.
|
||||||
|
</para>
|
||||||
|
|
||||||
</sect3>
|
</sect3>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="installation-packages-debian" xreflabel="Installing from packages on Debian or Ubuntu">
|
<sect2 id="installation-packages-debian" xreflabel="Installing from packages on Debian or Ubuntu">
|
||||||
@@ -163,16 +202,15 @@ $ yum install repmgr10</programlisting>
|
|||||||
<title>2ndQuadrant public apt repository for Debian/Ubuntu</title>
|
<title>2ndQuadrant public apt repository for Debian/Ubuntu</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
|
||||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
|
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
|
||||||
<ulink url="https://dl.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
|
<ulink url="https://dl.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
|
||||||
including &repmgr;.
|
including &repmgr;.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
General instructions for using this repository can be found on its
|
General instructions for using this repository can be found on its
|
||||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||||
for installing &repmgr; follow below.
|
for installing &repmgr; follow below.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
|
||||||
@@ -180,13 +218,13 @@ $ yum install repmgr10</programlisting>
|
|||||||
|
|
||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Install the repository definition for your distribution and PostgreSQL version
|
Install the repository definition for your distribution and PostgreSQL version
|
||||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages) by executing:
|
(this enables the 2ndQuadrant repository as a source of &repmgr; packages) by executing:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlisting>
|
curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
This will automatically install the following additional packages, if not already present:
|
This will automatically install the following additional packages, if not already present:
|
||||||
@@ -202,13 +240,12 @@ curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlist
|
|||||||
</note>
|
</note>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
<listitem>
|
<para>
|
||||||
<para>
|
|
||||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ apt-get install postgresql-10-repmgr</programlisting>
|
sudo apt-get install postgresql-10-repmgr</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
For packages for PostgreSQL 9.6 and earlier, the package name includes
|
For packages for PostgreSQL 9.6 and earlier, the package name includes
|
||||||
@@ -216,11 +253,20 @@ $ apt-get install postgresql-10-repmgr</programlisting>
|
|||||||
<literal>postgresql-9.6-repmgr</literal>.
|
<literal>postgresql-9.6-repmgr</literal>.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<emphasis>Installing old packages</emphasis>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
See appendix <link linkend="packages-old-versions-debian">Installing old package versions</link>
|
||||||
|
for details on how to retrieve older package versions.
|
||||||
|
</para>
|
||||||
|
|
||||||
</para>
|
|
||||||
|
|
||||||
</sect3>
|
</sect3>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|||||||
@@ -13,8 +13,9 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
From version 4.0, repmgr is compatible with all PostgreSQL versions from 9.3, including PostgreSQL 10.
|
&repmgr; 4.x is compatible with all PostgreSQL versions from 9.3. See
|
||||||
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
|
section <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
|
||||||
|
for an overview of version compatibility.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
@@ -31,10 +32,24 @@
|
|||||||
<para>
|
<para>
|
||||||
&repmgr; must be installed on each server in the replication cluster.
|
&repmgr; must be installed on each server in the replication cluster.
|
||||||
If installing repmgr from packages, the package version must match the PostgreSQL
|
If installing repmgr from packages, the package version must match the PostgreSQL
|
||||||
version. If installing from source, repmgr must be compiled against the same
|
version. If installing from source, &repmgr; must be compiled against the same
|
||||||
major version.
|
major version.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
The same "major" &repmgr; version (e.g. <literal>4.2.x</literal>) <emphasis>must</emphasis>
|
||||||
|
be installed on all nodes in the replication cluster. We strongly recommend keeping all
|
||||||
|
nodes on the same (preferably latest) "minor" &repmgr; version to minimize the risk
|
||||||
|
of incompatibilities.
|
||||||
|
</simpara>
|
||||||
|
<simpara>
|
||||||
|
If different "major" &repmgr; versions (e.g. 3.3.x and 4.1.x)
|
||||||
|
are installed on different nodes, in the best case &repmgr; (in particular <application>repmgrd</application>)
|
||||||
|
will not run. In the worst case, you will end up with a broken cluster.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
|
A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
|
||||||
<application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
|
<application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
|
||||||
@@ -42,32 +57,10 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Passwordless <command>ssh</command> connectivity between all servers in the replication cluster
|
See also <link linkend="configuration-prerequisites">Prerequisites for configuration</link>
|
||||||
is not required, but is necessary in the following cases:
|
for information on networking requirements.
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
|
|
||||||
data directory (in which case <command>rsync</command> is also required)</simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
|
|
||||||
and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<tip>
|
|
||||||
<simpara>
|
|
||||||
Consider setting <varname>ConnectTimeout</varname> to a low value in your SSH configuration.
|
|
||||||
This will make it faster to detect any SSH connection errors.
|
|
||||||
</simpara>
|
|
||||||
</tip>
|
|
||||||
|
|
||||||
<tip>
|
<tip>
|
||||||
<simpara>
|
<simpara>
|
||||||
We recommend using a session multiplexer utility such as <command>screen</command> or
|
We recommend using a session multiplexer utility such as <command>screen</command> or
|
||||||
@@ -76,4 +69,111 @@
|
|||||||
terminated if your <command>ssh</command> session to the server is interrupted or closed.
|
terminated if your <command>ssh</command> session to the server is interrupted or closed.
|
||||||
</simpara>
|
</simpara>
|
||||||
</tip>
|
</tip>
|
||||||
|
|
||||||
|
<sect2 id="install-compatibility-matrix">
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr</primary>
|
||||||
|
<secondary>compatibility matrix</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>compatibility matrix</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>&repmgr; compatibility matrix</title>
|
||||||
|
<para>
|
||||||
|
The following table provides an overview of which &repmgr; version supports
|
||||||
|
which PostgreSQL version.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
|
||||||
|
<table id="repmgr-compatibility-matrix">
|
||||||
|
<title>&repmgr; compatibility matrix</title>
|
||||||
|
|
||||||
|
<tgroup cols="3">
|
||||||
|
<thead>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
&repmgr; version
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
Latest release
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
Supported PostgreSQL versions
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
</thead>
|
||||||
|
|
||||||
|
<tbody>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
&repmgr; 4.x
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<link linkend="release-4.2">4.2</link> (2018-10-24)
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
9.3, 9.4, 9.5, 9.6, 10, 11
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
&repmgr; 3.x
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<ulink url="https://repmgr.org/release-notes-3.3.2.html">3.3.2</ulink> (2017-05-30)
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
9.3, 9.4, 9.5, 9.6
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
&repmgr; 2.x
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<ulink url="https://repmgr.org/release-notes-2.0.3.html">2.0.3</ulink> (2015-04-16)
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
9.0, 9.1, 9.2, 9.3, 9.4
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
</tbody>
|
||||||
|
|
||||||
|
</tgroup>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
The &repmgr; 2.x and 3.x series are no longer maintained or supported.
|
||||||
|
We strongly recommend upgrading to the latest &repmgr; version.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
|
||||||
|
</para>
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
PostgreSQL 9.3 does not support replication slots, so corresponding &repmgr; functionality
|
||||||
|
is not available.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
In PostgreSQL 9.3 and PostgreSQL 9.4, <command>pg_rewind</command> is not part of the core
|
||||||
|
distribution. <command>pg_rewind</command> will need to be compiled separately to be able
|
||||||
|
to use any &repmgr; functionality which takes advantage of it.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
@@ -12,8 +12,8 @@
|
|||||||
To install &repmgr; the prerequisites for compiling
|
To install &repmgr; the prerequisites for compiling
|
||||||
&postgres; must be installed. These are described in &postgres;'s
|
&postgres; must be installed. These are described in &postgres;'s
|
||||||
documentation
|
documentation
|
||||||
on <ulink url="https://www.postgresql.org/docs/current/static/install-requirements.html">build requirements</ulink>
|
on <ulink url="https://www.postgresql.org/docs/current/install-requirements.html">build requirements</ulink>
|
||||||
and <ulink url="https://www.postgresql.org/docs/current/static/docguide-toolsets.html">build requirements for documentation</ulink>.
|
and <ulink url="https://www.postgresql.org/docs/current/docguide-toolsets.html">build requirements for documentation</ulink>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
@@ -26,12 +26,68 @@
|
|||||||
add the <ulink
|
add the <ulink
|
||||||
url="http://apt.postgresql.org/">apt.postgresql.org</ulink>
|
url="http://apt.postgresql.org/">apt.postgresql.org</ulink>
|
||||||
repository to your <filename>sources.list</filename> if you
|
repository to your <filename>sources.list</filename> if you
|
||||||
have not already done so. Then install the pre-requisites for
|
have not already done so, and ensure the source repository is enabled.
|
||||||
building PostgreSQL with:
|
</para>
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
If not configured, the source repository can be added by including
|
||||||
|
a <literal>deb-src</literal> line as a copy of the existing <literal>deb</literal>
|
||||||
|
line in the repository file, which is usually
|
||||||
|
<filename>/etc/apt/sources.list.d/pgdg.list</filename>, e.g.:
|
||||||
|
<programlisting>
|
||||||
|
deb http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main
|
||||||
|
deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisting>
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
<para>
|
||||||
|
Then install the prerequisites for
|
||||||
|
building PostgreSQL with e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get build-dep postgresql-9.6</programlisting>
|
sudo apt-get build-dep postgresql-9.6</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<simpara>
|
||||||
|
Select the appropriate PostgreSQL version for your target repmgr version.
|
||||||
|
</simpara>
|
||||||
|
</important>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If using <command>apt-get build-dep</command> is not possible, the
|
||||||
|
following packages may need to be installed manually:
|
||||||
|
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libedit-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libkrb5-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libpam0g-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libreadline-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libselinux1-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libssl-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libxml2-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libxslt1-dev</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
@@ -45,15 +101,55 @@
|
|||||||
sudo yum install yum-utils openjade docbook-dtds docbook-style-dsssl docbook-style-xsl
|
sudo yum install yum-utils openjade docbook-dtds docbook-style-dsssl docbook-style-xsl
|
||||||
sudo yum-builddep postgresql96</programlisting>
|
sudo yum-builddep postgresql96</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<simpara>
|
||||||
|
Select the appropriate PostgreSQL version for your target repmgr version.
|
||||||
|
</simpara>
|
||||||
|
</important>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If using <command>yum-builddep</command> is not possible, the
|
||||||
|
following packages may need to be installed manually:
|
||||||
|
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libselinux-devel</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libxml2-devel</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>libxslt-devel</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>openssl-devel</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>pam-devel</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>readline-devel</literal></simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
If building against PostgreSQL 11 or later configured with the <option>--with-llvm</option> option
|
||||||
|
(this is the case with the PGDG-provided packages) you'll also need to install the
|
||||||
|
<literal>llvm-toolset-7-clang</literal> package. This is available via the
|
||||||
|
<ulink url="https://wiki.centos.org/AdditionalResources/Repositories/SCL">Software Collections (SCL) Repository</ulink>.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
Select the appropriate PostgreSQL versions for your target repmgr version.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
@@ -80,7 +176,7 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
There are also tags for each &repmgr; release, e.g. <filename>4.0.5</filename>.
|
There are also tags for each &repmgr; release, e.g. <literal>v4.2.0</literal>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
@@ -146,7 +242,7 @@
|
|||||||
The &repmgr; documentation is (like the main PostgreSQL project)
|
The &repmgr; documentation is (like the main PostgreSQL project)
|
||||||
written in DocBook format. To build it locally as HTML, you'll need to
|
written in DocBook format. To build it locally as HTML, you'll need to
|
||||||
install the required packages as described in the
|
install the required packages as described in the
|
||||||
<ulink url="https://www.postgresql.org/docs/9.6/static/docguide-toolsets.html">
|
<ulink url="https://www.postgresql.org/docs/9.6/docguide-toolsets.html">
|
||||||
PostgreSQL documentation</ulink> then execute:
|
PostgreSQL documentation</ulink> then execute:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
./configure && make install-doc</programlisting>
|
./configure && make install-doc</programlisting>
|
||||||
@@ -165,7 +261,7 @@
|
|||||||
<note>
|
<note>
|
||||||
<simpara>
|
<simpara>
|
||||||
Due to changes in PostgreSQL's documentation build system from PostgreSQL 10,
|
Due to changes in PostgreSQL's documentation build system from PostgreSQL 10,
|
||||||
the documentation can currently only be built agains PostgreSQL 9.6 or earlier.
|
the documentation can currently only be built against PostgreSQL 9.6 or earlier.
|
||||||
This limitation will be fixed when time and resources permit.
|
This limitation will be fixed when time and resources permit.
|
||||||
</simpara>
|
</simpara>
|
||||||
</note>
|
</note>
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
<date>2017</date>
|
<date>2017</date>
|
||||||
|
|
||||||
<copyright>
|
<copyright>
|
||||||
<year>2010-2018</year>
|
<year>2010-2019</year>
|
||||||
<holder>2ndQuadrant, Ltd.</holder>
|
<holder>2ndQuadrant, Ltd.</holder>
|
||||||
</copyright>
|
</copyright>
|
||||||
|
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
<title>Legal Notice</title>
|
<title>Legal Notice</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<productname>repmgr</productname> is Copyright © 2010-2018
|
<productname>repmgr</productname> is Copyright © 2010-2019
|
||||||
by 2ndQuadrant, Ltd. All rights reserved.
|
by 2ndQuadrant, Ltd. All rights reserved.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
<chapter id="quickstart" xreflabel="Quick-start guide">
|
<chapter id="quickstart" xreflabel="Quick-start guide">
|
||||||
<title>Quick-start guide</title>
|
<title>Quick-start guide</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>quickstart</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
This section gives a quick introduction to &repmgr;, including setting up a
|
This section gives a quick introduction to &repmgr;, including setting up a
|
||||||
sample &repmgr; installation and a basic replication cluster.
|
sample &repmgr; installation and a basic replication cluster.
|
||||||
@@ -50,7 +54,8 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If you want <application>repmgr</application> to copy configuration files which are
|
If you want <application>repmgr</application> to copy configuration files which are
|
||||||
located outside the PostgreSQL data directory, and/or to test <command>switchover</command>
|
located outside the PostgreSQL data directory, and/or to test
|
||||||
|
<command><link linkend="repmgr-standby-switchover">switchover</link></command>
|
||||||
functionality, you will also need passwordless SSH connections between both servers, and
|
functionality, you will also need passwordless SSH connections between both servers, and
|
||||||
<application>rsync</application> should be installed.
|
<application>rsync</application> should be installed.
|
||||||
</para>
|
</para>
|
||||||
@@ -63,7 +68,7 @@
|
|||||||
</tip>
|
</tip>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="quickstart-postgresql-configuration">
|
<sect1 id="quickstart-postgresql-configuration" xreflabel="PostgreSQL configuration">
|
||||||
<title>PostgreSQL configuration</title>
|
<title>PostgreSQL configuration</title>
|
||||||
<para>
|
<para>
|
||||||
On the primary server, a PostgreSQL instance must be initialised and running.
|
On the primary server, a PostgreSQL instance must be initialised and running.
|
||||||
@@ -78,6 +83,13 @@
|
|||||||
|
|
||||||
max_wal_senders = 10
|
max_wal_senders = 10
|
||||||
|
|
||||||
|
# Enable replication slots; set this figure to at least one more
|
||||||
|
# than the number of standbys which will connect to this server.
|
||||||
|
# Note that repmgr will only make use of replication slots if
|
||||||
|
# "use_replication_slots" is set to "true" in repmgr.conf
|
||||||
|
|
||||||
|
max_replication_slots = 10
|
||||||
|
|
||||||
# Ensure WAL files contain enough information to enable read-only queries
|
# Ensure WAL files contain enough information to enable read-only queries
|
||||||
# on the standby.
|
# on the standby.
|
||||||
#
|
#
|
||||||
@@ -85,7 +97,7 @@
|
|||||||
# PostgreSQL 9.6 and later: one of 'replica' or 'logical'
|
# PostgreSQL 9.6 and later: one of 'replica' or 'logical'
|
||||||
# ('hot_standby' will still be accepted as an alias for 'replica')
|
# ('hot_standby' will still be accepted as an alias for 'replica')
|
||||||
#
|
#
|
||||||
# See: https://www.postgresql.org/docs/current/static/runtime-config-wal.html#GUC-WAL-LEVEL
|
# See: https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL
|
||||||
|
|
||||||
wal_level = 'hot_standby'
|
wal_level = 'hot_standby'
|
||||||
|
|
||||||
@@ -102,16 +114,6 @@
|
|||||||
# your WALs in a secure place. /bin/true is an example of a command that
|
# your WALs in a secure place. /bin/true is an example of a command that
|
||||||
# ignores archiving. Use something more sensible.
|
# ignores archiving. Use something more sensible.
|
||||||
archive_command = '/bin/true'
|
archive_command = '/bin/true'
|
||||||
|
|
||||||
# If you have configured "pg_basebackup_options"
|
|
||||||
# in "repmgr.conf" to include the setting "--xlog-method=fetch" (from
|
|
||||||
# PostgreSQL 10 "--wal-method=fetch"), *and* you have not set
|
|
||||||
# "restore_command" in "repmgr.conf"to fetch WAL files from another
|
|
||||||
# source such as Barman, you'll need to set "wal_keep_segments" to a
|
|
||||||
# high enough value to ensure that all WAL files generated while
|
|
||||||
# the standby is being cloned are retained until the standby starts up.
|
|
||||||
#
|
|
||||||
# wal_keep_segments = 5000
|
|
||||||
</programlisting>
|
</programlisting>
|
||||||
<tip>
|
<tip>
|
||||||
<simpara>
|
<simpara>
|
||||||
@@ -126,6 +128,9 @@
|
|||||||
and the cluster was not initialised using data checksums, you may want to consider enabling
|
and the cluster was not initialised using data checksums, you may want to consider enabling
|
||||||
<varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
<varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
See also the <link linkend="configuration-postgresql">PostgreSQL configuration</link> section in the <link linkend="configuration">repmgr configuration guide</link>.
|
||||||
|
</para>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="quickstart-repmgr-user-database">
|
<sect1 id="quickstart-repmgr-user-database">
|
||||||
@@ -196,11 +201,20 @@
|
|||||||
<sect1 id="quickstart-standby-preparation">
|
<sect1 id="quickstart-standby-preparation">
|
||||||
<title>Preparing the standby</title>
|
<title>Preparing the standby</title>
|
||||||
<para>
|
<para>
|
||||||
On the standby, do not create a PostgreSQL instance, but do ensure the destination
|
On the standby, do <emphasis>not</emphasis> create a PostgreSQL instance (i.e.
|
||||||
|
do not execute <application>initdb</application> or any database creation
|
||||||
|
scripts provided by packages), but do ensure the destination
|
||||||
data directory (and any other directories which you want PostgreSQL to use)
|
data directory (and any other directories which you want PostgreSQL to use)
|
||||||
exist and are owned by the <literal>postgres</literal> system user. Permissions
|
exist and are owned by the <literal>postgres</literal> system user. Permissions
|
||||||
must be set to <literal>0700</literal> (<literal>drwx------</literal>).
|
must be set to <literal>0700</literal> (<literal>drwx------</literal>).
|
||||||
</para>
|
</para>
|
||||||
|
<tip>
|
||||||
|
<simpara>
|
||||||
|
&repmgr; will place a copy of the primary's database files in this directory.
|
||||||
|
It will however refuse to run if a PostgreSQL instance has already been
|
||||||
|
created there.
|
||||||
|
</simpara>
|
||||||
|
</tip>
|
||||||
<para>
|
<para>
|
||||||
Check the primary database is reachable from the standby using <application>psql</application>:
|
Check the primary database is reachable from the standby using <application>psql</application>:
|
||||||
</para>
|
</para>
|
||||||
@@ -210,7 +224,7 @@
|
|||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; stores connection information as <ulink
|
&repmgr; stores connection information as <ulink
|
||||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING">libpq
|
url="https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING">libpq
|
||||||
connection strings</ulink> throughout. This documentation refers to them as <literal>conninfo</literal>
|
connection strings</ulink> throughout. This documentation refers to them as <literal>conninfo</literal>
|
||||||
strings; an alternative name is <literal>DSN</literal> (<literal>data source name</literal>).
|
strings; an alternative name is <literal>DSN</literal> (<literal>data source name</literal>).
|
||||||
We'll use these in place of the <command>-h hostname -d databasename -U username</command> syntax.
|
We'll use these in place of the <command>-h hostname -d databasename -U username</command> syntax.
|
||||||
@@ -432,7 +446,7 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
From PostgreSQL 9.6 you can also use the view
|
From PostgreSQL 9.6 you can also use the view
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/monitoring-stats.html#PG-STAT-WAL-RECEIVER-VIEW">
|
<ulink url="https://www.postgresql.org/docs/current/monitoring-stats.html#PG-STAT-WAL-RECEIVER-VIEW">
|
||||||
<literal>pg_stat_wal_receiver</literal></ulink> to check the replication status from the standby.
|
<literal>pg_stat_wal_receiver</literal></ulink> to check the replication status from the standby.
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
|
|||||||
@@ -42,7 +42,7 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr cluster crosscheck</command>:
|
One of the following exit codes will be emitted by <command>repmgr cluster crosscheck</command>:
|
||||||
</para>
|
</para>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
|
|||||||
@@ -102,7 +102,7 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr cluster matrix</command>:
|
One of the following exit codes will be emitted by <command>repmgr cluster matrix</command>:
|
||||||
</para>
|
</para>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,14 @@
|
|||||||
directly and can be run on any node in the cluster; this is also useful when analyzing
|
directly and can be run on any node in the cluster; this is also useful when analyzing
|
||||||
connectivity from a particular node.
|
connectivity from a particular node.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Node availability is tested by connecting from the node where
|
||||||
|
<command>repmgr cluster show</command> is executed, and does not necessarily imply the node
|
||||||
|
is down. See <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck"> to get
|
||||||
|
better overviews of connections between nodes.
|
||||||
|
</para>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -44,39 +52,59 @@
|
|||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
$ repmgr -f /etc/repmgr.conf cluster show
|
||||||
|
|
||||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
ID | Name | Role | Status | Upstream | Location | Priority | Connection string
|
||||||
----+-------+---------+-----------+----------+----------+-----------------------------------------
|
----+-------+---------+-----------+----------+----------+----------+-----------------------------------------
|
||||||
1 | node1 | primary | * running | | default | host=db_node1 dbname=repmgr user=repmgr
|
1 | node1 | primary | * running | | default | 100 | host=db_node1 dbname=repmgr user=repmgr
|
||||||
2 | node2 | standby | running | node1 | default | host=db_node2 dbname=repmgr user=repmgr
|
2 | node2 | standby | running | node1 | default | 100 | host=db_node2 dbname=repmgr user=repmgr
|
||||||
3 | node3 | standby | running | node1 | default | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
3 | node3 | standby | running | node1 | default | 100 | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Notes</title>
|
<title>Notes</title>
|
||||||
<para>
|
<para>
|
||||||
The column <literal>Role</literal> shows the expected server role according to the
|
The column <literal>Role</literal> shows the expected server role according to the
|
||||||
&repmgr; metadata. <literal>Status</literal> shows whether the server is running or unreachable.
|
&repmgr; metadata.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<literal>Status</literal> shows whether the server is running or unreachable.
|
||||||
If the node has an unexpected role not reflected in the &repmgr; metadata, e.g. a node was manually
|
If the node has an unexpected role not reflected in the &repmgr; metadata, e.g. a node was manually
|
||||||
promoted to primary, this will be highlighted with an exclamation mark, e.g.:
|
promoted to primary, this will be highlighted with an exclamation mark.
|
||||||
|
If a connection to the node cannot be made, this will be highlighted with a question mark.
|
||||||
|
Note that the node will only be shown as <literal>? unreachable</literal>
|
||||||
|
if a connection is not possible at network level; if the PostgreSQL instance on the
|
||||||
|
node is pingable but not accepting connections, it will be shown as <literal>? running</literal>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In the following example, executed on <literal>node3</literal>, <literal>node1</literal> is not reachable
|
||||||
|
at network level and assumed to be down; <literal>node2</literal> has been promoted to primary
|
||||||
|
(but <literal>node3</literal> is not attached to it, and its metadata has not yet been updated);
|
||||||
|
<literal>node4</literal> is running but rejecting connections (from <literal>node3</literal> at least).
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
ID | Name | Role | Status | Upstream | Location | Priority | Connection string
|
||||||
|
----+-------+---------+----------------------+----------+----------+----------+-----------------------------------------
|
||||||
|
1 | node1 | primary | ? unreachable | | default | 100 | host=db_node1 dbname=repmgr user=repmgr
|
||||||
|
2 | node2 | standby | ! running as primary | node1 | default | 100 | host=db_node2 dbname=repmgr user=repmgr
|
||||||
|
3 | node3 | standby | running | node1 | default | 100 | host=db_node3 dbname=repmgr user=repmgr
|
||||||
|
4 | node4 | standby | ? running | node1 | default | 100 | host=db_node4 dbname=repmgr user=repmgr
|
||||||
|
|
||||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
WARNING: following issues were detected
|
||||||
----+-------+---------+----------------------+----------+----------+-----------------------------------------
|
- unable to connect to node "node1" (ID: 1)
|
||||||
1 | node1 | primary | ? unreachable | | default | host=db_node1 dbname=repmgr user=repmgr
|
- node "node1" (ID: 1) is registered as an active primary but is unreachable
|
||||||
2 | node2 | standby | ! running as primary | node1 | default | host=db_node2 dbname=repmgr user=repmgr
|
- node "node2" (ID: 2) is registered as standby but running as primary
|
||||||
3 | node3 | standby | running | node1 | default | host=db_node3 dbname=repmgr user=repmgr
|
- unable to connect to node "node4" (ID: 4)
|
||||||
|
HINT: execute with --verbose option to see connection error messages</programlisting>
|
||||||
WARNING: following issues were detected
|
|
||||||
node "node1" (ID: 1) is registered as an active primary but is unreachable
|
|
||||||
node "node2" (ID: 2) is registered as standby but running as primary</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Node availability is tested by connecting from the node where
|
|
||||||
<command>repmgr cluster show</command> is executed, and does not necessarily imply the node
|
|
||||||
is down. See <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck"> to get
|
|
||||||
a better overviews of connections between nodes.
|
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
To diagnose connection issues, execute <command>repmgr cluster show</command>
|
||||||
|
with the <option>--verbose</option> option; this will display the error message
|
||||||
|
for each failed connection attempt.
|
||||||
|
</para>
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
Use <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck">
|
||||||
|
to diagnose connection issues across the whole replication cluster.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -87,38 +115,56 @@
|
|||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--csv</option></term>
|
<term><option>--csv</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
|
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||||
parsing by scripts, e.g.:
|
parsing by scripts, e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||||
1,-1,-1
|
1,-1,-1
|
||||||
2,0,0
|
2,0,0
|
||||||
3,0,1</programlisting>
|
3,0,1</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The columns have the following meanings:
|
The columns have the following meanings:
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
node ID
|
node ID
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
availability (0 = available, -1 = unavailable)
|
availability (0 = available, -1 = unavailable)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--compact</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Suppress display of the <literal>conninfo</literal> column.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--terse</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Suppress warnings about connection issues.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--verbose</option></term>
|
<term><option>--verbose</option></term>
|
||||||
@@ -137,7 +183,7 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr cluster show</command>:
|
One of the following exit codes will be emitted by <command>repmgr cluster show</command>:
|
||||||
</para>
|
</para>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
@@ -150,11 +196,31 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
An issue was encountered while attempting to retrieve
|
||||||
|
&repmgr; metadata.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_DB_CONN (6)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr; was unable to connect to the local PostgreSQL instance.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
One or more issues were detected.
|
One or more issues were detected with the replication configuration,
|
||||||
|
e.g. a node was not in its expected state.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|||||||
@@ -3,6 +3,11 @@
|
|||||||
<primary>repmgr daemon pause</primary>
|
<primary>repmgr daemon pause</primary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>pausing</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<refmeta>
|
<refmeta>
|
||||||
<refentrytitle>repmgr daemon pause</refentrytitle>
|
<refentrytitle>repmgr daemon pause</refentrytitle>
|
||||||
</refmeta>
|
</refmeta>
|
||||||
@@ -74,7 +79,7 @@ NOTICE: node 3 (node3) paused</programlisting>
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr daemon unpause</command>:
|
One of the following exit codes will be emitted by <command>repmgr daemon pause</command>:
|
||||||
</para>
|
</para>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
|
|||||||
203
doc/repmgr-daemon-start.sgml
Normal file
203
doc/repmgr-daemon-start.sgml
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
<refentry id="repmgr-daemon-start">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr daemon start</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>starting</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<refmeta>
|
||||||
|
<refentrytitle>repmgr daemon start</refentrytitle>
|
||||||
|
</refmeta>
|
||||||
|
|
||||||
|
<refnamediv>
|
||||||
|
<refname>repmgr daemon start</refname>
|
||||||
|
<refpurpose>Start the <application>repmgrd</application> daemon</refpurpose>
|
||||||
|
</refnamediv>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Description</title>
|
||||||
|
<para>
|
||||||
|
This command starts the <application>repmgrd</application> daemon on the
|
||||||
|
local node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
By default, &repmgr; will wait for up to 15 seconds to confirm that <application>repmgrd</application>
|
||||||
|
started. This behaviour can be overridden by specifying a different value using the <option>--wait</option>
|
||||||
|
option, or disabled altogether with the <option>--no-wait</option> option.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
The <filename>repmgr.conf</filename> parameter <varname>repmgrd_service_start_command</varname>
|
||||||
|
must be set for <command>repmgr daemon start</command> to work; see section
|
||||||
|
<xref linkend="repmgr-daemon-start-configuration"> for details.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check prerequisites but don't actually attempt to start <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This action will output the command which would be executed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-w</option></term>
|
||||||
|
<term><option>--wait</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Wait for the specified number of seconds to confirm that <application>repmgrd</application>
|
||||||
|
started successfully.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that providing <option>--wait=0</option> is the equivalent of <option>--no-wait</option>.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--no-wait</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Don't wait to confirm that <application>repmgrd</application>
|
||||||
|
started successfully.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This is equivalent to providing <option>--wait=0</option>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1 id="repmgr-daemon-start-configuration" xreflabel="repmgr daemon start configuration">
|
||||||
|
<title>Configuration file settings</title>
|
||||||
|
<para>
|
||||||
|
The following parameter in <filename>repmgr.conf</filename> is relevant
|
||||||
|
to <command>repmgr daemon start</command>:
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd_service_start_command</primary>
|
||||||
|
<secondary>with "repmgr daemon start"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>repmgrd_service_start_command</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command>repmgr daemon start</command> will execute the command defined by the
|
||||||
|
<varname>repmgrd_service_start_command</varname> parameter in <filename>repmgr.conf</filename>.
|
||||||
|
This must be set to a shell command which will start <application>repmgrd</application>;
|
||||||
|
if &repmgr; was installed from a package, this will be the service command defined by the
|
||||||
|
package. For more details see <link linkend="appendix-packages">Appendix: &repmgr; package details</link>.
|
||||||
|
</para>
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
If &repmgr; was installed from a system package, and you do not configure
|
||||||
|
<varname>repmgrd_service_start_command</varname> to an appropriate service command, this may
|
||||||
|
result in the system becoming confused about the state of the <application>repmgrd</application>
|
||||||
|
service; this is particularly the case with <literal>systemd</literal>.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Exit codes</title>
|
||||||
|
<para>
|
||||||
|
One of the following exit codes will be emitted by <command>repmgr daemon start</command>:
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>SUCCESS (0)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The <application>repmgrd</application> start command (defined in
|
||||||
|
<varname>repmgrd_service_start_command</varname>) was successfully executed.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the <option>--wait</option> option was provided, &repmgr; will confirm that
|
||||||
|
<application>repmgrd</application> has actually started up.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<varname>repmgrd_service_start_command</varname> is not defined in
|
||||||
|
<filename>repmgr.conf</filename>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_DB_CONN (6)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr; was unable to connect to the local PostgreSQL node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL must be running before <application>repmgrd</application>
|
||||||
|
can be started. Additionally, unless the <option>--no-wait</option> option was
|
||||||
|
provided, &repmgr; needs to be able to connect to the local PostgreSQL node
|
||||||
|
to determine the state of <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_REPMGRD_SERVICE (27)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The <application>repmgrd</application> start command (defined in
|
||||||
|
<varname>repmgrd_service_start_command</varname>) was not successfully executed.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This can also mean that &repmgr; was unable to confirm whether <application>repmgrd</application>
|
||||||
|
successfully started (unless the <option>--no-wait</option> option was provided).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>See also</title>
|
||||||
|
<para>
|
||||||
|
<xref linkend="repmgr-daemon-stop">, <xref linkend="repmgr-daemon-status">, <xref linkend="repmgrd-daemon">
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
</refentry>
|
||||||
@@ -3,6 +3,11 @@
|
|||||||
<primary>repmgr daemon status</primary>
|
<primary>repmgr daemon status</primary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>displaying daemon status</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<refmeta>
|
<refmeta>
|
||||||
<refentrytitle>repmgr daemon status</refentrytitle>
|
<refentrytitle>repmgr daemon status</refentrytitle>
|
||||||
</refmeta>
|
</refmeta>
|
||||||
@@ -28,7 +33,10 @@
|
|||||||
<command>repmgr daemon status</command> can be executed on any active node in the
|
<command>repmgr daemon status</command> can be executed on any active node in the
|
||||||
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
|
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
If PostgreSQL is not running on a node, &repmgr; will not be able to determine the
|
||||||
|
status of that node's <application>repmgrd</application> instance.
|
||||||
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
After restarting PostgreSQL on any node, the <application>repmgrd</application> instance
|
After restarting PostgreSQL on any node, the <application>repmgrd</application> instance
|
||||||
@@ -44,33 +52,34 @@
|
|||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application> running normally on all nodes:
|
<application>repmgrd</application> running normally on all nodes:
|
||||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||||
ID | Name | Role | Status | repmgrd | PID | Paused?
|
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen
|
||||||
----+-------+---------+---------+---------+------+---------
|
----+-------+---------+----------+---------+---------+-------+---------+--------------------
|
||||||
1 | node1 | primary | running | running | 7851 | no
|
1 | node1 | primary | 100 | running | running | 71987 | no | n/a
|
||||||
2 | node2 | standby | running | running | 7889 | no
|
2 | node2 | standby | 100 | running | running | 71996 | no | 1 second(s) ago
|
||||||
3 | node3 | standby | running | running | 7918 | no</programlisting>
|
3 | node3 | standby | 100 | running | running | 72042 | no | 1 second(s) ago
|
||||||
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">):
|
<application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">):
|
||||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||||
ID | Name | Role | Status | repmgrd | PID | Paused?
|
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen
|
||||||
----+-------+---------+---------+---------+------+---------
|
----+-------+---------+----------+---------+---------+-------+---------+--------------------
|
||||||
1 | node1 | primary | running | running | 7851 | yes
|
1 | node1 | primary | 100 | running | running | 71987 | yes | n/a
|
||||||
2 | node2 | standby | running | running | 7889 | yes
|
2 | node2 | standby | 100 | running | running | 71996 | yes | 0 second(s) ago
|
||||||
3 | node3 | standby | running | running | 7918 | yes</programlisting>
|
3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago
|
||||||
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application> not running on one node:
|
<application>repmgrd</application> not running on one node:
|
||||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||||
ID | Name | Role | Status | repmgrd | PID | Paused?
|
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen
|
||||||
----+-------+---------+---------+-------------+------+---------
|
----+-------+---------+----------+---------+-------------+-------+---------+--------------------
|
||||||
1 | node1 | primary | running | running | 7851 | yes
|
1 | node1 | primary | 100 | running | running | 71987 | yes | n/a
|
||||||
2 | node2 | standby | running | not running | n/a | n/a
|
2 | node2 | standby | 100 | running | not running | n/a | n/a | n/a
|
||||||
3 | node3 | standby | running | running | 7918 | yes</programlisting>
|
3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -81,76 +90,88 @@
|
|||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--csv</option></term>
|
<term><option>--csv</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<command>repmgr daemon status</command> accepts an optional parameter <literal>--csv</literal>, which
|
<command>repmgr daemon status</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||||
parsing by scripts, e.g.:
|
parsing by scripts, e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf daemon status --csv
|
$ repmgr -f /etc/repmgr.conf daemon status --csv
|
||||||
1,node1,primary,1,1,10204,1
|
1,node1,primary,1,1,5722,1,100,-1
|
||||||
2,node2,standby,1,0,-1,1
|
2,node2,standby,1,0,-1,1,100,1
|
||||||
3,node3,standby,1,1,10225,1</programlisting>
|
3,node3,standby,1,1,5779,1,100,1</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The columns have following meanings:
|
The columns have the following meanings:
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
node ID
|
node ID
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
node name
|
node name
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
node type (primary or standby)
|
node type (primary or standby)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
PostgreSQL server running
|
PostgreSQL server running (1 = running, 0 = not running)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<application>repmgrd</application> running (1 = running, 0 = not running)
|
<application>repmgrd</application> running (1 = running, 0 = not running, -1 = unknown)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<application>repmgrd</application> PID (-1 if not running)
|
<application>repmgrd</application> PID (-1 if not running or status unknown)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<application>repmgrd</application> paused (1 = paused, 0 = not paused)
|
<application>repmgrd</application> paused (1 = paused, 0 = not paused, -1 = unknown)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
<listitem>
|
||||||
</para>
|
<simpara>
|
||||||
</listitem>
|
<application>repmgrd</application> node priority
|
||||||
</varlistentry>
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
interval in seconds since the node's upstream was last seen (this will be -1 if the value could not be retrieved, or the node is primary)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--verbose</option></term>
|
<term><option>--verbose</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Display the full text of any database connection error messages
|
Display the full text of any database connection error messages
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|||||||
200
doc/repmgr-daemon-stop.sgml
Normal file
200
doc/repmgr-daemon-stop.sgml
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
<refentry id="repmgr-daemon-stop">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr daemon stop</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>stopping</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<refmeta>
|
||||||
|
<refentrytitle>repmgr daemon stop</refentrytitle>
|
||||||
|
</refmeta>
|
||||||
|
|
||||||
|
<refnamediv>
|
||||||
|
<refname>repmgr daemon stop</refname>
|
||||||
|
<refpurpose>Stop the <application>repmgrd</application> daemon</refpurpose>
|
||||||
|
</refnamediv>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Description</title>
|
||||||
|
<para>
|
||||||
|
This command stops the <application>repmgrd</application> daemon on the
|
||||||
|
local node.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
By default, &repmgr; will wait for up to 15 seconds to confirm that <application>repmgrd</application>
|
||||||
|
stopped. This behaviour can be overridden by specifying a different value using the <option>--wait</option>
|
||||||
|
option, or disabled altogether with the <option>--no-wait</option> option.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If PostgreSQL is not running on the local node, under some circumstances &repmgr; may not
|
||||||
|
be able to confirm if <application>repmgrd</application> has actually stopped.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
The <filename>repmgr.conf</filename> parameter <varname>repmgrd_service_stop_command</varname>
|
||||||
|
must be set for <command>repmgr daemon stop</command> to work; see section
|
||||||
|
<xref linkend="repmgr-daemon-stop-configuration"> for details.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Configuration</title>
|
||||||
|
<para>
|
||||||
|
<command>repmgr daemon stop</command> will execute the command defined by the
|
||||||
|
<varname>repmgrd_service_stop_command</varname> parameter in <filename>repmgr.conf</filename>.
|
||||||
|
This must be set to a shell command which will stop <application>repmgrd</application>;
|
||||||
|
if &repmgr; was installed from a package, this will be the service command defined by the
|
||||||
|
package. For more details see <link linkend="appendix-packages">Appendix: &repmgr; package details</link>.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
If &repmgr; was installed from a system package, and you do not configure
|
||||||
|
<varname>repmgrd_service_stop_command</varname> to an appropriate service command, this may
|
||||||
|
result in the system becoming confused about the state of the <application>repmgrd</application>
|
||||||
|
service; this is particularly the case with <literal>systemd</literal>.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check prerequisites but don't actually attempt to stop <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This action will output the command which would be executed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-w</option></term>
|
||||||
|
<term><option>--wait</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Wait for the specified number of seconds to confirm that <application>repmgrd</application>
|
||||||
|
stopped successfully.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that providing <option>--wait=0</option> is the equivalent of <option>--no-wait</option>.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--no-wait</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Don't wait to confirm that <application>repmgrd</application>
|
||||||
|
stopped successfully.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This is equivalent to providing <option>--wait=0</option>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1 id="repmgr-daemon-stop-configuration" xreflabel="repmgr daemon stop configuration">
|
||||||
|
<title>Configuration file settings</title>
|
||||||
|
<para>
|
||||||
|
The following parameter in <filename>repmgr.conf</filename> is relevant
|
||||||
|
to <command>repmgr daemon stop</command>:
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd_service_stop_command</primary>
|
||||||
|
<secondary>with "repmgr daemon stop"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>repmgrd_service_stop_command</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command>repmgr daemon stop</command> will execute the command defined by the
|
||||||
|
<varname>repmgrd_service_stop_command</varname> parameter in <filename>repmgr.conf</filename>.
|
||||||
|
This must be set to a shell command which will stop <application>repmgrd</application>;
|
||||||
|
if &repmgr; was installed from a package, this will be the service command defined by the
|
||||||
|
package. For more details see <link linkend="appendix-packages">Appendix: &repmgr; package details</link>.
|
||||||
|
</para>
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
If &repmgr; was installed from a system package, and you do not configure
|
||||||
|
<varname>repmgrd_service_stop_command</varname> to an appropriate service command, this may
|
||||||
|
result in the system becoming confused about the state of the <application>repmgrd</application>
|
||||||
|
service; this is particularly the case with <literal>systemd</literal>.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Exit codes</title>
|
||||||
|
<para>
|
||||||
|
One of the following exit codes will be emitted by <command>repmgr daemon stop</command>:
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>SUCCESS (0)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> could be stopped.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<varname>repmgrd_service_stop_command</varname> is not defined in
|
||||||
|
<filename>repmgr.conf</filename>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_REPMGRD_SERVICE (27)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> could not be stopped.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>See also</title>
|
||||||
|
<para>
|
||||||
|
<xref linkend="repmgr-daemon-start">, <xref linkend="repmgr-daemon-status">, <xref linkend="repmgrd-daemon">
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
</refentry>
|
||||||
@@ -3,6 +3,12 @@
|
|||||||
<primary>repmgr daemon unpause</primary>
|
<primary>repmgr daemon unpause</primary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>unpausing</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
|
||||||
<refmeta>
|
<refmeta>
|
||||||
<refentrytitle>repmgr daemon unpause</refentrytitle>
|
<refentrytitle>repmgr daemon unpause</refentrytitle>
|
||||||
</refmeta>
|
</refmeta>
|
||||||
@@ -68,7 +74,7 @@ NOTICE: node 3 (node3) unpaused</programlisting>
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr daemon unpause</command>:
|
One of the following exit codes will be emitted by <command>repmgr daemon unpause</command>:
|
||||||
</para>
|
</para>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,14 @@
|
|||||||
Performs some health checks on a node from a replication perspective.
|
Performs some health checks on a node from a replication perspective.
|
||||||
This command must be run on the local node.
|
This command must be run on the local node.
|
||||||
</para>
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
Currently &repmgr; performs health checks on physical replication
|
||||||
|
slots only, with the aim of warning about streaming replication standbys which
|
||||||
|
have become detached and the associated risk of uncontrolled WAL file
|
||||||
|
growth.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -30,8 +38,8 @@
|
|||||||
Replication lag: OK (N/A - node is primary)
|
Replication lag: OK (N/A - node is primary)
|
||||||
WAL archiving: OK (0 pending files)
|
WAL archiving: OK (0 pending files)
|
||||||
Downstream servers: OK (2 of 2 downstream nodes attached)
|
Downstream servers: OK (2 of 2 downstream nodes attached)
|
||||||
Replication slots: OK (node has no replication slots)
|
Replication slots: OK (node has no physical replication slots)
|
||||||
Missing replication slots: OK (node has no missing replication slots)</programlisting>
|
Missing replication slots: OK (node has no missing physical replication slots)</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -44,7 +52,7 @@
|
|||||||
OK (node is primary)</programlisting>
|
OK (node is primary)</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Parameters for individual checks are as follows:
|
Parameters for individual checks are as follows:
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
@@ -76,16 +84,26 @@
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--slots</literal>: checks there are no inactive replication slots
|
<literal>--slots</literal>: checks there are no inactive physical replication slots
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--missing-slots</literal>: checks there are no missing replication slots
|
<literal>--missing-slots</literal>: checks there are no missing physical replication slots
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<literal>--data-directory-config</literal>: checks the data directory configured in
|
||||||
|
<filename>repmgr.conf</filename> matches the actual data directory.
|
||||||
|
This check is not directly related to replication, but is useful to verify &repmgr;
|
||||||
|
is correctly configured.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
@@ -105,6 +123,7 @@
|
|||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--nagios</literal>: generate output in a Nagios-compatible format
|
<literal>--nagios</literal>: generate output in a Nagios-compatible format
|
||||||
|
(for individual checks only)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
@@ -151,9 +170,10 @@
|
|||||||
|
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr status check</command>
|
One of the following exit codes will be emitted by <command>repmgr node check</command>
|
||||||
if no individual check was specified.
|
if no individual check was specified.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
@@ -175,6 +195,7 @@
|
|||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -119,6 +119,7 @@
|
|||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Configuration file settings</title>
|
<title>Configuration file settings</title>
|
||||||
|
|
||||||
@@ -132,6 +133,11 @@
|
|||||||
the value set in <literal>standby_reconnect_timeout</literal>,
|
the value set in <literal>standby_reconnect_timeout</literal>,
|
||||||
60 seconds).
|
60 seconds).
|
||||||
</simpara>
|
</simpara>
|
||||||
|
<simpara>
|
||||||
|
Note that <literal>standby_reconnect_timeout</literal> must be
|
||||||
|
set to a value equal to or greater than
|
||||||
|
<literal>node_rejoin_timeout</literal>.
|
||||||
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
@@ -144,6 +150,55 @@
|
|||||||
A <literal>node_rejoin</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
A <literal>node_rejoin</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
<refsect1>
|
||||||
|
<title>Exit codes</title>
|
||||||
|
<para>
|
||||||
|
One of the following exit codes will be emitted by <command>repmgr node rejoin</command>:
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>SUCCESS (0)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The node rejoin succeeded; or if <option>--dry-run</option> was provided,
|
||||||
|
no issues were detected which would prevent the node rejoin.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
A configuration issue was detected which prevented &repmgr; from
|
||||||
|
continuing with the node rejoin.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_NO_RESTART (4)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The node could not be restarted.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_REJOIN_FAIL (24)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The node rejoin operation failed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Notes</title>
|
<title>Notes</title>
|
||||||
@@ -167,6 +222,10 @@
|
|||||||
postgres --single -D /var/lib/pgsql/data/ < /dev/null</programlisting>
|
postgres --single -D /var/lib/pgsql/data/ < /dev/null</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
|
<para>
|
||||||
|
&repmgr; will attempt to verify whether the node can rejoin as-is, or whether
|
||||||
|
<command>pg_rewind</command> must be used (see following section).
|
||||||
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
|
<refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
|
||||||
@@ -188,73 +247,137 @@
|
|||||||
<command>pg_rewind</command> <emphasis>requires</emphasis> that either
|
<command>pg_rewind</command> <emphasis>requires</emphasis> that either
|
||||||
<varname>wal_log_hints</varname> is enabled, or that
|
<varname>wal_log_hints</varname> is enabled, or that
|
||||||
data checksums were enabled when the cluster was initialized. See the
|
data checksums were enabled when the cluster was initialized. See the
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
|
<ulink url="https://www.postgresql.org/docs/current/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
We strongly recommend familiarizing yourself with <command>pg_rewind</command> before attempting
|
||||||
|
to use it with &repmgr;, as while it is an extremely useful tool, it is <emphasis>not</emphasis>
|
||||||
|
a "magic bullet" which can resolve all problematic replication situations.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
A typical use-case for <command>pg_rewind</command> is when a scenario like the following
|
||||||
|
is encountered:
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||||
|
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
|
||||||
|
INFO: replication connection to the rejoin target node was successful
|
||||||
|
INFO: local and rejoin target system identifiers match
|
||||||
|
DETAIL: system identifier is 6652184002263212600
|
||||||
|
ERROR: this node cannot attach to rejoin target node 3
|
||||||
|
DETAIL: rejoin target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710
|
||||||
|
HINT: use --force-rewind to execute pg_rewind</programlisting>
|
||||||
|
|
||||||
|
Here, <literal>node3</literal> was promoted to a primary while the local node was
|
||||||
|
still attached to the previous primary; this can potentially happen during e.g. a
|
||||||
|
network split. <command>pg_rewind</command> can re-sync the local node with <literal>node3</literal>,
|
||||||
|
removing the need for a full reclone.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command>,
|
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command>,
|
||||||
pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
|
pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
|
||||||
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<important>
|
||||||
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
<para>
|
||||||
rewind operation, any configuration files in the PostgreSQL data directory will be
|
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
||||||
overwritten with those from the source server.
|
rewind operation, any configuration files in the PostgreSQL data directory will be
|
||||||
</para>
|
overwritten with those from the source server.
|
||||||
<para>
|
</para>
|
||||||
To prevent this happening, provide a comma-separated list of files to retain
|
<para>
|
||||||
using the <literal>--config-file</literal> command line option; the specified files
|
To prevent this happening, provide a comma-separated list of files to retain
|
||||||
will be archived in a temporary directory (whose parent directory can be specified with
|
        using the <literal>--config-files</literal> command line option; the specified files
|
||||||
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
will be archived in a temporary directory (whose parent directory can be specified with
|
||||||
complete.
|
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
||||||
</para>
|
complete.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Example, first using <literal>--dry-run</literal>, then actually executing the
|
Example, first using <literal>--dry-run</literal>, then actually executing the
|
||||||
<literal>node rejoin command</literal>.
|
<literal>node rejoin command</literal>.
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
|
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind --dry-run
|
||||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
INFO: replication connection to the rejoin target node was successful
|
||||||
|
INFO: local and rejoin target system identifiers match
|
||||||
|
DETAIL: system identifier is 6652460429293670710
|
||||||
|
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
||||||
|
DETAIL: rejoin target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710
|
||||||
INFO: prerequisites for using pg_rewind are met
|
INFO: prerequisites for using pg_rewind are met
|
||||||
INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf"
|
INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node2/postgresql.local.conf"
|
||||||
INFO: file "postgresql.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf"
|
INFO: file "postgresql.replication-setup.conf" would be copied to "/tmp/repmgr-config-archive-node2/postgresql.replication-setup.conf"
|
||||||
INFO: 2 files would have been copied to "/tmp/repmgr-config-archive-node1"
|
|
||||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
|
||||||
INFO: pg_rewind would now be executed
|
INFO: pg_rewind would now be executed
|
||||||
DETAIL: pg_rewind command is:
|
DETAIL: pg_rewind command is:
|
||||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
|
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'
|
||||||
|
INFO: prerequisites for executing NODE REJOIN are met</programlisting>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
||||||
this checks the prerequisites for using <application>pg_rewind</application>, but cannot
|
this checks the prerequisites for using <application>pg_rewind</application>, but is
|
||||||
predict the outcome of actually executing <application>pg_rewind</application>.
|
not an absolute guarantee that actually executing <application>pg_rewind</application>
|
||||||
|
will succeed. See also section <xref linkend="repmgr-node-rejoin-caveats"> below.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
|
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind
|
||||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
||||||
INFO: prerequisites for using pg_rewind are met
|
DETAIL: rejoin target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710
|
||||||
INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
|
|
||||||
NOTICE: executing pg_rewind
|
NOTICE: executing pg_rewind
|
||||||
NOTICE: 2 files copied to /var/lib/pgsql/data
|
DETAIL: pg_rewind command is "pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'"
|
||||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
NOTICE: 2 files copied to /var/lib/postgresql/data
|
||||||
INFO: deleting "recovery.done"
|
NOTICE: setting node 2's upstream to node 3
|
||||||
INFO: setting node 1's primary to node 2
|
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
||||||
NOTICE: starting server using "pg_ctl-l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
|
||||||
waiting for server to start.... done
|
|
||||||
server started
|
|
||||||
NOTICE: NODE REJOIN successful
|
NOTICE: NODE REJOIN successful
|
||||||
DETAIL: node 1 is now attached to node 2</programlisting>
|
DETAIL: node 2 is now attached to node 3</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1 id="repmgr-node-rejoin-caveats" xreflabel="Caveats">
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr node rejoin</primary>
|
||||||
|
<secondary>caveats</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Caveats when using <command>repmgr node rejoin</command></title>
|
||||||
|
<para>
|
||||||
|
<command>repmgr node rejoin</command> attempts to determine whether it will succeed by
|
||||||
|
comparing the timelines and relative WAL positions of the local node (rejoin candidate) and primary
|
||||||
|
(rejoin target). This is particularly important if planning to use <application>pg_rewind</application>,
|
||||||
|
which currently (as of PostgreSQL 11) may appear to succeed (or indicate there is no action
|
||||||
|
needed) but potentially allow an impossible action, such as trying to rejoin a standby to a
|
||||||
|
primary which is behind the standby. &repmgr; will prevent this situation from occurring.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Currently it is <emphasis>not</emphasis> possible to detect a situation where the rejoin target
|
||||||
|
is a standby which has been "promoted" by removing <filename>recovery.conf</filename>
|
||||||
|
(PostgreSQL 12 and later: <filename>standby.signal</filename>) and restarting it.
|
||||||
|
In this case there will be no information about the point the rejoin target diverged
|
||||||
|
from the current standby; the rejoin operation will fail and
|
||||||
|
the current standby's PostgreSQL log will contain entries with the text
|
||||||
|
"<literal>record with incorrect prev-link</literal>".
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
We strongly recommend running <command>repmgr node rejoin</command> with the
|
||||||
|
<option>--dry-run</option> option first. Additionally it might be a good idea
|
||||||
|
to execute the <application>pg_rewind</application> command displayed by
|
||||||
|
&repmgr; with the <application>pg_rewind</application> <option>--dry-run</option>
|
||||||
|
option. Note that <application>pg_rewind</application> does not indicate that it
|
||||||
|
is running in <option>--dry-run</option> mode.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>See also</title>
|
<title>See also</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
151
doc/repmgr-node-service.sgml
Normal file
151
doc/repmgr-node-service.sgml
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
<refentry id="repmgr-node-service">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr node service</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<refmeta>
|
||||||
|
<refentrytitle>repmgr node service</refentrytitle>
|
||||||
|
</refmeta>
|
||||||
|
|
||||||
|
<refnamediv>
|
||||||
|
<refname>repmgr node service</refname>
|
||||||
|
<refpurpose>show or execute the system service command to stop/start/restart/reload/promote a node</refpurpose>
|
||||||
|
</refnamediv>
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Description</title>
|
||||||
|
<para>
|
||||||
|
      Shows or executes the system service command to stop/start/restart/reload/promote a node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This command is mainly meant for internal &repmgr; usage, but is useful for
|
||||||
|
confirming the command configuration.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Log the steps which would be taken, including displaying the command which would be executed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--action</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The action to perform. One of <literal>start</literal>, <literal>stop</literal>,
|
||||||
|
<literal>restart</literal>, <literal>reload</literal> or <literal>promote</literal>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the parameter <option>--list-actions</option> is provided together with
|
||||||
|
<option>--action</option>, the command which would be executed will be printed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--list-actions</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
List all configured commands.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the parameter <option>--action</option> is provided together with
|
||||||
|
<option>--list-actions</option>, the command which would be executed for that
|
||||||
|
particular action will be printed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--checkpoint</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Issue a <command>CHECKPOINT</command> before stopping or restarting the node.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Exit codes</title>
|
||||||
|
<para>
|
||||||
|
One of the following exit codes will be emitted by <command>repmgr node service</command>:
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>SUCCESS (0)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
No issues were detected.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_LOCAL_COMMAND (5)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Execution of the system service command failed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Examples</title>
|
||||||
|
<para>
|
||||||
|
See what action would be taken for a restart:
|
||||||
|
<programlisting>
|
||||||
|
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint --dry-run
|
||||||
|
INFO: a CHECKPOINT would be issued here
|
||||||
|
INFO: would execute server command "sudo service postgresql-11 restart"</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Restart the PostgreSQL instance:
|
||||||
|
<programlisting>
|
||||||
|
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint
|
||||||
|
NOTICE: issuing CHECKPOINT
|
||||||
|
DETAIL: executing server command "sudo service postgresql-11 restart"
|
||||||
|
Redirecting to /bin/systemctl restart postgresql-11.service</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
List all commands:
|
||||||
|
<programlisting>
|
||||||
|
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions
|
||||||
|
Following commands would be executed for each action:
|
||||||
|
|
||||||
|
start: "sudo service postgresql-11 start"
|
||||||
|
stop: "sudo service postgresql-11 stop"
|
||||||
|
restart: "sudo service postgresql-11 restart"
|
||||||
|
reload: "sudo service postgresql-11 reload"
|
||||||
|
promote: "/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote"</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
List a single command:
|
||||||
|
<programlisting>
|
||||||
|
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions --action=promote
|
||||||
|
/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote </programlisting>
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
</refentry>
|
||||||
@@ -55,7 +55,7 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr node status</command>:
|
One of the following exit codes will be emitted by <command>repmgr node status</command>:
|
||||||
</para>
|
</para>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,15 @@
|
|||||||
installing the &repmgr; extension. This command needs to be executed before any
|
installing the &repmgr; extension. This command needs to be executed before any
|
||||||
standby nodes are registered.
|
standby nodes are registered.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
       It's possible to install the &repmgr; extension manually before executing
|
||||||
|
<command>repmgr primary register</command>; in this case &repmgr; will
|
||||||
|
detect the presence of the extension and skip that step.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -35,16 +44,16 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
If providing the configuration file location with <option>-f/--config-file</option>,
|
If providing the configuration file location with <option>-f/--config-file</option>,
|
||||||
avoid using a relative path, as &repmgr; stores the configuration file location
|
avoid using a relative path, as &repmgr; stores the configuration file location
|
||||||
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
||||||
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert the
|
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert the
|
||||||
a relative path into an absolute one, but this may not be the same as the path you
|
        relative path into an absolute one, but this may not be the same as the path you
|
||||||
would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
|
would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
|
||||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||||
<filename>/path/to/repmgr.conf</filename>).
|
<filename>/path/to/repmgr.conf</filename>).
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|||||||
@@ -87,7 +87,7 @@
|
|||||||
<refsect1 id="repmgr-standby-clone-recovery-conf">
|
<refsect1 id="repmgr-standby-clone-recovery-conf">
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>recovery.conf</primary>
|
<primary>recovery.conf</primary>
|
||||||
<secondary>customising with "repmgr standby clone"</secondary>
|
<secondary>customising with "repmgr standby clone"</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<title>Customising recovery.conf</title>
|
<title>Customising recovery.conf</title>
|
||||||
@@ -170,7 +170,7 @@
|
|||||||
pg_basebackup_options='--xlog-method=fetch'</programlisting>
|
pg_basebackup_options='--xlog-method=fetch'</programlisting>
|
||||||
|
|
||||||
and ensure that <literal>wal_keep_segments</literal> is set to an appropriately high value.
|
and ensure that <literal>wal_keep_segments</literal> is set to an appropriately high value.
|
||||||
See the <ulink url="https://www.postgresql.org/docs/current/static/app-pgbasebackup.html">
|
See the <ulink url="https://www.postgresql.org/docs/current/app-pgbasebackup.html">
|
||||||
pg_basebackup</ulink> documentation for details.
|
pg_basebackup</ulink> documentation for details.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@@ -194,10 +194,11 @@
|
|||||||
<title>Using a standby cloned by another method</title>
|
<title>Using a standby cloned by another method</title>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; supports standbys cloned by another method (e.g. using <application>barman</application>'s
|
&repmgr; supports standbys cloned by another method (e.g. using <application>barman</application>'s
|
||||||
<command><ulink url="http://docs.pgbarman.org/release/2.4/#recover">barman recover</ulink></command> command).
|
<command><ulink url="http://docs.pgbarman.org/release/2.5/#recover">barman recover</ulink></command> command).
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
To integrate the standby as a &repmgr; node, ensure the <filename>repmgr.conf</filename>
|
To integrate the standby as a &repmgr; node, once the standby has been cloned,
|
||||||
|
ensure the <filename>repmgr.conf</filename>
|
||||||
file is created for the node, and that it has been registered using
|
file is created for the node, and that it has been registered using
|
||||||
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>.
|
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>.
|
||||||
Then execute the command <command>repmgr standby clone --recovery-conf-only</command>.
|
Then execute the command <command>repmgr standby clone --recovery-conf-only</command>.
|
||||||
|
|||||||
@@ -9,23 +9,35 @@
|
|||||||
|
|
||||||
<refnamediv>
|
<refnamediv>
|
||||||
<refname>repmgr standby follow</refname>
|
<refname>repmgr standby follow</refname>
|
||||||
<refpurpose>attach a standby to a new primary</refpurpose>
|
<refpurpose>attach a running standby to a new upstream node</refpurpose>
|
||||||
</refnamediv>
|
</refnamediv>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Description</title>
|
<title>Description</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Attaches the standby to a new primary. This command requires a valid
|
Attaches the standby ("follow candidate") to a new upstream node
|
||||||
|
("follow target"). Typically this will be the primary, but this
|
||||||
|
command can also be used to attach the standby to another standby.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This command requires a valid
|
||||||
<filename>repmgr.conf</filename> file for the standby, either specified
|
<filename>repmgr.conf</filename> file for the standby, either specified
|
||||||
explicitly with <literal>-f/--config-file</literal> or located in a
|
explicitly with <literal>-f/--config-file</literal> or located in a
|
||||||
default location; no additional arguments are required.
|
default location; no additional arguments are required.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
By default &repmgr; will attempt to attach the standby to the current primary.
|
||||||
|
If <option>--upstream-node-id</option> is provided, &repmgr; will attempt
|
||||||
|
to attach the standby to the specified node, which can be another standby.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
This command will force a restart of the standby server, which must be
|
This command will force a restart of the standby server, which must be
|
||||||
running. It can only be used to attach an active standby to the current primary node
|
running.
|
||||||
(and not to another standby).
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<tip>
|
<tip>
|
||||||
<para>
|
<para>
|
||||||
To re-add an inactive node to the replication cluster, use
|
To re-add an inactive node to the replication cluster, use
|
||||||
@@ -36,9 +48,22 @@
|
|||||||
<para>
|
<para>
|
||||||
<command>repmgr standby follow</command> will wait up to
|
<command>repmgr standby follow</command> will wait up to
|
||||||
<varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
|
<varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
|
||||||
to verify the standby has actually connected to the new primary.
|
to verify the standby has actually connected to the new upstream node.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If <option>recovery_min_apply_delay</option> is set for the standby, it
|
||||||
|
will not attach to the new upstream node until it has replayed available
|
||||||
|
WAL.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Conversely, if the standby is attached to an upstream standby
|
||||||
|
which has <option>recovery_min_apply_delay</option> set, the upstream
|
||||||
|
standby's replay state may actually be behind that of its new downstream node.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -65,19 +90,46 @@
|
|||||||
<term><option>--dry-run</option></term>
|
<term><option>--dry-run</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Check prerequisites but don't actually follow a new standby.
|
Check prerequisites but don't actually follow a new upstream node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This will also verify whether the standby is capable of following the new upstream node.
|
||||||
</para>
|
</para>
|
||||||
<important>
|
<important>
|
||||||
<para>
|
<para>
|
||||||
This does not guarantee the standby can follow the primary; in
|
If a standby was turned into a primary by removing <filename>recovery.conf</filename>
|
||||||
particular, whether the primary and standby timelines have diverged,
|
(<application>PostgreSQL 12</application> and later: <filename>standby.signal</filename>),
|
||||||
can currently only be determined by actually attempting to
|
&repmgr; will <emphasis>not</emphasis> be able to determine whether that primary's timeline
|
||||||
attach the standby to the primary.
|
has diverged from the timeline of the standby ("follow candidate").
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
            We recommend always using <link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>
|
||||||
|
to promote a standby to primary, as this will ensure that the new primary
|
||||||
|
will perform a timeline switch (making it practical to check for timeline divergence)
|
||||||
|
and also that &repmgr; metadata is updated correctly.
|
||||||
</para>
|
</para>
|
||||||
</important>
|
</important>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--upstream-node-id</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Node ID of the new upstream node ("follow target").
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If not provided, &repmgr; will attempt to follow the current primary node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that when using <application>repmgrd</application>, <option>--upstream-node-id</option>
|
||||||
|
should always be configured;
|
||||||
|
see <link linkend="repmgrd-automatic-failover-configuration">Automatic failover configuration</link>
|
||||||
|
for details.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>-w</option></term>
|
<term><option>-w</option></term>
|
||||||
<term><option>--wait</option></term>
|
<term><option>--wait</option></term>
|
||||||
@@ -94,13 +146,104 @@
|
|||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Execution</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Execute with the <literal>--dry-run</literal> option to test the follow operation as
|
||||||
|
far as possible, without actually changing the status of the node.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Note that &repmgr; will first attempt to determine whether the standby
|
||||||
|
("follow candidate") is capable of following the
|
||||||
|
new upstream node ("follow target").
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
        If the new upstream node has diverged from this node's timeline,
|
||||||
|
for example if the new upstream node was promoted to primary while this node
|
||||||
|
was still attached to the original primary, it will <emphasis>not</emphasis>
|
||||||
|
be possible to follow the new upstream node, and &repmgr; will emit an error
|
||||||
|
message like this:
|
||||||
|
<programlisting>
|
||||||
|
ERROR: this node cannot attach to follow target node 3
|
||||||
|
DETAIL: follow target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/6108880</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In this case, it may be possible to have this node follow the new upstream
|
||||||
|
using <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>
|
||||||
|
with the <option>--force-rewind</option> option to execute <command>pg_rewind</command>.
|
||||||
|
This does mean that transactions which exist on this node, but not the new upstream,
|
||||||
|
will be lost.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Exit codes</title>
|
||||||
|
<para>
|
||||||
|
One of the following exit codes will be emitted by <command>repmgr standby follow</command>:
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>SUCCESS (0)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The follow operation succeeded; or if <option>--dry-run</option> was provided,
|
||||||
|
no issues were detected which would prevent the follow operation.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
A configuration issue was detected which prevented &repmgr; from
|
||||||
|
continuing with the follow operation.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_NO_RESTART (4)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The node could not be restarted.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_DB_CONN (6)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr; was unable to establish a database connection to one of the nodes.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_FOLLOW_FAIL (23)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr; was unable to complete the follow command.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="repmgr-standby-follow-events">
|
<refsect1 id="repmgr-standby-follow-events">
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the primary
|
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the node
|
||||||
being followed, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
being followed, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||||
<literal>%a</literal> with its node name.
|
<literal>%a</literal> with its node name.
|
||||||
</para>
|
</para>
|
||||||
@@ -113,4 +256,3 @@
|
|||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
</refentry>
|
</refentry>
|
||||||
|
|
||||||
|
|||||||
@@ -33,8 +33,26 @@
|
|||||||
Both values can be defined in <filename>repmgr.conf</filename>.
|
Both values can be defined in <filename>repmgr.conf</filename>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If WAL replay is paused on the standby, and not all WAL files on the standby have been
|
||||||
|
replayed, &repmgr; will not attempt to promote it.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This is because if WAL replay is paused, PostgreSQL itself will not react to a promote command
|
||||||
|
until WAL replay is resumed and all pending WAL has been replayed. This means
|
||||||
|
attempting to promote PostgreSQL in this state will leave PostgreSQL in a condition where the
|
||||||
|
promotion may occur at an unpredictable point in the future.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that if the standby is in archive recovery, &repmgr; will not be able to determine
|
||||||
|
if more WAL is pending replay, and will abort the promotion attempt if WAL replay is paused.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Example</title>
|
<title>Example</title>
|
||||||
<para>
|
<para>
|
||||||
@@ -50,6 +68,127 @@
|
|||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Options</title>
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check if this node can be promoted, but don't carry out the promotion
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Configuration file settings</title>
|
||||||
|
<para>
|
||||||
|
The following parameters in <filename>repmgr.conf</filename> are relevant to the
|
||||||
|
promote operation:
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<indexterm>
|
||||||
|
<primary>promote_check_interval</primary>
|
||||||
|
<secondary>with "repmgr standby promote"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<simpara>
|
||||||
|
<literal>promote_check_interval</literal>:
|
||||||
|
interval (in seconds, default: 1 second) to wait between each check
|
||||||
|
to determine whether the standby has been promoted.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<indexterm>
|
||||||
|
<primary>promote_check_timeout</primary>
|
||||||
|
<secondary>with "repmgr standby promote"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<simpara>
|
||||||
|
<literal>promote_check_timeout</literal>:
|
||||||
|
time (in seconds, default: 60 seconds) to wait to verify that the standby has been promoted
|
||||||
|
before exiting with <literal>ERR_PROMOTION_FAIL</literal>.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Exit codes</title>
|
||||||
|
<para>
|
||||||
|
One of the following exit codes will be emitted by <command>repmgr standby promote</command>:
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>SUCCESS (0)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The standby was successfully promoted to primary.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_DB_CONN (6)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr; was unable to connect to the local PostgreSQL node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL must be running before the node can be promoted.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_PROMOTION_FAIL (8)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The node could not be promoted to primary for one of the following
|
||||||
|
reasons:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
there is an existing primary node in the replication cluster
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
the node is not a standby
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
WAL replay is paused on the node
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
execution of the PostgreSQL promote command failed
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
<refsect1 id="repmgr-standby-promote-events">
|
<refsect1 id="repmgr-standby-promote-events">
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -37,7 +37,7 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; will refuse to perform the switchover if an exclusive backup is running on
|
&repmgr; will refuse to perform the switchover if an exclusive backup is running on
|
||||||
the current primary.
|
the current primary, or if WAL replay is paused on the standby.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
<para>
|
<para>
|
||||||
@@ -146,6 +146,7 @@
|
|||||||
|
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
|
|
||||||
<term><option>--siblings-follow</option></term>
|
<term><option>--siblings-follow</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
@@ -161,29 +162,45 @@
|
|||||||
<title>Configuration file settings</title>
|
<title>Configuration file settings</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Note that following parameters in <filename>repmgr.conf</filename> are relevant to the
|
The following parameters in <filename>repmgr.conf</filename> are relevant to the
|
||||||
switchover operation:
|
switchover operation:
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
</para>
|
||||||
|
|
||||||
<listitem>
|
<variablelist>
|
||||||
<simpara>
|
|
||||||
<literal>replication_lag_critical</literal>:
|
|
||||||
if replication lag (in seconds) on the standby exceeds this value, the
|
|
||||||
switchover will be aborted (unless the <literal>-F/--force</literal> option
|
|
||||||
is provided)
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
<varlistentry>
|
||||||
<simpara>
|
<indexterm>
|
||||||
<literal>shutdown_check_timeout</literal>: maximum number of seconds to wait for the
|
<primary>replication_lag_critical</primary>
|
||||||
demotion candidate (current primary) to shut down, before aborting the switchover.
|
<secondary>with "repmgr standby switchover"</secondary>
|
||||||
</simpara>
|
</indexterm>
|
||||||
<simpara>
|
|
||||||
Note that this parameter is set on the node where <command>repmgr standby switchover</command>
|
<term><option>replication_lag_critical</option></term>
|
||||||
is executed (promotion candidate); setting it on the demotion candidate (former primary) will
|
<listitem>
|
||||||
have no effect.
|
<para>
|
||||||
</simpara>
|
If replication lag (in seconds) on the standby exceeds this value, the
|
||||||
|
switchover will be aborted (unless the <literal>-F/--force</literal> option
|
||||||
|
is provided)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>shutdown_check_timeout</primary>
|
||||||
|
<secondary>with "repmgr standby switchover"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>shutdown_check_timeout</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The maximum number of seconds to wait for the
|
||||||
|
demotion candidate (current primary) to shut down, before aborting the switchover.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that this parameter is set on the node where <command>repmgr standby switchover</command>
|
||||||
|
is executed (promotion candidate); setting it on the demotion candidate (former primary) will
|
||||||
|
have no effect.
|
||||||
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
In versions prior to <link linkend="release-4.2">&repmgr; 4.2</link>, <command>repmgr standby switchover</command> would
|
In versions prior to <link linkend="release-4.2">&repmgr; 4.2</link>, <command>repmgr standby switchover</command> would
|
||||||
@@ -191,18 +208,73 @@
|
|||||||
to determine the timeout for demotion candidate shutdown.
|
to determine the timeout for demotion candidate shutdown.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<literal>standby_reconnect_timeout</literal>:
|
|
||||||
maximum number of seconds to attempt to wait for the demotion candidate (former primary)
|
|
||||||
to reconnect to the promoted primary (default: 60 seconds)
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
</itemizedlist>
|
<varlistentry>
|
||||||
</para>
|
<indexterm>
|
||||||
|
<primary>wal_receive_check_timeout</primary>
|
||||||
|
<secondary>with "repmgr standby switchover"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>wal_receive_check_timeout</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
After the primary has shut down, the maximum number of seconds to wait for the
|
||||||
|
walreceiver on the standby to flush WAL to disk before comparing WAL receive location
|
||||||
|
with the primary's shutdown location.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>standby_reconnect_timeout</primary>
|
||||||
|
<secondary>with "repmgr standby switchover"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>standby_reconnect_timeout</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The maximum number of seconds to attempt to wait for the demotion candidate (former primary)
|
||||||
|
to reconnect to the promoted primary (default: 60 seconds)
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that this parameter is set on the node where <command>repmgr standby switchover</command>
|
||||||
|
is executed (promotion candidate); setting it on the demotion candidate (former primary) will
|
||||||
|
have no effect.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>node_rejoin_timeout</primary>
|
||||||
|
<secondary>with "repmgr standby switchover"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<term><option>node_rejoin_timeout</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The maximum number of seconds to attempt to wait for the demotion candidate (former primary)
|
||||||
|
to reconnect to the promoted primary (default: 60 seconds)
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that this parameter is set on the demotion candidate (former primary);
|
||||||
|
setting it on the node where <command>repmgr standby switchover</command> is
|
||||||
|
executed will have no effect.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
However, this value <emphasis>must</emphasis> be less than <option>standby_reconnect_timeout</option> on the
|
||||||
|
promotion candidate (the node where <command>repmgr standby switchover</command> is executed).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
@@ -213,12 +285,7 @@
|
|||||||
Execute with the <literal>--dry-run</literal> option to test the switchover as far as
|
Execute with the <literal>--dry-run</literal> option to test the switchover as far as
|
||||||
possible without actually changing the status of either node.
|
possible without actually changing the status of either node.
|
||||||
</para>
|
</para>
|
||||||
<important>
|
|
||||||
<para>
|
|
||||||
<application>repmgrd</application> must be shut down on all nodes while a switchover is being
|
|
||||||
executed. This restriction will be removed in a future &repmgr; version.
|
|
||||||
</para>
|
|
||||||
</important>
|
|
||||||
<para>
|
<para>
|
||||||
External database connections, e.g. from an application, should not be permitted while
|
External database connections, e.g. from an application, should not be permitted while
|
||||||
the switchover is taking place. In particular, active transactions on the primary
|
the switchover is taking place. In particular, active transactions on the primary
|
||||||
@@ -243,7 +310,7 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
<para>
|
<para>
|
||||||
Following exit codes can be emitted by <command>repmgr standby switchover</command>:
|
One of the following exit codes will be emitted by <command>repmgr standby switchover</command>:
|
||||||
</para>
|
</para>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
@@ -251,7 +318,8 @@
|
|||||||
<term><option>SUCCESS (0)</option></term>
|
<term><option>SUCCESS (0)</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
The switchover completed successfully.
|
The switchover completed successfully; or if <option>--dry-run</option> was provided,
|
||||||
|
no issues were detected which would prevent the switchover operation.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@@ -282,7 +350,10 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>See also</title>
|
<title>See also</title>
|
||||||
<para>
|
<para>
|
||||||
For more details see the section <xref linkend="performing-switchover">.
|
<xref linkend="repmgr-standby-follow">, <xref linkend="repmgr-node-rejoin">
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For more details on performing a switchover operation, see the section <xref linkend="performing-switchover">.
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|||||||
@@ -23,14 +23,27 @@
|
|||||||
use of the witness server with <application>repmgrd</application>.
|
use of the witness server with <application>repmgrd</application>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
When executing <command>repmgr witness register</command>, connection information
|
When executing <command>repmgr witness register</command>, database connection
|
||||||
for the cluster primary server must also be provided. &repmgr; will automatically
|
information for the cluster primary server must also be provided.
|
||||||
use the <varname>user</varname> and <varname>dbname</varname> values defined
|
|
||||||
in the <varname>conninfo</varname> string defined in the witness node's
|
|
||||||
<filename>repmgr.conf</filename>, if these are not explicitly provided.
|
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Execute with the <literal>--dry-run</literal> option to check what would happen
|
In most cases it's only necessary to provide the primary's hostname with
|
||||||
|
the <option>-h</option>/<option>--host</option> option; &repmgr; will
|
||||||
|
automatically use the <varname>user</varname> and <varname>dbname</varname>
|
||||||
|
values defined in the <varname>conninfo</varname> string defined in the
|
||||||
|
witness node's <filename>repmgr.conf</filename>, unless these are explicitly
|
||||||
|
provided as command line options.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
The primary server must be registered with <command><link linkend="repmgr-primary-register">repmgr primary register</link></command> before the witness
|
||||||
|
server can be registered.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Execute with the <option>--dry-run</option> option to check what would happen
|
||||||
without actually registering the witness server.
|
without actually registering the witness server.
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|||||||
@@ -24,26 +24,26 @@
|
|||||||
<abstract>
|
<abstract>
|
||||||
<para>
|
<para>
|
||||||
This is the official documentation of &repmgr; &repmgrversion; for
|
This is the official documentation of &repmgr; &repmgrversion; for
|
||||||
use with PostgreSQL 9.3 - PostgreSQL 10.
|
use with PostgreSQL 9.3 - PostgreSQL 11.
|
||||||
It describes the functionality supported by the current version of &repmgr;.
|
It describes the functionality supported by the current version of &repmgr;.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr; was developed by
|
&repmgr; is developed by
|
||||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
||||||
along with contributions from other individuals and companies.
|
along with contributions from other individuals and companies.
|
||||||
Contributions from the community are appreciated and welcome - get
|
Contributions from the community are appreciated and welcome - get
|
||||||
in touch via <ulink url="https://github.com/2ndQuadrant/repmgr">github</>
|
in touch via <ulink url="https://github.com/2ndQuadrant/repmgr">github</ulink>
|
||||||
or <ulink url="https://groups.google.com/group/repmgr">the mailing list/forum</>.
|
or <ulink url="https://groups.google.com/group/repmgr">the mailing list/forum</ulink>.
|
||||||
Multiple 2ndQuadrant customers contribute funding
|
Multiple 2ndQuadrant customers contribute funding
|
||||||
to make repmgr development possible.
|
to make repmgr development possible.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
2ndQuadrant, a Platinum sponsor of the PostgreSQL project,
|
&repmgr; is fully supported by 2ndQuadrant's
|
||||||
continues to develop repmgr to meet internal needs and those of customers.
|
<ulink url="https://www.2ndquadrant.com/en/support/support-postgresql/">24/7 Production Support</ulink>.
|
||||||
Other companies as well as individual developers
|
2ndQuadrant, a Major Sponsor of the PostgreSQL project, continues to develop and maintain &repmgr;.
|
||||||
are welcome to participate in the efforts.
|
Other companies as well as individual developers are welcome to participate in the efforts.
|
||||||
</para>
|
</para>
|
||||||
</abstract>
|
</abstract>
|
||||||
|
|
||||||
@@ -73,22 +73,16 @@
|
|||||||
&promoting-standby;
|
&promoting-standby;
|
||||||
&follow-new-primary;
|
&follow-new-primary;
|
||||||
&switchover;
|
&switchover;
|
||||||
&configuring-witness-server;
|
|
||||||
&event-notifications;
|
&event-notifications;
|
||||||
&upgrading-repmgr;
|
&upgrading-repmgr;
|
||||||
</part>
|
</part>
|
||||||
|
|
||||||
<part id="using-repmgrd">
|
<part id="using-repmgrd">
|
||||||
<title>Using repmgrd</title>
|
<title>Using repmgrd</title>
|
||||||
|
&repmgrd-overview;
|
||||||
&repmgrd-automatic-failover;
|
&repmgrd-automatic-failover;
|
||||||
&repmgrd-configuration;
|
&repmgrd-configuration;
|
||||||
&repmgrd-demonstration;
|
&repmgrd-operation;
|
||||||
&repmgrd-cascading-replication;
|
|
||||||
&repmgrd-network-split;
|
|
||||||
&repmgrd-witness-server;
|
|
||||||
&repmgrd-pausing;
|
|
||||||
&repmgrd-degraded-monitoring;
|
|
||||||
&repmgrd-monitoring;
|
|
||||||
&repmgrd-bdr;
|
&repmgrd-bdr;
|
||||||
</part>
|
</part>
|
||||||
|
|
||||||
@@ -108,12 +102,15 @@
|
|||||||
&repmgr-node-status;
|
&repmgr-node-status;
|
||||||
&repmgr-node-check;
|
&repmgr-node-check;
|
||||||
&repmgr-node-rejoin;
|
&repmgr-node-rejoin;
|
||||||
|
&repmgr-node-service;
|
||||||
&repmgr-cluster-show;
|
&repmgr-cluster-show;
|
||||||
&repmgr-cluster-matrix;
|
&repmgr-cluster-matrix;
|
||||||
&repmgr-cluster-crosscheck;
|
&repmgr-cluster-crosscheck;
|
||||||
&repmgr-cluster-event;
|
&repmgr-cluster-event;
|
||||||
&repmgr-cluster-cleanup;
|
&repmgr-cluster-cleanup;
|
||||||
&repmgr-daemon-status;
|
&repmgr-daemon-status;
|
||||||
|
&repmgr-daemon-start;
|
||||||
|
&repmgr-daemon-stop;
|
||||||
&repmgr-daemon-pause;
|
&repmgr-daemon-pause;
|
||||||
&repmgr-daemon-unpause;
|
&repmgr-daemon-unpause;
|
||||||
</part>
|
</part>
|
||||||
@@ -122,6 +119,7 @@
|
|||||||
&appendix-signatures;
|
&appendix-signatures;
|
||||||
&appendix-faq;
|
&appendix-faq;
|
||||||
&appendix-packages;
|
&appendix-packages;
|
||||||
|
&appendix-support;
|
||||||
|
|
||||||
<![%include-index;[&bookindex;]]>
|
<![%include-index;[&bookindex;]]>
|
||||||
<![%include-xslt-index;[<index id="bookindex"></index>]]>
|
<![%include-xslt-index;[<index id="bookindex"></index>]]>
|
||||||
|
|||||||
@@ -13,5 +13,285 @@
|
|||||||
providing monitoring information about the state of each standby.
|
providing monitoring information about the state of each standby.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-witness-server" xreflabel="Using a witness server with repmgrd">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>witness server</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>witness server</primary>
|
||||||
|
<secondary>repmgrd</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<title>Using a witness server</title>
|
||||||
|
<para>
|
||||||
|
A <xref linkend="witness-server"> is a normal PostgreSQL instance which
|
||||||
|
is not part of the streaming replication cluster; its purpose is, if a
|
||||||
|
failover situation occurs, to provide proof that it is the primary server
|
||||||
|
itself which is unavailable, rather than e.g. a network split between
|
||||||
|
different physical locations.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
A typical use case for a witness server is a two-node streaming replication
|
||||||
|
setup, where the primary and standby are in different locations (data centres).
|
||||||
|
By creating a witness server in the same location (data centre) as the primary,
|
||||||
|
if the primary becomes unavailable it's possible for the standby to decide whether
|
||||||
|
it can promote itself without risking a "split brain" scenario: if it can't see either the
|
||||||
|
witness or the primary server, it's likely there's a network-level interruption
|
||||||
|
and it should not promote itself. If it can see the witness but not the primary,
|
||||||
|
this proves there is no network interruption and the primary itself is unavailable,
|
||||||
|
and it can therefore promote itself (and ideally take action to fence the
|
||||||
|
former primary).
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
<emphasis>Never</emphasis> install a witness server on the same physical host
|
||||||
|
as another node in the replication cluster managed by &repmgr; - it's essential
|
||||||
|
the witness is not affected in any way by failure of another node.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
For more complex replication scenarios, e.g. with multiple datacentres, it may
|
||||||
|
be preferable to use location-based failover, which ensures that only nodes
|
||||||
|
in the same location as the primary will ever be promotion candidates;
|
||||||
|
see <xref linkend="repmgrd-network-split"> for more details.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
A witness server will only be useful if <application>repmgrd</application>
|
||||||
|
is in use.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<sect2 id="creating-witness-server">
|
||||||
|
<title>Creating a witness server</title>
|
||||||
|
<para>
|
||||||
|
To create a witness server, set up a normal PostgreSQL instance on a server
|
||||||
|
in the same physical location as the cluster's primary server.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This instance should <emphasis>not</emphasis> be on the same physical host as the primary server,
|
||||||
|
as otherwise if the primary server fails due to hardware issues, the witness
|
||||||
|
server will be lost too.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
&repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
|
||||||
|
command, which would automatically create a PostgreSQL instance. However
|
||||||
|
this often resulted in an unsatisfactory, hard-to-customise instance.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
The witness server should be configured in the same way as a normal
|
||||||
|
&repmgr; node; see section <xref linkend="configuration">.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Register the witness server with <xref linkend="repmgr-witness-register">.
|
||||||
|
This will create the &repmgr; extension on the witness server, and make
|
||||||
|
a copy of the &repmgr; metadata.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
As the witness server is not part of the replication cluster, further
|
||||||
|
changes to the &repmgr; metadata will be synchronised by
|
||||||
|
<application>repmgrd</application>.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
Once the witness server has been configured, <application>repmgrd</application>
|
||||||
|
should be started.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-network-split" xreflabel="Handling network splits with repmgrd">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>network splits</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>network splits</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Handling network splits with repmgrd</title>
|
||||||
|
<para>
|
||||||
|
A common pattern for replication cluster setups is to spread servers over
|
||||||
|
more than one datacentre. This can provide benefits such as geographically-
|
||||||
|
distributed read replicas and DR (disaster recovery) capability. However
|
||||||
|
this also means there is a risk of disconnection at network level between
|
||||||
|
datacentre locations, which would result in a split-brain scenario if
|
||||||
|
servers in a secondary data centre were no longer able to see the primary
|
||||||
|
in the main data centre and promoted a standby among themselves.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
&repmgr; enables provision of "<xref linkend="witness-server">" to
|
||||||
|
artificially create a quorum of servers in a particular location, ensuring
|
||||||
|
that nodes in another location will not elect a new primary if they
|
||||||
|
are unable to see the majority of nodes. However this approach does not
|
||||||
|
scale well, particularly with more complex replication setups, e.g.
|
||||||
|
where the majority of nodes are located outside of the primary datacentre.
|
||||||
|
It also means the <literal>witness</literal> node needs to be managed as an
|
||||||
|
extra PostgreSQL instance outside of the main replication cluster, which
|
||||||
|
adds administrative and programming complexity.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<literal>repmgr4</literal> introduces the concept of <literal>location</literal>:
|
||||||
|
each node is associated with an arbitrary location string (default is
|
||||||
|
<literal>default</literal>); this is set in <filename>repmgr.conf</filename>, e.g.:
|
||||||
|
<programlisting>
|
||||||
|
node_id=1
|
||||||
|
node_name=node1
|
||||||
|
conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'
|
||||||
|
data_directory='/var/lib/postgresql/data'
|
||||||
|
location='dc1'</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In a failover situation, <application>repmgrd</application> will check if any servers in the
|
||||||
|
same location as the current primary node are visible. If not, <application>repmgrd</application>
|
||||||
|
will assume a network interruption and not promote any node in any
|
||||||
|
other location (it will however enter <link linkend="repmgrd-degraded-monitoring">degraded monitoring</link>
|
||||||
|
mode until a primary becomes visible).
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-standby-disconnection-on-failover" xreflabel="Standby disconnection on failover">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>standby disconnection on failover</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>standby disconnection on failover</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Standby disconnection on failover</title>
|
||||||
|
<para>
|
||||||
|
If <option>standby_disconnect_on_failover</option> is set to <literal>true</literal> in
|
||||||
|
<filename>repmgr.conf</filename>, in a failover situation <application>repmgrd</application> will forcibly disconnect
|
||||||
|
the local node's WAL receiver before making a failover decision.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
<option>standby_disconnect_on_failover</option> is available from PostgreSQL 9.5 and later.
|
||||||
|
Additionally this requires that the <literal>repmgr</literal> database user is a superuser.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
By doing this, it's possible to ensure that, at the point the failover decision is made, no nodes
|
||||||
|
are receiving data from the primary and their LSN location will be static.
|
||||||
|
</para>
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
<option>standby_disconnect_on_failover</option> <emphasis>must</emphasis> be set to the same value on
|
||||||
|
all nodes.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
<para>
|
||||||
|
Note that when using <option>standby_disconnect_on_failover</option> there will be a delay of 5 seconds
|
||||||
|
plus however many seconds it takes to confirm the WAL receiver is disconnected before
|
||||||
|
<application>repmgrd</application> proceeds with the failover decision.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Following the failover operation, no matter what the outcome, each node will reconnect its WAL receiver.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-failover-validation" xreflabel="Failover validation">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>failover validation</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>failover validation</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Failover validation</title>
|
||||||
|
<para>
|
||||||
|
From <link linkend="release-4.3">repmgr 4.3</link>, &repmgr; makes it possible to provide a script
|
||||||
|
to <application>repmgrd</application> which, in a failover situation,
|
||||||
|
will be executed by the promotion candidate (the node which has been selected
|
||||||
|
to be the new primary) to confirm whether the node should actually be promoted.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
To use this, set <option>failover_validation_command</option> in <filename>repmgr.conf</filename>
|
||||||
|
to a script executable by the <literal>postgres</literal> system user, e.g.:
|
||||||
|
<programlisting>
|
||||||
|
failover_validation_command=/path/to/script.sh %n %a</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The <literal>%n</literal> parameter will be replaced with the node ID, and the
|
||||||
|
<literal>%a</literal> parameter will be replaced by the node name when the script is executed.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This script must return an exit code of <literal>0</literal> to indicate the node should promote itself.
|
||||||
|
Any other value will result in the promotion being aborted and the election rerun.
|
||||||
|
There is a pause of <option>election_rerun_interval</option> seconds before the election is rerun.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Sample <application>repmgrd</application> log file output during which the failover validation
|
||||||
|
script rejects the proposed promotion candidate:
|
||||||
|
<programlisting>
|
||||||
|
[2019-03-13 21:01:30] [INFO] visible nodes: 2; total nodes: 2; no nodes have seen the primary within the last 4 seconds
|
||||||
|
[2019-03-13 21:01:30] [NOTICE] promotion candidate is "node2" (ID: 2)
|
||||||
|
[2019-03-13 21:01:30] [NOTICE] executing "failover_validation_command"
|
||||||
|
[2019-03-13 21:01:30] [DETAIL] /usr/local/bin/failover-validation.sh 2
|
||||||
|
[2019-03-13 21:01:30] [INFO] output returned by failover validation command:
|
||||||
|
Node ID: 2
|
||||||
|
|
||||||
|
[2019-03-13 21:01:30] [NOTICE] failover validation command returned a non-zero value: "1"
|
||||||
|
[2019-03-13 21:01:30] [NOTICE] promotion candidate election will be rerun
|
||||||
|
[2019-03-13 21:01:30] [INFO] 1 followers to notify
|
||||||
|
[2019-03-13 21:01:30] [NOTICE] notifying node "node3" (node ID: 3) to rerun promotion candidate selection
|
||||||
|
INFO: node 3 received notification to rerun promotion candidate election
|
||||||
|
[2019-03-13 21:01:30] [NOTICE] rerunning election after 15 seconds ("election_rerun_interval")</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
<sect1 id="cascading-replication" xreflabel="Cascading replication">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>cascading replication</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>cascading replication</primary>
|
||||||
|
<secondary>repmgrd</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>repmgrd and cascading replication</title>
|
||||||
|
<para>
|
||||||
|
Cascading replication - where a standby can connect to an upstream node and not
|
||||||
|
the primary server itself - was introduced in PostgreSQL 9.2. &repmgr; and
|
||||||
|
<application>repmgrd</application> support cascading replication by keeping track of the relationship
|
||||||
|
between standby servers - each node record is stored with the node id of its
|
||||||
|
upstream ("parent") server (except of course the primary server).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In a failover situation where the primary node fails and a top-level standby
|
||||||
|
is promoted, a standby connected to another standby will not be affected
|
||||||
|
and continue working as normal (even if the upstream standby it's connected
|
||||||
|
to becomes the primary node). If however the node's direct upstream fails,
|
||||||
|
the "cascaded standby" will attempt to reconnect to that node's parent
|
||||||
|
(unless <varname>failover</varname> is set to <literal>manual</literal> in
|
||||||
|
<filename>repmgr.conf</filename>).
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
</chapter>
|
</chapter>
|
||||||
|
|||||||
@@ -1,22 +0,0 @@
|
|||||||
<chapter id="repmgrd-cascading-replication">
|
|
||||||
<indexterm>
|
|
||||||
<primary>repmgrd</primary>
|
|
||||||
<secondary>cascading replication</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<title>repmgrd and cascading replication</title>
|
|
||||||
<para>
|
|
||||||
Cascading replication - where a standby can connect to an upstream node and not
|
|
||||||
the primary server itself - was introduced in PostgreSQL 9.2. &repmgr; and
|
|
||||||
<application>repmgrd</application> support cascading replication by keeping track of the relationship
|
|
||||||
between standby servers - each node record is stored with the node id of its
|
|
||||||
upstream ("parent") server (except of course the primary server).
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
In a failover situation where the primary node fails and a top-level standby
|
|
||||||
is promoted, a standby connected to another standby will not be affected
|
|
||||||
and continue working as normal (even if the upstream standby it's connected
|
|
||||||
to becomes the primary node). If however the node's direct upstream fails,
|
|
||||||
the "cascaded standby" will attempt to reconnect to that node's parent.
|
|
||||||
</para>
|
|
||||||
</chapter>
|
|
||||||
@@ -5,7 +5,7 @@
|
|||||||
<secondary>configuration</secondary>
|
<secondary>configuration</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<title>repmgrd configuration</title>
|
<title>repmgrd setup and configuration</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application> is a daemon which runs on each PostgreSQL node,
|
<application>repmgrd</application> is a daemon which runs on each PostgreSQL node,
|
||||||
@@ -20,7 +20,7 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<sect1 id="repmgrd-basic-configuration">
|
<sect1 id="repmgrd-basic-configuration">
|
||||||
<title>repmgrd basic configuration</title>
|
<title>repmgrd configuration</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
To use <application>repmgrd</application>, its associated function library <emphasis>must</emphasis> be
|
To use <application>repmgrd</application>, its associated function library <emphasis>must</emphasis> be
|
||||||
@@ -31,79 +31,417 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Changing this setting requires a restart of PostgreSQL; for more details see
|
Changing this setting requires a restart of PostgreSQL; for more details see
|
||||||
the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
|
the <ulink url="https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The following configuration options apply to <application>repmgrd</application> in all circumstances:
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>monitor_interval_secs</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>monitor_interval_secs</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The interval (in seconds, default: <literal>2</literal>) to check the availability of the upstream node.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="connection-check-type">
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>connection_check_type</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>connection_check_type</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The option <option>connection_check_type</option> is used to select the method
|
||||||
|
<application>repmgrd</application> uses to determine whether the upstream node is available.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Possible values are:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<literal>ping</literal> (default) - uses <command>PQping()</command> to
|
||||||
|
determine server availability
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<literal>connection</literal> - determines server availability
|
||||||
|
by attempting to make a new connection to the upstream node
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<literal>query</literal> - determines server availability
|
||||||
|
by executing an SQL statement on the node via the existing connection
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>reconnect_attempts</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>reconnect_attempts</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The number of attempts (default: <literal>6</literal>) which will be made to reconnect to an unreachable
|
||||||
|
upstream node before initiating a failover.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
There will be an interval of <option>reconnect_interval</option> seconds between each reconnection
|
||||||
|
attempt.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>reconnect_interval</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>reconnect_interval</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Interval (in seconds, default: <literal>10</literal>) between attempts to reconnect to an unreachable
|
||||||
|
upstream node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The number of reconnection attempts is defined by the parameter <option>reconnect_attempts</option>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>degraded_monitoring_timeout</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>degraded_monitoring_timeout</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Interval (in seconds) after which <application>repmgrd</application> will terminate if
|
||||||
|
either of the servers (local node and/or upstream node) being monitored is no longer available
|
||||||
|
(<link linkend="repmgrd-degraded-monitoring">degraded monitoring mode</link>).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<literal>-1</literal> (default) disables this timeout completely.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
See also <filename><ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</ulink></filename> for an annotated sample configuration file.
|
||||||
|
</para>
|
||||||
|
|
||||||
<sect2 id="repmgrd-automatic-failover-configuration">
|
<sect2 id="repmgrd-automatic-failover-configuration">
|
||||||
<title>automatic failover configuration</title>
|
<title>Required configuration for automatic failover</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
If using automatic failover, the following <application>repmgrd</application> options *must* be set in
|
The following <application>repmgrd</application> options <emphasis>must</emphasis> be set in
|
||||||
<filename>repmgr.conf</filename> :
|
<filename>repmgr.conf</filename>:
|
||||||
|
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara><option>failover</option></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><option>promote_command</option></simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><option>follow_command</option></simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Example:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
failover=automatic
|
failover=automatic
|
||||||
promote_command='/usr/bin/repmgr standby promote -f /etc/repmgr.conf --log-to-file'
|
promote_command='/usr/bin/repmgr standby promote -f /etc/repmgr.conf --log-to-file'
|
||||||
follow_command='/usr/bin/repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
|
follow_command='/usr/bin/repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Adjust file paths as appropriate; alway specify the full path to the &repmgr; binary.
|
Details of each option are as follows:
|
||||||
</para>
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
<note>
|
<indexterm>
|
||||||
<para>
|
<primary>failover</primary>
|
||||||
&repmgr; will not apply <option>pg_bindir</option> when executing <option>promote_command</option>
|
</indexterm>
|
||||||
or <option>follow_command</option>; these can be user-defined scripts so must always be
|
<term><option>failover</option></term>
|
||||||
specified with the full path.
|
<listitem>
|
||||||
</para>
|
<para>
|
||||||
</note>
|
<option>failover</option> can be one of <literal>automatic</literal> or <literal>manual</literal>.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If <option>failover</option> is set to <literal>manual</literal>, <application>repmgrd</application>
|
||||||
|
will not take any action if a failover situation is detected, and the node may need to
|
||||||
|
be modified manually (e.g. by executing <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>).
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>promote_command</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>promote_command</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The program or script defined in <option>promote_command</option> will be executed
|
||||||
|
in a failover situation when <application>repmgrd</application> determines that
|
||||||
|
the current node is to become the new primary node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Normally <option>promote_command</option> is set as &repmgr;'s
|
||||||
|
<command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command> command.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
It is also possible to provide a shell script to e.g. perform user-defined tasks
|
||||||
|
before promoting the current node. In this case the script <emphasis>must</emphasis>
|
||||||
|
at some point execute <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>
|
||||||
|
to promote the node; if this is not done, &repmgr; metadata will not be updated and
|
||||||
|
&repmgr; will no longer function reliably.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Example:
|
||||||
|
<programlisting>
|
||||||
|
promote_command='/usr/bin/repmgr standby promote -f /etc/repmgr.conf --log-to-file'</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Note that the <literal>--log-to-file</literal> option will cause
|
||||||
|
output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
|
||||||
|
to be logged to the same destination configured to receive log output for <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
&repmgr; will not apply <option>pg_bindir</option> when executing <option>promote_command</option>
|
||||||
|
or <option>follow_command</option>; these can be user-defined scripts so must always be
|
||||||
|
specified with the full path.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>follow_command</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>follow_command</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The program or script defined in <option>follow_command</option> will be executed
|
||||||
|
in a failover situation when <application>repmgrd</application> determines that
|
||||||
|
the current node is to follow the new primary node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Normally <option>follow_command</option> is set as &repmgr;'s
|
||||||
|
<command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command> command.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The <option>follow_command</option> parameter
|
||||||
|
should provide the <literal>--upstream-node-id=%n</literal>
|
||||||
|
option to <command>repmgr standby follow</command>; the <literal>%n</literal> will be replaced by
|
||||||
|
<application>repmgrd</application> with the ID of the new primary node. If this is not provided,
|
||||||
|
<command>repmgr standby follow</command> will attempt to determine the new primary by itself, but if the
|
||||||
|
original primary comes back online after the new primary is promoted, there is a risk that
|
||||||
|
<command>repmgr standby follow</command> will result in the node continuing to follow
|
||||||
|
the original primary.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
It is also possible to provide a shell script to e.g. perform user-defined tasks
|
||||||
|
before following the new primary. In this case the script <emphasis>must</emphasis>
|
||||||
|
at some point execute <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>
|
||||||
|
to follow the new primary; if this is not done, &repmgr; metadata will not be updated and
|
||||||
|
&repmgr; will no longer function reliably.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Example:
|
||||||
|
<programlisting>
|
||||||
|
follow_command='/usr/bin/repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Note that the <literal>--log-to-file</literal> option will cause
|
||||||
|
output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
|
||||||
|
to be logged to the same destination configured to receive log output for <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
&repmgr; will not apply <option>pg_bindir</option> when executing <option>promote_command</option>
|
||||||
|
or <option>follow_command</option>; these can be user-defined scripts so must always be
|
||||||
|
specified with the full path.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
<para>
|
|
||||||
Note that the <literal>--log-to-file</literal> option will cause
|
|
||||||
output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
|
|
||||||
to be logged to the same destination configured to receive log output for <application>repmgrd</application>.
|
|
||||||
See <filename><ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</ulink></filename>
|
|
||||||
for further <application>repmgrd</application>-specific settings.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
When <varname>failover</varname> is set to <literal>automatic</literal>, upon detecting failure
|
|
||||||
of the current primary, <application>repmgrd</application> will execute one of:
|
|
||||||
</para>
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<varname>promote_command</varname> (if the current server is to become the new primary)
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<varname>follow_command</varname> (if the current server needs to follow another server which has
|
|
||||||
become the new primary)
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
These commands can be any valid shell script which results in one of these
|
|
||||||
two actions happening, but if &repmgr;'s <command>standby follow</command> or
|
|
||||||
<command>standby promote</command>
|
|
||||||
commands are not executed (either directly as shown here, or from a script which
|
|
||||||
performs other actions), the &repmgr; metadata will not be updated and
|
|
||||||
&repmgr; will no longer function reliably.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The <varname>follow_command</varname> should provide the <literal>--upstream-node-id=%n</literal>
|
|
||||||
option to <command>repmgr standby follow</command>; the <literal>%n</literal> will be replaced by
|
|
||||||
<application>repmgrd</application> with the ID of the new primary node. If this is not provided, &repmgr;
|
|
||||||
will attempt to determine the new primary by itself, but if the
|
|
||||||
original primary comes back online after the new primary is promoted, there is a risk that
|
|
||||||
<command>repmgr standby follow</command> will result in the node continuing to follow
|
|
||||||
the original primary.
|
|
||||||
</para>
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="repmgrd-service-configuration">
|
<sect2 id="repmgrd-automatic-failover-configuration-optional">
|
||||||
|
<title>Optional configuration for automatic failover</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The following configuration options can be used to fine-tune automatic failover:
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>priority</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>priority</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Indicates a preferred priority (default: <literal>100</literal>) for promoting nodes;
|
||||||
|
a value of zero prevents the node being promoted to primary.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that the priority setting is only applied if two or more nodes are
|
||||||
|
determined as promotion candidates; in that case the node with the
|
||||||
|
higher priority is selected.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>failover_validation_command</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>failover_validation_command</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
User-defined script to execute for an external mechanism to validate the failover
|
||||||
|
decision made by <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
This option <emphasis>must</emphasis> be identically configured
|
||||||
|
on all nodes.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
One or both of the following parameter placeholders
|
||||||
|
should be provided, which will be replaced by repmgrd with the appropriate
|
||||||
|
value:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>%n</literal>: node ID</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara><literal>%a</literal>: node name</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
See also: <link linkend="repmgrd-failover-validation">Failover validation</link>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>standby_disconnect_on_failover</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>standby_disconnect_on_failover</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
In a failover situation, disconnect the local node's WAL receiver.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This option is available from PostgreSQL 9.5 and later.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
This option <emphasis>must</emphasis> be identically configured
|
||||||
|
on all nodes.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Additionally the &repmgr; user <emphasis>must</emphasis> be a superuser
|
||||||
|
for this option.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> will refuse to start if this option is set
|
||||||
|
but either of these prerequisites is not met.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
See also: <link linkend="repmgrd-standby-disconnection-on-failover">Standby disconnection on failover</link>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The following options can be used to further fine-tune failover behaviour.
|
||||||
|
In practice it's unlikely these will need to be changed from their default
|
||||||
|
values, but they are available as configuration options should the need arise.
|
||||||
|
</para>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>election_rerun_interval</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>election_rerun_interval</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
If <option>failover_validation_command</option> is set, and the command returns
|
||||||
|
an error, pause the specified number of seconds (default: 15) before rerunning the election.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>sibling_nodes_disconnect_timeout</primary>
|
||||||
|
</indexterm>
|
||||||
|
<term><option>sibling_nodes_disconnect_timeout</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
If <option>standby_disconnect_on_failover</option> is <literal>true</literal>, the
|
||||||
|
maximum length of time (in seconds, default: <literal>30</literal>)
|
||||||
|
to wait for other standbys to confirm they have disconnected their
|
||||||
|
WAL receivers.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="postgresql-service-configuration">
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>repmgrd</primary>
|
<primary>repmgrd</primary>
|
||||||
<secondary>PostgreSQL service configuration</secondary>
|
<secondary>PostgreSQL service configuration</secondary>
|
||||||
@@ -126,6 +464,42 @@
|
|||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="repmgrd-service-configuration">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>repmgrd service configuration</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<title>repmgrd service configuration</title>
|
||||||
|
<para>
|
||||||
|
If you are intending to use the <link linkend="repmgr-daemon-start"><command>repmgr daemon start</command></link>
|
||||||
|
and <link linkend="repmgr-daemon-stop"><command>repmgr daemon stop</command></link> commands, the following
|
||||||
|
parameters <emphasis>must</emphasis> be set in <filename>repmgr.conf</filename>:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara><varname>repmgrd_service_start_command</varname></simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara><varname>repmgrd_service_stop_command</varname></simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Example (for &repmgr; with PostgreSQL 11 on CentOS 7):
|
||||||
|
<programlisting>
|
||||||
|
repmgrd_service_start_command='sudo systemctl start repmgr11'
|
||||||
|
repmgrd_service_stop_command='sudo systemctl stop repmgr11'
|
||||||
|
</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For more details see the reference page for each command.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
<sect2 id="repmgrd-monitoring-configuration" xreflabel="repmgrd monitoring configuration">
|
<sect2 id="repmgrd-monitoring-configuration" xreflabel="repmgrd monitoring configuration">
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>repmgrd</primary>
|
<primary>repmgrd</primary>
|
||||||
@@ -139,10 +513,8 @@
|
|||||||
in <filename>repmgr.conf</filename>.
|
in <filename>repmgr.conf</filename>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The default monitoring interval is 2 seconds; this value can be explicitly set using:
|
Monitoring data is written at the interval defined by
|
||||||
<programlisting>
|
the option <option>monitor_interval_secs</option> (see above).
|
||||||
monitor_interval_secs=<seconds></programlisting>
|
|
||||||
in <filename>repmgr.conf</filename>.
|
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
For more details on monitoring, see <xref linkend="repmgrd-monitoring">.
|
For more details on monitoring, see <xref linkend="repmgrd-monitoring">.
|
||||||
@@ -192,6 +564,13 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<varname>connection_check_type</varname>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<varname>conninfo</varname>
|
<varname>conninfo</varname>
|
||||||
@@ -216,6 +595,12 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<varname>failover_validation_command</varname>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<varname>failover</varname>
|
<varname>failover</varname>
|
||||||
@@ -288,12 +673,30 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<varname>retry_promote_interval_secs</varname>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<varname>repmgrd_standby_startup_timeout</varname>
|
<varname>repmgrd_standby_startup_timeout</varname>
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<varname>sibling_nodes_disconnect_timeout</varname>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<varname>standby_disconnect_on_failover</varname>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
@@ -348,7 +751,7 @@
|
|||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="repmgrd-daemon">
|
<sect1 id="repmgrd-daemon" xreflabel="repmgrd daemon">
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>repmgrd</primary>
|
<primary>repmgrd</primary>
|
||||||
<secondary>starting and stopping</secondary>
|
<secondary>starting and stopping</secondary>
|
||||||
@@ -363,6 +766,20 @@
|
|||||||
See appendix <xref linkend="appendix-packages"> for details of service commands
|
See appendix <xref linkend="appendix-packages"> for details of service commands
|
||||||
for different distributions.
|
for different distributions.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
The commands <link linkend="repmgr-daemon-start"><command>repmgr daemon start</command></link> and
|
||||||
|
<link linkend="repmgr-daemon-stop"><command>repmgr daemon stop</command></link> can be used
|
||||||
|
as convenience wrappers to start and stop <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-daemon-start"><command>repmgr daemon start</command></link> and
|
||||||
|
<link linkend="repmgr-daemon-stop"><command>repmgr daemon stop</command></link> require
|
||||||
|
that the appropriate start/stop commands are configured as
|
||||||
|
<varname>repmgrd_service_start_command</varname> and <varname>repmgrd_service_stop_command</varname>
|
||||||
|
in <filename>repmgr.conf</filename>.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application> can be started manually like this:
|
<application>repmgrd</application> can be started manually like this:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
@@ -387,7 +804,7 @@
|
|||||||
<simpara>
|
<simpara>
|
||||||
This is a behaviour change from previous versions (earlier than 4.1), where
|
This is a behaviour change from previous versions (earlier than 4.1), where
|
||||||
the PID file had to be explicitly specified with the command line
|
the PID file had to be explicitly specified with the command line
|
||||||
parameter <option> --pid-file</option>.
|
parameter <option>--pid-file</option>.
|
||||||
</simpara>
|
</simpara>
|
||||||
</note>
|
</note>
|
||||||
<para>
|
<para>
|
||||||
@@ -407,7 +824,7 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If none of the above apply, <application>repmgrd</application> will create a PID file
|
If none of the above apply, <application>repmgrd</application> will create a PID file
|
||||||
in the operating system's temporary directory (das etermined by the environment variable
|
in the operating system's temporary directory (as determined by the environment variable
|
||||||
<varname>TMPDIR</varname>, or if that is not set, will use <filename>/tmp</filename>).
|
<varname>TMPDIR</varname>, or if that is not set, will use <filename>/tmp</filename>).
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
@@ -478,9 +895,6 @@ REPMGRD_OPTS="--daemonize=false"
|
|||||||
<para>
|
<para>
|
||||||
From <application>repmgrd</application> 4.1, ensure <varname>REPMGRD_OPTS</varname> includes
|
From <application>repmgrd</application> 4.1, ensure <varname>REPMGRD_OPTS</varname> includes
|
||||||
<option>--daemonize=false</option>, as daemonization is handled by the service command.
|
<option>--daemonize=false</option>, as daemonization is handled by the service command.
|
||||||
We recommend setting <varname>repmgrd_pid_file</varname> in <filename>repmgr.conf</filename> to the
|
|
||||||
same value set in <varname>REPMGRD_PIDFILE</varname> to prevent another <application>repmgrd</application>
|
|
||||||
instance from being started manually.
|
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If using <application>systemd</application>, you may need to execute <command>systemctl daemon-reload</command>.
|
If using <application>systemd</application>, you may need to execute <command>systemctl daemon-reload</command>.
|
||||||
@@ -512,7 +926,7 @@ REPMGRD_OPTS="--daemonize=false"
|
|||||||
<para>
|
<para>
|
||||||
For further details on <varname>conninfo</varname> network connection
|
For further details on <varname>conninfo</varname> network connection
|
||||||
parameters, see the
|
parameters, see the
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS">PostgreSQL documentation</ulink>.
|
<ulink url="https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS">PostgreSQL documentation</ulink>.
|
||||||
</para>
|
</para>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
|||||||
@@ -1,83 +0,0 @@
|
|||||||
<chapter id="repmgrd-degraded-monitoring" xreflabel="repmgrd degraded monitoring">
|
|
||||||
<indexterm>
|
|
||||||
<primary>repmgrd</primary>
|
|
||||||
<secondary>degraded monitoring</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<title>"degraded monitoring" mode</title>
|
|
||||||
<para>
|
|
||||||
In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
|
|
||||||
of monitoring the node's upstream server. In these cases it enters "degraded monitoring"
|
|
||||||
mode, where <application>repmgrd</application> remains active but is waiting for the situation
|
|
||||||
to be resolved.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Situations where this happens are:
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>a failover situation has occurred, no nodes in the primary node's location are visible</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>a failover situation has occurred, but no promotion candidate is available</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>a failover situation has occurred, but the promotion candidate could not be promoted</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>a failover situation has occurred, but the node was unable to follow the new primary</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>a failover situation has occurred, but no primary has become available</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>a failover situation has occurred, but automatic failover is not enabled for the node</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary)</simpara>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
Example output in a situation where there is only one standby with <literal>failover=manual</literal>,
|
|
||||||
and the primary node is unavailable (but is later restarted):
|
|
||||||
<programlisting>
|
|
||||||
[2017-08-29 10:59:19] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)
|
|
||||||
[2017-08-29 10:59:33] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
|
||||||
[2017-08-29 10:59:33] [INFO] checking state of node 1, 1 of 5 attempts
|
|
||||||
[2017-08-29 10:59:33] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
(...)
|
|
||||||
[2017-08-29 10:59:37] [INFO] checking state of node 1, 5 of 5 attempts
|
|
||||||
[2017-08-29 10:59:37] [WARNING] unable to reconnect to node 1 after 5 attempts
|
|
||||||
[2017-08-29 10:59:37] [NOTICE] this node is not configured for automatic failover so will not be considered as promotion candidate
|
|
||||||
[2017-08-29 10:59:37] [NOTICE] no other nodes are available as promotion candidate
|
|
||||||
[2017-08-29 10:59:37] [HINT] use "repmgr standby promote" to manually promote this node
|
|
||||||
[2017-08-29 10:59:37] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
|
||||||
[2017-08-29 10:59:53] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
|
||||||
[2017-08-29 11:00:45] [NOTICE] reconnected to upstream node 1 after 68 seconds, resuming monitoring
|
|
||||||
[2017-08-29 11:00:57] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)</programlisting>
|
|
||||||
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
By default, <literal>repmgrd</literal> will continue in degraded monitoring mode indefinitely.
|
|
||||||
However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
|
|
||||||
after which <application>repmgrd</application> will terminate.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
If <application>repmgrd</application> is monitoring a primary node which has been stopped
|
|
||||||
and manually restarted as a standby attached to a new primary, it will automatically detect
|
|
||||||
the status change and update the node record to reflect the node's new status
|
|
||||||
as an active standby. It will then resume monitoring the node as a standby.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
</chapter>
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
<chapter id="repmgrd-demonstration">
|
|
||||||
<title>repmgrd demonstration</title>
|
|
||||||
<para>
|
|
||||||
To demonstrate automatic failover, set up a 3-node replication cluster (one primary
|
|
||||||
and two standbys streaming directly from the primary) so that the cluster looks
|
|
||||||
something like this:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
|
||||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
|
||||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
|
||||||
1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr
|
|
||||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr
|
|
||||||
3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Start <application>repmgrd</application> on each standby and verify that it's running by examining the
|
|
||||||
log output, which at log level <literal>INFO</literal> will look like this:
|
|
||||||
<programlisting>
|
|
||||||
[2017-08-24 17:31:00] [NOTICE] using configuration file "/etc/repmgr.conf"
|
|
||||||
[2017-08-24 17:31:00] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr"
|
|
||||||
[2017-08-24 17:31:00] [NOTICE] starting monitoring of node <literal>node2</literal> (ID: 2)
|
|
||||||
[2017-08-24 17:31:00] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Each <application>repmgrd</application> should also have recorded its successful startup as an event:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
|
|
||||||
Node ID | Name | Event | OK | Timestamp | Details
|
|
||||||
---------+-------+---------------+----+---------------------+-------------------------------------------------------------
|
|
||||||
3 | node3 | repmgrd_start | t | 2017-08-24 17:35:54 | monitoring connection to upstream node "node1" (node ID: 1)
|
|
||||||
2 | node2 | repmgrd_start | t | 2017-08-24 17:35:50 | monitoring connection to upstream node "node1" (node ID: 1)
|
|
||||||
1 | node1 | repmgrd_start | t | 2017-08-24 17:35:46 | monitoring cluster primary "node1" (node ID: 1) </programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Now stop the current primary server with e.g.:
|
|
||||||
<programlisting>
|
|
||||||
pg_ctl -D /var/lib/postgresql/data -m immediate stop</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This will force the primary to shut down straight away, aborting all processes
|
|
||||||
and transactions. This will cause a flurry of activity in the <application>repmgrd</application> log
|
|
||||||
files as each <application>repmgrd</application> detects the failure of the primary and a failover
|
|
||||||
decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
|
|
||||||
which has been promoted to new primary after failure of the original primary (<literal>node1</literal>).
|
|
||||||
<programlisting>
|
|
||||||
[2017-08-24 23:32:01] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state
|
|
||||||
[2017-08-24 23:32:08] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
|
||||||
[2017-08-24 23:32:08] [INFO] checking state of node 1, 1 of 5 attempts
|
|
||||||
[2017-08-24 23:32:08] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-08-24 23:32:09] [INFO] checking state of node 1, 2 of 5 attempts
|
|
||||||
[2017-08-24 23:32:09] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-08-24 23:32:10] [INFO] checking state of node 1, 3 of 5 attempts
|
|
||||||
[2017-08-24 23:32:10] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-08-24 23:32:11] [INFO] checking state of node 1, 4 of 5 attempts
|
|
||||||
[2017-08-24 23:32:11] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-08-24 23:32:12] [INFO] checking state of node 1, 5 of 5 attempts
|
|
||||||
[2017-08-24 23:32:12] [WARNING] unable to reconnect to node 1 after 5 attempts
|
|
||||||
INFO: setting voting term to 1
|
|
||||||
INFO: node 2 is candidate
|
|
||||||
INFO: node 3 has received request from node 2 for electoral term 1 (our term: 0)
|
|
||||||
[2017-08-24 23:32:12] [NOTICE] this node is the winner, will now promote self and inform other nodes
|
|
||||||
INFO: connecting to standby database
|
|
||||||
NOTICE: promoting standby
|
|
||||||
DETAIL: promoting server using 'pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' promote'
|
|
||||||
INFO: reconnecting to promoted server
|
|
||||||
NOTICE: STANDBY PROMOTE successful
|
|
||||||
DETAIL: node 2 was successfully promoted to primary
|
|
||||||
INFO: node 3 received notification to follow node 2
|
|
||||||
[2017-08-24 23:32:13] [INFO] switching to primary monitoring mode</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The cluster status will now look like this, with the original primary (<literal>node1</literal>)
|
|
||||||
marked as inactive, and standby <literal>node3</literal> now following the new primary
|
|
||||||
(<literal>node2</literal>):
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
|
||||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
|
||||||
----+-------+---------+-----------+----------+----------+----------------------------------------------------
|
|
||||||
1 | node1 | primary | - failed | | default | host=node1 dbname=repmgr user=repmgr
|
|
||||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
|
||||||
3 | node3 | standby | running | node2 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
|
||||||
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<command>repmgr cluster event</command> will display a summary of what happened to each server
|
|
||||||
during the failover:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf cluster event
|
|
||||||
Node ID | Name | Event | OK | Timestamp | Details
|
|
||||||
---------+-------+--------------------------+----+---------------------+-----------------------------------------------------------------------------------
|
|
||||||
3 | node3 | repmgrd_failover_follow | t | 2017-08-24 23:32:16 | node 3 now following new upstream node 2
|
|
||||||
3 | node3 | standby_follow | t | 2017-08-24 23:32:16 | node 3 is now attached to node 2
|
|
||||||
2 | node2 | repmgrd_failover_promote | t | 2017-08-24 23:32:13 | node 2 promoted to primary; old primary 1 marked as failed
|
|
||||||
2 | node2 | standby_promote | t | 2017-08-24 23:32:13 | node 2 was successfully promoted to primary</programlisting>
|
|
||||||
</para>
|
|
||||||
</chapter>
|
|
||||||
@@ -1,80 +0,0 @@
|
|||||||
<chapter id="repmgrd-monitoring" xreflabel="Monitoring with repmgrd">
|
|
||||||
<indexterm>
|
|
||||||
<primary>repmgrd</primary>
|
|
||||||
<secondary>monitoring</secondary>
|
|
||||||
</indexterm>
|
|
||||||
<indexterm>
|
|
||||||
<primary>monitoring</primary>
|
|
||||||
<secondary>with repmgrd</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<title>Monitoring with repmgrd</title>
|
|
||||||
<para>
|
|
||||||
When <application>repmgrd</application> is running with the option <literal>monitoring_history=true</literal>,
|
|
||||||
it will constantly write standby node status information to the
|
|
||||||
<varname>monitoring_history</varname> table, providing a near-real time
|
|
||||||
overview of replication status on all nodes
|
|
||||||
in the cluster.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The view <literal>replication_status</literal> shows the most recent state
|
|
||||||
for each node, e.g.:
|
|
||||||
<programlisting>
|
|
||||||
repmgr=# select * from repmgr.replication_status;
|
|
||||||
-[ RECORD 1 ]-------------+------------------------------
|
|
||||||
primary_node_id | 1
|
|
||||||
standby_node_id | 2
|
|
||||||
standby_name | node2
|
|
||||||
node_type | standby
|
|
||||||
active | t
|
|
||||||
last_monitor_time | 2017-08-24 16:28:41.260478+09
|
|
||||||
last_wal_primary_location | 0/6D57A00
|
|
||||||
last_wal_standby_location | 0/5000000
|
|
||||||
replication_lag | 29 MB
|
|
||||||
replication_time_lag | 00:00:11.736163
|
|
||||||
apply_lag | 15 MB
|
|
||||||
communication_time_lag | 00:00:01.365643</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The interval in which monitoring history is written is controlled by the
|
|
||||||
configuration parameter <varname>monitor_interval_secs</varname>;
|
|
||||||
default is 2.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
As this can generate a large amount of monitoring data in the table
|
|
||||||
<literal>repmgr.monitoring_history</literal>, it's advisable to regularly
|
|
||||||
purge historical data using the <xref linkend="repmgr-cluster-cleanup">
|
|
||||||
command; use the <literal>-k/--keep-history</literal> option to
|
|
||||||
specify how many days' worth of data should be retained.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
It's possible to use <application>repmgrd</application> to run in monitoring
|
|
||||||
mode only (without automatic failover capability) for some or all
|
|
||||||
nodes by setting <literal>failover=manual</literal> in the node's
|
|
||||||
<filename>repmgr.conf</filename> file. In the event of the node's upstream failing,
|
|
||||||
no failover action will be taken and the node will require manual intervention to
|
|
||||||
be reattached to replication. If this occurs, an
|
|
||||||
<link linkend="event-notifications">event notification</link>
|
|
||||||
<varname>standby_disconnect_manual</varname> will be created.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Note that when a standby node is not streaming directly from its upstream
|
|
||||||
node, e.g. recovering WAL from an archive, <varname>apply_lag</varname> will always appear as
|
|
||||||
<literal>0 bytes</literal>.
|
|
||||||
</para>
|
|
||||||
<tip>
|
|
||||||
<para>
|
|
||||||
If monitoring history is enabled, the contents of the <literal>repmgr.monitoring_history</literal>
|
|
||||||
table will be replicated to attached standbys. This means there will be a small but
|
|
||||||
constant stream of replication activity which may not be desirable. To prevent
|
|
||||||
this, convert the table to an <literal>UNLOGGED</literal> one with:
|
|
||||||
<programlisting>
|
|
||||||
ALTER TABLE repmgr.monitoring_history SET UNLOGGED;</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This will however mean that monitoring history will not be available on
|
|
||||||
another node following a failover, and the view <literal>repmgr.replication_status</literal>
|
|
||||||
will not work on standbys.
|
|
||||||
</para>
|
|
||||||
</tip>
|
|
||||||
</chapter>
|
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
<chapter id="repmgrd-network-split" xreflabel="Handling network splits with repmgrd">
|
|
||||||
<indexterm>
|
|
||||||
<primary>repmgrd</primary>
|
|
||||||
<secondary>network splits</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<title>Handling network splits with repmgrd</title>
|
|
||||||
<para>
|
|
||||||
A common pattern for replication cluster setups is to spread servers over
|
|
||||||
more than one datacentre. This can provide benefits such as geographically-
|
|
||||||
distributed read replicas and DR (disaster recovery) capability. However
|
|
||||||
this also means there is a risk of disconnection at network level between
|
|
||||||
datacentre locations, which would result in a split-brain scenario if
|
|
||||||
servers in a secondary data centre were no longer able to see the primary
|
|
||||||
in the main data centre and promoted a standby among themselves.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
&repmgr; enables provision of "<xref linkend="witness-server">" to
|
|
||||||
artificially create a quorum of servers in a particular location, ensuring
|
|
||||||
that nodes in another location will not elect a new primary if they
|
|
||||||
are unable to see the majority of nodes. However this approach does not
|
|
||||||
scale well, particularly with more complex replication setups, e.g.
|
|
||||||
where the majority of nodes are located outside of the primary datacentre.
|
|
||||||
It also means the <literal>witness</literal> node needs to be managed as an
|
|
||||||
extra PostgreSQL instance outside of the main replication cluster, which
|
|
||||||
adds administrative and programming complexity.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<literal>repmgr4</literal> introduces the concept of <literal>location</literal>:
|
|
||||||
each node is associated with an arbitrary location string (default is
|
|
||||||
<literal>default</literal>); this is set in <filename>repmgr.conf</filename>, e.g.:
|
|
||||||
<programlisting>
|
|
||||||
node_id=1
|
|
||||||
node_name=node1
|
|
||||||
conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'
|
|
||||||
data_directory='/var/lib/postgresql/data'
|
|
||||||
location='dc1'</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
In a failover situation, <application>repmgrd</application> will check if any servers in the
|
|
||||||
same location as the current primary node are visible. If not, <application>repmgrd</application>
|
|
||||||
will assume a network interruption and not promote any node in any
|
|
||||||
other location (it will however enter <link linkend="repmgrd-degraded-monitoring">degraded monitoring</link>
|
|
||||||
mode until a primary becomes visible).
|
|
||||||
</para>
|
|
||||||
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
386
doc/repmgrd-operation.sgml
Normal file
386
doc/repmgrd-operation.sgml
Normal file
@@ -0,0 +1,386 @@
|
|||||||
|
<chapter id="repmgrd-operation" xreflabel="repmgrd operation">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>operation</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>repmgrd operation</title>
|
||||||
|
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-pausing">
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>pausing</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>pausing repmgrd</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Pausing repmgrd</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
In normal operation, <application>repmgrd</application> monitors the state of the
|
||||||
|
PostgreSQL node it is running on, and will take appropriate action if problems
|
||||||
|
are detected, e.g. (if so configured) promote the node to primary, if the existing
|
||||||
|
primary has been determined as failed.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
However, <application>repmgrd</application> is unable to distinguish between
|
||||||
|
planned outages (such as performing a <link linkend="performing-switchover">switchover</link>
|
||||||
|
or installing PostgreSQL maintenance releases), and an actual server outage. In versions prior to
|
||||||
|
&repmgr; 4.2 it was necessary to stop <application>repmgrd</application> on all nodes (or at least
|
||||||
|
on all nodes where <application>repmgrd</application> is
|
||||||
|
<link linkend="repmgrd-automatic-failover">configured for automatic failover</link>)
|
||||||
|
to prevent <application>repmgrd</application> from making unintentional changes to the
|
||||||
|
replication cluster.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
From <link linkend="release-4.2">&repmgr; 4.2</link>, <application>repmgrd</application>
|
||||||
|
can now be "paused", i.e. instructed not to take any action such as performing a failover.
|
||||||
|
This can be done from any node in the cluster, removing the need to stop/restart
|
||||||
|
each <application>repmgrd</application> individually.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
For major PostgreSQL upgrades, e.g. from PostgreSQL 10 to PostgreSQL 11,
|
||||||
|
<application>repmgrd</application> should be shut down completely and only started up
|
||||||
|
once the &repmgr; packages for the new PostgreSQL major version have been installed.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<sect2 id="repmgrd-pausing-prerequisites">
|
||||||
|
<title>Prerequisites for pausing <application>repmgrd</application></title>
|
||||||
|
<para>
|
||||||
|
In order to be able to pause/unpause <application>repmgrd</application>, the following
|
||||||
|
prerequisites must be met:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara><link linkend="release-4.2">&repmgr; 4.2</link> or later must be installed on all nodes.</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>The same major &repmgr; version (e.g. 4.2) must be installed on all nodes (and preferably the same minor version).</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
PostgreSQL on all nodes must be accessible from the node where the
|
||||||
|
<literal>pause</literal>/<literal>unpause</literal> operation is executed, using the
|
||||||
|
<varname>conninfo</varname> string shown by <link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
These conditions are required for normal &repmgr; operation in any case.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="repmgrd-pausing-execution">
|
||||||
|
<title>Pausing/unpausing <application>repmgrd</application></title>
|
||||||
|
<para>
|
||||||
|
To pause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link>, e.g.:
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr -f /etc/repmgr.conf daemon pause
|
||||||
|
NOTICE: node 1 (node1) paused
|
||||||
|
NOTICE: node 2 (node2) paused
|
||||||
|
NOTICE: node 3 (node3) paused</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The state of <application>repmgrd</application> on each node can be checked with
|
||||||
|
<link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>, e.g.:
|
||||||
|
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||||
|
ID | Name | Role | Status | repmgrd | PID | Paused?
|
||||||
|
----+-------+---------+---------+---------+------+---------
|
||||||
|
1 | node1 | primary | running | running | 7851 | yes
|
||||||
|
2 | node2 | standby | running | running | 7889 | yes
|
||||||
|
3 | node3 | standby | running | running | 7918 | yes</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If executing a switchover with <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||||
|
&repmgr; will automatically pause/unpause <application>repmgrd</application> as part of the switchover process.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
If the primary (in this example, <literal>node1</literal>) is stopped, <application>repmgrd</application>
|
||||||
|
running on one of the standbys (here: <literal>node2</literal>) will react like this:
|
||||||
|
<programlisting>
|
||||||
|
[2018-09-20 12:22:21] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||||
|
[2018-09-20 12:22:21] [INFO] checking state of node 1, 1 of 5 attempts
|
||||||
|
[2018-09-20 12:22:21] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||||
|
...
|
||||||
|
[2018-09-20 12:22:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||||
|
[2018-09-20 12:22:25] [INFO] checking state of node 1, 5 of 5 attempts
|
||||||
|
[2018-09-20 12:22:25] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||||
|
[2018-09-20 12:22:25] [NOTICE] node is paused
|
||||||
|
[2018-09-20 12:22:33] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state
|
||||||
|
[2018-09-20 12:22:33] [DETAIL] repmgrd paused by administrator
|
||||||
|
[2018-09-20 12:22:33] [HINT] execute "repmgr daemon unpause" to resume normal failover mode</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the primary becomes available again (e.g. following a software upgrade), <application>repmgrd</application>
|
||||||
|
will automatically reconnect, e.g.:
|
||||||
|
<programlisting>
|
||||||
|
[2018-09-20 13:12:41] [NOTICE] reconnected to upstream node 1 after 8 seconds, resuming monitoring</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
To unpause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>, e.g.:
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr -f /etc/repmgr.conf daemon unpause
|
||||||
|
NOTICE: node 1 (node1) unpaused
|
||||||
|
NOTICE: node 2 (node2) unpaused
|
||||||
|
NOTICE: node 3 (node3) unpaused</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If the previous primary is no longer accessible when <application>repmgrd</application>
|
||||||
|
is unpaused, no failover action will be taken. Instead, a new primary must be manually promoted using
|
||||||
|
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>,
|
||||||
|
and any standbys attached to the new primary with
|
||||||
|
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This is to prevent <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
|
||||||
|
resulting in the automatic promotion of a new primary, which may be a problem particularly
|
||||||
|
in larger clusters, where <application>repmgrd</application> could select a different promotion
|
||||||
|
candidate to the one intended by the administrator.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</sect2>
|
||||||
|
<sect2 id="repmgrd-pausing-details">
|
||||||
|
<title>Details on the <application>repmgrd</application> pausing mechanism</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The pause state of each node will be preserved across a PostgreSQL restart.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
|
||||||
|
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link> can be
|
||||||
|
executed even if <application>repmgrd</application> is not running; in this case,
|
||||||
|
<application>repmgrd</application> will start up in whichever pause state has been set.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
|
||||||
|
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
|
||||||
|
<emphasis>do not</emphasis> stop/start <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</sect2>
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-wal-replay-pause">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>paused WAL replay</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>repmgrd and paused WAL replay</title>
|
||||||
|
<para>
|
||||||
|
If WAL replay has been paused (using <command>pg_wal_replay_pause()</command>,
|
||||||
|
on PostgreSQL 9.6 and earlier <command>pg_xlog_replay_pause()</command>),
|
||||||
|
in a failover situation <application>repmgrd</application> will
|
||||||
|
automatically resume WAL replay.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This is because if WAL replay is paused, but WAL is pending replay,
|
||||||
|
PostgreSQL cannot be promoted until WAL replay is resumed.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
<command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>
|
||||||
|
will refuse to promote a node in this state, as the PostgreSQL
|
||||||
|
<command>promote</command> command will not be acted on until
|
||||||
|
WAL replay is resumed, leaving the cluster in a potentially
|
||||||
|
unstable state. In this case it is up to the user to
|
||||||
|
decide whether to resume WAL replay.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-degraded-monitoring" xreflabel="repmgrd degraded monitoring">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>degraded monitoring</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>degraded monitoring</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>"degraded monitoring" mode</title>
|
||||||
|
<para>
|
||||||
|
In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
|
||||||
|
of monitoring the node's upstream server. In these cases it enters "degraded monitoring"
|
||||||
|
mode, where <application>repmgrd</application> remains active but is waiting for the situation
|
||||||
|
to be resolved.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Situations where this happens are:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>a failover situation has occurred, but no nodes in the primary node's location are visible</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>a failover situation has occurred, but no promotion candidate is available</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>a failover situation has occurred, but the promotion candidate could not be promoted</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>a failover situation has occurred, but the node was unable to follow the new primary</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>a failover situation has occurred, but no primary has become available</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>a failover situation has occurred, but automatic failover is not enabled for the node</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary)</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Example output in a situation where there is only one standby with <literal>failover=manual</literal>,
|
||||||
|
and the primary node is unavailable (but is later restarted):
|
||||||
|
<programlisting>
|
||||||
|
[2017-08-29 10:59:19] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)
|
||||||
|
[2017-08-29 10:59:33] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||||
|
[2017-08-29 10:59:33] [INFO] checking state of node 1, 1 of 5 attempts
|
||||||
|
[2017-08-29 10:59:33] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||||
|
(...)
|
||||||
|
[2017-08-29 10:59:37] [INFO] checking state of node 1, 5 of 5 attempts
|
||||||
|
[2017-08-29 10:59:37] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||||
|
[2017-08-29 10:59:37] [NOTICE] this node is not configured for automatic failover so will not be considered as promotion candidate
|
||||||
|
[2017-08-29 10:59:37] [NOTICE] no other nodes are available as promotion candidate
|
||||||
|
[2017-08-29 10:59:37] [HINT] use "repmgr standby promote" to manually promote this node
|
||||||
|
[2017-08-29 10:59:37] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||||
|
[2017-08-29 10:59:53] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||||
|
[2017-08-29 11:00:45] [NOTICE] reconnected to upstream node 1 after 68 seconds, resuming monitoring
|
||||||
|
[2017-08-29 11:00:57] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)</programlisting>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
By default, <application>repmgrd</application> will continue in degraded monitoring mode indefinitely.
|
||||||
|
However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
|
||||||
|
after which <application>repmgrd</application> will terminate.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If <application>repmgrd</application> is monitoring a primary node which has been stopped
|
||||||
|
and manually restarted as a standby attached to a new primary, it will automatically detect
|
||||||
|
the status change and update the node record to reflect the node's new status
|
||||||
|
as an active standby. It will then resume monitoring the node as a standby.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-monitoring" xreflabel="Storing monitoring data">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>monitoring</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<indexterm>
|
||||||
|
<primary>monitoring</primary>
|
||||||
|
<secondary>with repmgrd</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>Storing monitoring data</title>
|
||||||
|
<para>
|
||||||
|
When <application>repmgrd</application> is running with the option <literal>monitoring_history=true</literal>,
|
||||||
|
it will constantly write standby node status information to the
|
||||||
|
<varname>monitoring_history</varname> table, providing a near-real-time
|
||||||
|
overview of replication status on all nodes
|
||||||
|
in the cluster.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The view <literal>replication_status</literal> shows the most recent state
|
||||||
|
for each node, e.g.:
|
||||||
|
<programlisting>
|
||||||
|
repmgr=# select * from repmgr.replication_status;
|
||||||
|
-[ RECORD 1 ]-------------+------------------------------
|
||||||
|
primary_node_id | 1
|
||||||
|
standby_node_id | 2
|
||||||
|
standby_name | node2
|
||||||
|
node_type | standby
|
||||||
|
active | t
|
||||||
|
last_monitor_time | 2017-08-24 16:28:41.260478+09
|
||||||
|
last_wal_primary_location | 0/6D57A00
|
||||||
|
last_wal_standby_location | 0/5000000
|
||||||
|
replication_lag | 29 MB
|
||||||
|
replication_time_lag | 00:00:11.736163
|
||||||
|
apply_lag | 15 MB
|
||||||
|
communication_time_lag | 00:00:01.365643</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The interval in which monitoring history is written is controlled by the
|
||||||
|
configuration parameter <varname>monitor_interval_secs</varname>;
|
||||||
|
default is 2.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
As this can generate a large amount of monitoring data in the table
|
||||||
|
<literal>repmgr.monitoring_history</literal>, it's advisable to regularly
|
||||||
|
purge historical data using the <xref linkend="repmgr-cluster-cleanup">
|
||||||
|
command; use the <literal>-k/--keep-history</literal> option to
|
||||||
|
specify how many days' worth of data should be retained.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
It's possible to run <application>repmgrd</application> in monitoring
|
||||||
|
mode only (without automatic failover capability) for some or all
|
||||||
|
nodes by setting <literal>failover=manual</literal> in the node's
|
||||||
|
<filename>repmgr.conf</filename> file. In the event of the node's upstream failing,
|
||||||
|
no failover action will be taken and the node will require manual intervention to
|
||||||
|
be reattached to replication. If this occurs, an
|
||||||
|
<link linkend="event-notifications">event notification</link>
|
||||||
|
<varname>standby_disconnect_manual</varname> will be created.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that when a standby node is not streaming directly from its upstream
|
||||||
|
node, e.g. recovering WAL from an archive, <varname>apply_lag</varname> will always appear as
|
||||||
|
<literal>0 bytes</literal>.
|
||||||
|
</para>
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
If monitoring history is enabled, the contents of the <literal>repmgr.monitoring_history</literal>
|
||||||
|
table will be replicated to attached standbys. This means there will be a small but
|
||||||
|
constant stream of replication activity which may not be desirable. To prevent
|
||||||
|
this, convert the table to an <literal>UNLOGGED</literal> one with:
|
||||||
|
<programlisting>
|
||||||
|
ALTER TABLE repmgr.monitoring_history SET UNLOGGED;</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This will however mean that monitoring history will not be available on
|
||||||
|
another node following a failover, and the view <literal>repmgr.replication_status</literal>
|
||||||
|
will not work on standbys.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
|
</chapter>
|
||||||
187
doc/repmgrd-overview.sgml
Normal file
187
doc/repmgrd-overview.sgml
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
<chapter id="repmgrd-overview" xreflabel="repmgrd overview">
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>overview</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<title>repmgrd overview</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> ("<literal>replication manager daemon</literal>")
|
||||||
|
is a management and monitoring daemon which runs
|
||||||
|
on each node in a replication cluster. It can automate actions such as
|
||||||
|
failover and updating standbys to follow the new primary, as well as
|
||||||
|
providing monitoring information about the state of each standby.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> is designed to be straightforward to set up
|
||||||
|
and does not require additional external infrastructure.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Functionality provided by <application>repmgrd</application> includes:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
wide range of <link linkend="repmgrd-basic-configuration">configuration options</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
option to execute custom scripts ("<link linkend="event-notifications">event notifications</link>")
|
||||||
|
at different points in the failover sequence
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
ability to <link linkend="repmgrd-pausing">pause repmgrd</link>
|
||||||
|
operation on all nodes with a
|
||||||
|
<link linkend="repmgr-daemon-pause"><command>single command</command></link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
optional <link linkend="repmgrd-witness-server">witness server</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
"location" configuration option to restrict
|
||||||
|
potential promotion candidates to a single location
|
||||||
|
(e.g. when nodes are spread over multiple data centres)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="connection-check-type">choice of method</link> to determine node availability
|
||||||
|
(PostgreSQL ping, query execution or new connection)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
retention of monitoring statistics (optional)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<sect1 id="repmgrd-demonstration">
|
||||||
|
|
||||||
|
<title>repmgrd demonstration</title>
|
||||||
|
<para>
|
||||||
|
To demonstrate automatic failover, set up a 3-node replication cluster (one primary
|
||||||
|
and two standbys streaming directly from the primary) so that the cluster looks
|
||||||
|
something like this:
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr -f /etc/repmgr.conf cluster show --compact
|
||||||
|
ID | Name | Role | Status | Upstream | Location | Prio.
|
||||||
|
----+-------+---------+-----------+----------+----------+-------
|
||||||
|
1 | node1 | primary | * running | | default | 100
|
||||||
|
2 | node2 | standby | running | node1 | default | 100
|
||||||
|
3 | node3 | standby | running | node1 | default | 100</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
See section <link linkend="repmgrd-automatic-failover-configuration">Required configuration for automatic failover</link>
|
||||||
|
for an example of minimal <filename>repmgr.conf</filename> file settings suitable for use with <application>repmgrd</application>.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
<para>
|
||||||
|
Start <application>repmgrd</application> on each standby and verify that it's running by examining the
|
||||||
|
log output, which at log level <literal>INFO</literal> will look like this:
|
||||||
|
<programlisting>
|
||||||
|
[2019-03-15 06:32:05] [NOTICE] repmgrd (repmgrd 4.3) starting up
|
||||||
|
[2019-03-15 06:32:05] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr connect_timeout=2"
|
||||||
|
INFO: set_repmgrd_pid(): provided pidfile is /var/run/repmgr/repmgrd-11.pid
|
||||||
|
[2019-03-15 06:32:05] [NOTICE] starting monitoring of node "node2" (ID: 2)
|
||||||
|
[2019-03-15 06:32:05] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Each <application>repmgrd</application> should also have recorded its successful startup as an event:
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
|
||||||
|
Node ID | Name | Event | OK | Timestamp | Details
|
||||||
|
---------+-------+---------------+----+---------------------+-------------------------------------------------------------
|
||||||
|
3 | node3 | repmgrd_start | t | 2019-03-14 04:17:30 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||||
|
2 | node2 | repmgrd_start | t | 2019-03-14 04:11:47 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||||
|
1 | node1 | repmgrd_start | t | 2019-03-14 04:04:31 | monitoring cluster primary "node1" (node ID: 1)</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Now stop the current primary server with e.g.:
|
||||||
|
<programlisting>
|
||||||
|
pg_ctl -D /var/lib/postgresql/data -m immediate stop</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This will force the primary to shut down straight away, aborting all processes
|
||||||
|
and transactions. This will cause a flurry of activity in the <application>repmgrd</application> log
|
||||||
|
files as each <application>repmgrd</application> detects the failure of the primary and a failover
|
||||||
|
decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
|
||||||
|
which has been promoted to new primary after failure of the original primary (<literal>node1</literal>).
|
||||||
|
<programlisting>
|
||||||
|
[2019-03-15 06:37:50] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||||
|
[2019-03-15 06:37:50] [INFO] checking state of node 1, 1 of 3 attempts
|
||||||
|
[2019-03-15 06:37:50] [INFO] sleeping 5 seconds until next reconnection attempt
|
||||||
|
[2019-03-15 06:37:55] [INFO] checking state of node 1, 2 of 3 attempts
|
||||||
|
[2019-03-15 06:37:55] [INFO] sleeping 5 seconds until next reconnection attempt
|
||||||
|
[2019-03-15 06:38:00] [INFO] checking state of node 1, 3 of 3 attempts
|
||||||
|
[2019-03-15 06:38:00] [WARNING] unable to reconnect to node 1 after 3 attempts
|
||||||
|
[2019-03-15 06:38:00] [INFO] primary and this node have the same location ("default")
|
||||||
|
[2019-03-15 06:38:00] [INFO] local node's last receive lsn: 0/900CBF8
|
||||||
|
[2019-03-15 06:38:00] [INFO] node 3 last saw primary node 12 second(s) ago
|
||||||
|
[2019-03-15 06:38:00] [INFO] last receive LSN for sibling node "node3" (ID: 3) is: 0/900CBF8
|
||||||
|
[2019-03-15 06:38:00] [INFO] node "node3" (ID: 3) has same LSN as current candidate "node2" (ID: 2)
|
||||||
|
[2019-03-15 06:38:00] [INFO] visible nodes: 2; total nodes: 2; no nodes have seen the primary within the last 4 seconds
|
||||||
|
[2019-03-15 06:38:00] [NOTICE] promotion candidate is "node2" (ID: 2)
|
||||||
|
[2019-03-15 06:38:00] [NOTICE] this node is the winner, will now promote itself and inform other nodes
|
||||||
|
[2019-03-15 06:38:00] [INFO] promote_command is:
|
||||||
|
"/usr/pgsql-11/bin/repmgr -f /etc/repmgr/11/repmgr.conf standby promote"
|
||||||
|
NOTICE: promoting standby to primary
|
||||||
|
DETAIL: promoting server "node2" (ID: 2) using "/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote"
|
||||||
|
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
|
||||||
|
NOTICE: STANDBY PROMOTE successful
|
||||||
|
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
|
||||||
|
[2019-03-15 06:38:01] [INFO] 3 followers to notify
|
||||||
|
[2019-03-15 06:38:01] [NOTICE] notifying node "node3" (node ID: 3) to follow node 2
|
||||||
|
INFO: node 3 received notification to follow node 2
|
||||||
|
[2019-03-15 06:38:01] [INFO] switching to primary monitoring mode
|
||||||
|
[2019-03-15 06:38:01] [NOTICE] monitoring cluster primary "node2" (node ID: 2)</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The cluster status will now look like this, with the original primary (<literal>node1</literal>)
|
||||||
|
marked as inactive, and standby <literal>node3</literal> now following the new primary
|
||||||
|
(<literal>node2</literal>):
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr -f /etc/repmgr.conf cluster show --compact
|
||||||
|
ID | Name | Role | Status | Upstream | Location | Prio.
|
||||||
|
----+-------+---------+-----------+----------+----------+-------
|
||||||
|
1 | node1 | primary | - failed | | default | 100
|
||||||
|
2 | node2 | primary | * running | | default | 100
|
||||||
|
3 | node3 | standby | running | node2 | default | 100</programlisting>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-cluster-event"><command>repmgr cluster event</command></link> will display a summary of
|
||||||
|
what happened to each server during the failover:
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr -f /etc/repmgr.conf cluster event
|
||||||
|
Node ID | Name | Event | OK | Timestamp | Details
|
||||||
|
---------+-------+----------------------------+----+---------------------+-------------------------------------------------------------
|
||||||
|
3 | node3 | repmgrd_failover_follow | t | 2019-03-15 06:38:03 | node 3 now following new upstream node 2
|
||||||
|
3 | node3 | standby_follow | t | 2019-03-15 06:38:02 | standby attached to upstream node "node2" (node ID: 2)
|
||||||
|
2 | node2 | repmgrd_reload | t | 2019-03-15 06:38:01 | monitoring cluster primary "node2" (node ID: 2)
|
||||||
|
2 | node2 | repmgrd_failover_promote | t | 2019-03-15 06:38:01 | node 2 promoted to primary; old primary 1 marked as failed
|
||||||
|
2 | node2 | standby_promote | t | 2019-03-15 06:38:01 | server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
</chapter>
|
||||||
@@ -1,169 +0,0 @@
|
|||||||
<chapter id="repmgrd-pausing" xreflabel="Pausing repmgrd">
|
|
||||||
|
|
||||||
<indexterm>
|
|
||||||
<primary>repmgrd</primary>
|
|
||||||
<secondary>pausing</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<indexterm>
|
|
||||||
<primary>pausing repmgrd</primary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<title>Pausing repmgrd</title>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
In normal operation, <application>repmgrd</application> monitors the state of the
|
|
||||||
PostgreSQL node it is running on, and will take appropriate action if problems
|
|
||||||
are detected, e.g. (if so configured) promote the node to primary, if the existing
|
|
||||||
primary has been determined as failed.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
However, <application>repmgrd</application> is unable to distinguish between
|
|
||||||
planned outages (such as performing a <link linkend="performing-switchover">switchover</link>
|
|
||||||
or upgrading a server), and an actual server outage. In versions prior to &repmgr; 4.2
|
|
||||||
it was necessary to stop <application>repmgrd</application> on all nodes (or at least
|
|
||||||
on all nodes where <application>repmgrd</application> is
|
|
||||||
<link linkend="repmgrd-automatic-failover">configured for automatic failover</link>)
|
|
||||||
to prevent <application>repmgrd</application> from making changes to the
|
|
||||||
replication cluster.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
From <link linkend="release-4.2">&repmgr; 4.2</link>, <application>repmgrd</application>
|
|
||||||
can now be "paused", i.e. instructed not to take any action such as performing a failover.
|
|
||||||
This can be done from any node in the cluster, removing the need to stop/restart
|
|
||||||
each <application>repmgrd</application> individually.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<sect1 id="repmgrd-pausing-prerequisites">
|
|
||||||
<title>Prerequisites for pausing <application>repmgrd</application></title>
|
|
||||||
<para>
|
|
||||||
In order to be able to pause/unpause <application>repmgrd</application>, the following
|
|
||||||
prerequisites must be met:
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara><link linkend="release-4.2">&repmgr; 4.2</link> or later must be installed on all nodes.</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>The same major &repmgr; version (e.g. 4.2) must be installed on all nodes (and preferably the same minor version).</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
PostgreSQL on all nodes must be accessible from the node where the
|
|
||||||
<literal>pause</literal>/<literal>unpause</literal> operation is executed, using the
|
|
||||||
<varname>conninfo</varname> string shown by <link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>.
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
These conditions are required for normal &repmgr; operation in any case.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="repmgrd-pausing-execution">
|
|
||||||
<title>Pausing/unpausing <application>repmgrd</application></title>
|
|
||||||
<para>
|
|
||||||
To pause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link>, e.g.:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf daemon pause
|
|
||||||
NOTICE: node 1 (node1) paused
|
|
||||||
NOTICE: node 2 (node2) paused
|
|
||||||
NOTICE: node 3 (node3) paused</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The state of <application>repmgrd</application> on each node can be checked with
|
|
||||||
<link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>, e.g.:
|
|
||||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
|
||||||
ID | Name | Role | Status | repmgrd | PID | Paused?
|
|
||||||
----+-------+---------+---------+---------+------+---------
|
|
||||||
1 | node1 | primary | running | running | 7851 | yes
|
|
||||||
2 | node2 | standby | running | running | 7889 | yes
|
|
||||||
3 | node3 | standby | running | running | 7918 | yes</programlisting>
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
If executing a switchover with <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
|
||||||
&repmgr; will automatically pause/unpause <application>repmgrd</application> as part of the switchover process.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
If the primary (in this example, <literal>node1</literal>) is stopped, <application>repmgrd</application>
|
|
||||||
running on one of the standbys (here: <literal>node2</literal>) will react like this:
|
|
||||||
<programlisting>
|
|
||||||
[2018-09-20 12:22:21] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
|
||||||
[2018-09-20 12:22:21] [INFO] checking state of node 1, 1 of 5 attempts
|
|
||||||
[2018-09-20 12:22:21] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
...
|
|
||||||
[2018-09-20 12:22:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2018-09-20 12:22:25] [INFO] checking state of node 1, 5 of 5 attempts
|
|
||||||
[2018-09-20 12:22:25] [WARNING] unable to reconnect to node 1 after 5 attempts
|
|
||||||
[2018-09-20 12:22:25] [NOTICE] node is paused
|
|
||||||
[2018-09-20 12:22:33] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state
|
|
||||||
[2018-09-20 12:22:33] [DETAIL] repmgrd paused by administrator
|
|
||||||
[2018-09-20 12:22:33] [HINT] execute "repmgr daemon unpause" to resume normal failover mode</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
If the primary becomes available again (e.g. following a software upgrade), <application>repmgrd</application>
|
|
||||||
will automatically reconnect, e.g.:
|
|
||||||
<programlisting>
|
|
||||||
[2018-09-20 13:12:41] [NOTICE] reconnected to upstream node 1 after 8 seconds, resuming monitoring</programlisting>
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
To unpause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>, e.g.:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf daemon unpause
|
|
||||||
NOTICE: node 1 (node1) unpaused
|
|
||||||
NOTICE: node 2 (node2) unpaused
|
|
||||||
NOTICE: node 3 (node3) unpaused</programlisting>
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
If the previous primary is no longer accessible when <application>repmgrd</application>
|
|
||||||
is unpaused, no failover action will be taken. Instead, a new primary must be manually promoted using
|
|
||||||
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>,
|
|
||||||
and any standbys attached to the new primary with
|
|
||||||
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This is to prevent <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
|
|
||||||
resulting in the automatic promotion of a new primary, which may be a problem particularly
|
|
||||||
in larger clusters, where <application>repmgrd</application> could select a different promotion
|
|
||||||
candidate to the one intended by the administrator.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<sect2 id="repmgrd-pausing-details">
|
|
||||||
<title>Details on the <application>repmgrd</application> pausing mechanism</title>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The pause state of each node will be retained across a PostgreSQL restart.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
|
|
||||||
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link> can be
|
|
||||||
executed even if <application>repmgrd</application> is not running; in this case,
|
|
||||||
<application>repmgrd</application> will start up in whichever pause state has been set.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
|
|
||||||
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
|
|
||||||
<emphasis>do not</emphasis> stop/start <application>repmgrd</application>.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
</sect2>
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
<chapter id="repmgrd-witness-server" xreflabel="Using a witness server with repmgrd">
|
|
||||||
<indexterm>
|
|
||||||
<primary>repmgrd</primary>
|
|
||||||
<secondary>witness server</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<title>Using a witness server with repmgrd</title>
|
|
||||||
<para>
|
|
||||||
In a situation caused e.g. by a network interruption between two
|
|
||||||
data centres, it's important to avoid a "split-brain" situation where
|
|
||||||
both sides of the network assume they are the active segment and the
|
|
||||||
side without an active primary unilaterally promotes one of its standbys.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
To prevent this situation happening, it's essential to ensure that one
|
|
||||||
network segment has a "voting majority", so other segments will know
|
|
||||||
they're in the minority and not attempt to promote a new primary. Where
|
|
||||||
an odd number of servers exists, this is not an issue. However, if each
|
|
||||||
network has an even number of nodes, it's necessary to provide some way
|
|
||||||
of ensuring a majority, which is where the witness server becomes useful.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This is not a fully-fledged standby node and is not integrated into
|
|
||||||
replication, but it effectively represents the "casting vote" when
|
|
||||||
deciding which network segment has a majority. A witness server can
|
|
||||||
be set up using <xref linkend="repmgr-witness-register">. Note that it only
|
|
||||||
makes sense to create a witness server in conjunction with running
|
|
||||||
<application>repmgrd</application>; the witness server will require its own
|
|
||||||
<application>repmgrd</application> instance.
|
|
||||||
</para>
|
|
||||||
</chapter>
|
|
||||||
@@ -84,11 +84,12 @@
|
|||||||
|
|
||||||
<para>
|
<para>
|
||||||
Double-check which commands will be used to stop/start/restart the current
|
Double-check which commands will be used to stop/start/restart the current
|
||||||
primary; on the current primary execute:
|
primary; this can be done by e.g. executing <command><link linkend="repmgr-node-service">repmgr node service</link></command>
|
||||||
|
on the current primary:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
repmgr -f /etc/repmgr.conf node service --list --action=stop
|
repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
|
||||||
repmgr -f /etc/repmgr.conf node service --list --action=start
|
repmgr -f /etc/repmgr.conf node service --list-actions --action=start
|
||||||
repmgr -f /etc/repmgr.conf node service --list --action=restart</programlisting>
|
repmgr -f /etc/repmgr.conf node service --list-actions --action=restart</programlisting>
|
||||||
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@@ -136,8 +137,8 @@
|
|||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
If an exclusive backup is running on the current primary, &repmgr; will not perform the
|
If an exclusive backup is running on the current primary, or if WAL replay is paused on the standby,
|
||||||
switchover.
|
&repmgr; will <emphasis>not</emphasis> perform the switchover.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
@@ -235,7 +236,7 @@
|
|||||||
</note>
|
</note>
|
||||||
<para>
|
<para>
|
||||||
For more details on <application>pg_rewind</application>, see:
|
For more details on <application>pg_rewind</application>, see:
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html">https://www.postgresql.org/docs/current/static/app-pgrewind.html</ulink>.
|
<ulink url="https://www.postgresql.org/docs/current/app-pgrewind.html">https://www.postgresql.org/docs/current/app-pgrewind.html</ulink>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
<application>pg_rewind</application> has been part of the core PostgreSQL distribution since
|
<application>pg_rewind</application> has been part of the core PostgreSQL distribution since
|
||||||
@@ -346,7 +347,7 @@
|
|||||||
<simpara>
|
<simpara>
|
||||||
<command>pg_rewind</command> *requires* that either <varname>wal_log_hints</varname> is enabled, or that
|
<command>pg_rewind</command> *requires* that either <varname>wal_log_hints</varname> is enabled, or that
|
||||||
data checksums were enabled when the cluster was initialized. See the
|
data checksums were enabled when the cluster was initialized. See the
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html">pg_rewind documentation</ulink>
|
<ulink url="https://www.postgresql.org/docs/current/app-pgrewind.html">pg_rewind documentation</ulink>
|
||||||
for details.
|
for details.
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
@@ -388,7 +389,7 @@ HINT: check the primary server status before performing any further actions</pro
|
|||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
Note that <varname>shutdown_check_timeout</varname>is set on the node where
|
Note that <varname>shutdown_check_timeout</varname> is set on the node where
|
||||||
<command>repmgr standby switchover</command> is executed (promotion candidate); setting it on the
|
<command>repmgr standby switchover</command> is executed (promotion candidate); setting it on the
|
||||||
demotion candidate (former primary) will have no effect.
|
demotion candidate (former primary) will have no effect.
|
||||||
</para>
|
</para>
|
||||||
@@ -417,7 +418,7 @@ HINT: stop backup before attempting the switchover</programlisting>
|
|||||||
To proceed, either wait until the backup has finished, or cancel it with the command
|
To proceed, either wait until the backup has finished, or cancel it with the command
|
||||||
<command>SELECT pg_stop_backup()</command>. For more details see the PostgreSQL
|
<command>SELECT pg_stop_backup()</command>. For more details see the PostgreSQL
|
||||||
documentation section
|
documentation section
|
||||||
<ulink url="https://www.postgresql.org/docs/current/static/continuous-archiving.html#BACKUP-LOWLEVEL-BASE-BACKUP-EXCLUSIVE">Making an exclusive low level backup</ulink>.
|
<ulink url="https://www.postgresql.org/docs/current/continuous-archiving.html#BACKUP-LOWLEVEL-BASE-BACKUP-EXCLUSIVE">Making an exclusive low level backup</ulink>.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
@@ -4,6 +4,6 @@ Upgrading from repmgr 3
|
|||||||
This document has been integrated into the main `repmgr` documentation
|
This document has been integrated into the main `repmgr` documentation
|
||||||
and is now located here:
|
and is now located here:
|
||||||
|
|
||||||
> [Upgrading from repmgr 3.x](https://repmgr.org/docs/4.0/upgrading-from-repmgr-3.html)
|
> [Upgrading from repmgr 3.x](https://repmgr.org/docs/current/upgrading-from-repmgr-3.html)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -7,9 +7,9 @@
|
|||||||
<title>Upgrading repmgr</title>
|
<title>Upgrading repmgr</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr; is updated regularly with point releases (e.g. 4.0.1 to 4.0.2)
|
&repmgr; is updated regularly with minor releases (e.g. 4.0.1 to 4.0.2)
|
||||||
containing bugfixes and other minor improvements. Any substantial new
|
containing bugfixes and other minor improvements. Any substantial new
|
||||||
functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
|
functionality will be included in a major release (e.g. 4.0 to 4.1).
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
|
<sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
|
||||||
@@ -19,83 +19,110 @@
|
|||||||
</indexterm>
|
</indexterm>
|
||||||
<title>Upgrading repmgr 4.x and later</title>
|
<title>Upgrading repmgr 4.x and later</title>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 4.x is implemented as a PostgreSQL extension; normally the upgrade consists
|
From version 4, &repmgr; consists of three elements:
|
||||||
of the following steps:
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
<orderedlist>
|
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
Stop <application>repmgrd</application> (if in use) on all nodes where it is running
|
the <application>repmgr</application> and <application>repmgrd</application> executables
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
Install the updated package (or compile the updated source)
|
the objects for the &repmgr; PostgreSQL extension (SQL files for creating/updating
|
||||||
</simpara>
|
repmgr metadata, and the extension control file)
|
||||||
</listitem>
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
For major releases, e.g. from <literal>4.0.x</literal> to <literal>4.1</literal>,
|
the shared library module used by <application>repmgrd</application> which
|
||||||
where the <literal>repmgr</literal> shared object library has been updated,
|
is resident in the PostgreSQL backend
|
||||||
restart PostgreSQL.
|
</simpara>
|
||||||
</simpara>
|
</listitem>
|
||||||
</listitem>
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
<listitem>
|
<para>
|
||||||
<simpara>
|
With <emphasis>minor releases</emphasis>, usually changes are only made to the <application>repmgr</application>
|
||||||
For major releases, e.g. from <literal>4.0.x</literal> to <literal>4.1</literal>,
|
and <application>repmgrd</application> executables. In this case, the upgrade is quite straightforward,
|
||||||
execute <command>ALTER EXTENSION repmgr UPDATE</command>
|
and is simply a case of installing the new version, and restarting <application>repmgrd</application>
|
||||||
on the primary node in the database where the &repmgr; extension is installed.
|
(if running).
|
||||||
</simpara>
|
|
||||||
<simpara>
|
|
||||||
This will update the extension metadata and, if necessary, apply
|
|
||||||
changes to the &repmgr; extension objects.
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
Start <application>repmgrd</application> (if in use).
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
</orderedlist>
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Always check the <link linkend="appendix-release-notes">release notes</link> for every
|
For <emphasis>major releases</emphasis>, the &repmgr; PostgreSQL extension will need to be updated
|
||||||
release as they may contain upgrade instructions particular to individual versions.
|
to the latest version. Additionally, if the shared library module has been updated (this is sometimes,
|
||||||
|
but not always the case), PostgreSQL itself will need to be restarted on each node.
|
||||||
</para>
|
</para>
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
Always check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||||
|
release as they may contain upgrade instructions particular to individual versions.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
<para>
|
<sect2 id="upgrading-minor-version" xreflabel="Upgrading a minor version release">
|
||||||
Note that it may be necessary to restart the PostgreSQL server if the upgrade contains
|
|
||||||
changes to the shared object file used by <application>repmgrd</application>; check the
|
|
||||||
<link linkend="appendix-release-notes">release notes</link> for details.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<sect2 id="upgrading-replication-cluster" xreflabel="Upgrading a replication cluster">
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>upgrading</primary>
|
<primary>upgrading</primary>
|
||||||
<secondary>repmgr 4.x and later</secondary>
|
<secondary>minor release</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
<title>Upgrading a replication cluster</title>
|
<title>Upgrading a minor version release</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The process for installing minor version upgrades is quite straightforward:
|
||||||
|
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
install the new &repmgr; version
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
restart <application>repmgrd</application> on all nodes where it is running
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
Some packaging systems (e.g. <link linkend="packages-debian-ubuntu">Debian/Ubuntu</link>)
|
||||||
|
may restart <application>repmgrd</application> as part of the package upgrade process.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The same &repmgr; "major version" (e.g. <literal>4.2</literal>) must be
|
Minor version upgrades can be performed in any order on the nodes in the replication
|
||||||
installed on all nodes in the replication cluster. While it's possible to have differing
|
cluster.
|
||||||
&repmgr; "minor versions" (e.g. <literal>4.2.1</literal>) on different nodes,
|
|
||||||
we strongly recommend updating all nodes to the latest minor version.
|
|
||||||
</para>
|
</para>
|
||||||
<note>
|
|
||||||
|
<para>
|
||||||
|
A PostgreSQL restart is <emphasis>not</emphasis> required for minor version upgrades.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
<para>
|
<para>
|
||||||
Minor version upgrades can be performed in any order on the nodes in the replication
|
The same &repmgr; "major version" (e.g. <literal>4.2</literal>) must be
|
||||||
cluster. In general it makes sense to start on the primary.
|
installed on all nodes in the replication cluster. While it's possible to have differing
|
||||||
|
&repmgr; "minor versions" (e.g. <literal>4.2.1</literal>) on different nodes,
|
||||||
|
we strongly recommend updating all nodes to the latest minor version.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
</note>
|
||||||
A PostgreSQL restart is <emphasis>not</emphasis> required for minor version upgrades.
|
|
||||||
</para>
|
</sect2>
|
||||||
</note>
|
|
||||||
|
<sect2 id="upgrading-major-version" xreflabel="Upgrading a major version release">
|
||||||
|
<indexterm>
|
||||||
|
<primary>upgrading</primary>
|
||||||
|
<secondary>major release</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<title>Upgrading a major version release</title>
|
||||||
<para>
|
<para>
|
||||||
"major version" upgrades need to be planned more carefully, as they may include
|
"major version" upgrades need to be planned more carefully, as they may include
|
||||||
changes to the &repmgr; metadata (which need to be propagated from the primary to all
|
changes to the &repmgr; metadata (which need to be propagated from the primary to all
|
||||||
@@ -111,7 +138,14 @@
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
Stop <application>repmgrd</application> (if in use) on all nodes where it is running
|
Stop <application>repmgrd</application> (if in use) on all nodes where it is running.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
Disable the <application>repmgrd</application> service on all nodes where it is in use;
|
||||||
|
this is to prevent packages from prematurely restarting <application>repmgrd</application>.
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
@@ -121,12 +155,21 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
If running a <literal>systemd</literal>-based Linux distribution, execute (as <literal>root</literal>,
|
||||||
|
or with appropriate <literal>sudo</literal> permissions):
|
||||||
|
<programlisting>
|
||||||
|
systemctl daemon-reload</programlisting>
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
If necessary, restart PostgreSQL, then <application>repmgrd</application> (if in use)
|
If the &repmgr; shared library module has been updated (check the <link linkend="appendix-release-notes">release notes</link>!),
|
||||||
on each node. The order in which this is applied to individual nodes is not critical,
|
restart PostgreSQL, then <application>repmgrd</application> (if in use) on each node.
|
||||||
and it's also fine to restart on all nodes first before starting <application>repmgrd</application>.
|
The order in which this is applied to individual nodes is not critical,
|
||||||
|
and it's also fine to restart PostgreSQL on all nodes first before starting <application>repmgrd</application>.
|
||||||
</simpara>
|
</simpara>
|
||||||
<simpara>
|
<simpara>
|
||||||
Note that if the upgrade requires a PostgreSQL restart, <application>repmgrd</application>
|
Note that if the upgrade requires a PostgreSQL restart, <application>repmgrd</application>
|
||||||
@@ -138,11 +181,18 @@
|
|||||||
<para>
|
<para>
|
||||||
On the primary node, execute
|
On the primary node, execute
|
||||||
<programlisting>
|
<programlisting>
|
||||||
ALTER EXTENSION repmgr UPDATE</programlisting>
|
ALTER EXTENSION repmgr UPDATE</programlisting>
|
||||||
in the database where &repmgr; is installed.
|
in the database where &repmgr; is installed.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
Reenable the <application>repmgrd</application> service on all nodes where it is in use, and
|
||||||
|
ensure it is running.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
</orderedlist>
|
</orderedlist>
|
||||||
</para>
|
</para>
|
||||||
<tip>
|
<tip>
|
||||||
@@ -154,6 +204,17 @@
|
|||||||
</tip>
|
</tip>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="upgrading-check-repmgrd" xreflabel="Checking repmgrd status after an upgrade">
|
||||||
|
<indexterm>
|
||||||
|
<primary>upgrading</primary>
|
||||||
|
<secondary>checking repmgrd status</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<title>Checking repmgrd status after an upgrade</title>
|
||||||
|
<para>
|
||||||
|
From <link linkend="release-4.2">repmgr 4.2</link>, once the upgrade is complete, execute the <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>
|
||||||
|
command (on any node) to show an overview of the status of <application>repmgrd</application> on all nodes.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
|
<sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
|
||||||
@@ -186,7 +247,7 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</note>
|
</note>
|
||||||
<para>
|
<para>
|
||||||
For further details please see the <ulink url="https://www.postgresql.org/docs/current/static/pgupgrade.html">pg_upgrade documentation</ulink>.
|
For further details please see the <ulink url="https://www.postgresql.org/docs/current/pgupgrade.html">pg_upgrade documentation</ulink>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If replication slots are in use, bear in mind these will <emphasis>not</emphasis>
|
If replication slots are in use, bear in mind these will <emphasis>not</emphasis>
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
<!ENTITY repmgrversion "4.2dev">
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* errcode.h
|
* errcode.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -48,5 +48,6 @@
|
|||||||
#define ERR_REJOIN_FAIL 24
|
#define ERR_REJOIN_FAIL 24
|
||||||
#define ERR_NODE_STATUS 25
|
#define ERR_NODE_STATUS 25
|
||||||
#define ERR_REPMGRD_PAUSE 26
|
#define ERR_REPMGRD_PAUSE 26
|
||||||
|
#define ERR_REPMGRD_SERVICE 27
|
||||||
|
|
||||||
#endif /* _ERRCODE_H_ */
|
#endif /* _ERRCODE_H_ */
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ SELECT repmgr.am_bdr_failover_handler(NULL);
|
|||||||
SELECT repmgr.get_new_primary();
|
SELECT repmgr.get_new_primary();
|
||||||
get_new_primary
|
get_new_primary
|
||||||
-----------------
|
-----------------
|
||||||
|
-1
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT repmgr.notify_follow_primary(-1);
|
SELECT repmgr.notify_follow_primary(-1);
|
||||||
|
|||||||
4
log.c
4
log.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.c - Logging methods
|
* log.c - Logging methods
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -85,7 +85,7 @@ _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_li
|
|||||||
|
|
||||||
time(&t);
|
time(&t);
|
||||||
tm = localtime(&t);
|
tm = localtime(&t);
|
||||||
strftime(buf, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
strftime(buf, sizeof(buf), "[%Y-%m-%d %H:%M:%S]", tm);
|
||||||
fprintf(stderr, "%s [%s] ", buf, level_name);
|
fprintf(stderr, "%s [%s] ", buf, level_name);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
2
log.h
2
log.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.h
|
* log.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
17
repmgr--4.2--4.3.sql
Normal file
17
repmgr--4.2--4.3.sql
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||||
|
|
||||||
|
CREATE FUNCTION set_upstream_last_seen()
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_upstream_last_seen()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_wal_receiver_pid()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_wal_receiver_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
217
repmgr--4.3.sql
Normal file
217
repmgr--4.3.sql
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||||
|
|
||||||
|
CREATE TABLE repmgr.nodes (
|
||||||
|
node_id INTEGER PRIMARY KEY,
|
||||||
|
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
||||||
|
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
node_name TEXT NOT NULL,
|
||||||
|
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||||
|
location TEXT NOT NULL DEFAULT 'default',
|
||||||
|
priority INT NOT NULL DEFAULT 100,
|
||||||
|
conninfo TEXT NOT NULL,
|
||||||
|
repluser VARCHAR(63) NOT NULL,
|
||||||
|
slot_name TEXT NULL,
|
||||||
|
config_file TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE repmgr.events (
|
||||||
|
node_id INTEGER NOT NULL,
|
||||||
|
event TEXT NOT NULL,
|
||||||
|
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
details TEXT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
DO $repmgr$
|
||||||
|
DECLARE
|
||||||
|
DECLARE server_version_num INT;
|
||||||
|
BEGIN
|
||||||
|
SELECT setting
|
||||||
|
FROM pg_catalog.pg_settings
|
||||||
|
WHERE name = 'server_version_num'
|
||||||
|
INTO server_version_num;
|
||||||
|
IF server_version_num >= 90400 THEN
|
||||||
|
EXECUTE $repmgr_func$
|
||||||
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
|
primary_node_id INTEGER NOT NULL,
|
||||||
|
standby_node_id INTEGER NOT NULL,
|
||||||
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
|
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
last_wal_primary_location PG_LSN NOT NULL,
|
||||||
|
last_wal_standby_location PG_LSN,
|
||||||
|
replication_lag BIGINT NOT NULL,
|
||||||
|
apply_lag BIGINT NOT NULL
|
||||||
|
)
|
||||||
|
$repmgr_func$;
|
||||||
|
ELSE
|
||||||
|
EXECUTE $repmgr_func$
|
||||||
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
|
primary_node_id INTEGER NOT NULL,
|
||||||
|
standby_node_id INTEGER NOT NULL,
|
||||||
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
|
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
last_wal_primary_location TEXT NOT NULL,
|
||||||
|
last_wal_standby_location TEXT,
|
||||||
|
replication_lag BIGINT NOT NULL,
|
||||||
|
apply_lag BIGINT NOT NULL
|
||||||
|
)
|
||||||
|
$repmgr_func$;
|
||||||
|
END IF;
|
||||||
|
END$repmgr$;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE INDEX idx_monitoring_history_time
|
||||||
|
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||||
|
|
||||||
|
CREATE VIEW repmgr.show_nodes AS
|
||||||
|
SELECT n.node_id,
|
||||||
|
n.node_name,
|
||||||
|
n.active,
|
||||||
|
n.upstream_node_id,
|
||||||
|
un.node_name AS upstream_node_name,
|
||||||
|
n.type,
|
||||||
|
n.priority,
|
||||||
|
n.conninfo
|
||||||
|
FROM repmgr.nodes n
|
||||||
|
LEFT JOIN repmgr.nodes un
|
||||||
|
ON un.node_id = n.upstream_node_id;
|
||||||
|
|
||||||
|
|
||||||
|
/* XXX update upgrade scripts! */
|
||||||
|
CREATE TABLE repmgr.voting_term (
|
||||||
|
term INT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE UNIQUE INDEX voting_term_restrict
|
||||||
|
ON repmgr.voting_term ((TRUE));
|
||||||
|
|
||||||
|
CREATE RULE voting_term_delete AS
|
||||||
|
ON DELETE TO repmgr.voting_term
|
||||||
|
DO INSTEAD NOTHING;
|
||||||
|
|
||||||
|
|
||||||
|
/* ================= */
|
||||||
|
/* repmgrd functions */
|
||||||
|
/* ================= */
|
||||||
|
|
||||||
|
/* monitoring functions */
|
||||||
|
|
||||||
|
CREATE FUNCTION set_local_node_id(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_local_node_id()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION standby_set_last_updated()
|
||||||
|
RETURNS TIMESTAMP WITH TIME ZONE
|
||||||
|
AS 'MODULE_PATHNAME', 'standby_set_last_updated'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION standby_get_last_updated()
|
||||||
|
RETURNS TIMESTAMP WITH TIME ZONE
|
||||||
|
AS 'MODULE_PATHNAME', 'standby_get_last_updated'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_upstream_last_seen()
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_upstream_last_seen()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
/* failover functions */
|
||||||
|
|
||||||
|
CREATE FUNCTION notify_follow_primary(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'notify_follow_primary'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_new_primary()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_new_primary'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION reset_voting_status()
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION am_bdr_failover_handler(INT)
|
||||||
|
RETURNS BOOL
|
||||||
|
AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION unset_bdr_failover_handler()
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_repmgrd_pid()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_repmgrd_pidfile()
|
||||||
|
RETURNS TEXT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_is_running()
|
||||||
|
RETURNS BOOL
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_pause(BOOL)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_pause'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_is_paused()
|
||||||
|
RETURNS BOOL
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_wal_receiver_pid()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_wal_receiver_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* views */
|
||||||
|
|
||||||
|
CREATE VIEW repmgr.replication_status AS
|
||||||
|
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
|
||||||
|
n.type AS node_type, n.active, last_monitor_time,
|
||||||
|
CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
|
||||||
|
m.last_wal_standby_location,
|
||||||
|
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
|
||||||
|
CASE WHEN n.type='standby' THEN
|
||||||
|
CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
|
||||||
|
ELSE NULL
|
||||||
|
END AS replication_time_lag,
|
||||||
|
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
|
||||||
|
AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
|
||||||
|
FROM repmgr.monitoring_history m
|
||||||
|
JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
|
||||||
|
WHERE (m.standby_node_id, m.last_monitor_time) IN (
|
||||||
|
SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
|
||||||
|
FROM repmgr.monitoring_history m1 GROUP BY 1
|
||||||
|
);
|
||||||
|
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements BDR-related actions for the repmgr command line utility
|
* Implements BDR-related actions for the repmgr command line utility
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -126,7 +126,7 @@ do_bdr_register(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
|
/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
|
||||||
extension_status = get_repmgr_extension_status(conn);
|
extension_status = get_repmgr_extension_status(conn, NULL);
|
||||||
|
|
||||||
if (extension_status == REPMGR_UNKNOWN)
|
if (extension_status == REPMGR_UNKNOWN)
|
||||||
{
|
{
|
||||||
@@ -216,7 +216,7 @@ do_bdr_register(void)
|
|||||||
ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;
|
ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;
|
||||||
|
|
||||||
/* skip the local node */
|
/* skip the local node */
|
||||||
if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, MAXLEN) == 0)
|
if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -232,7 +232,7 @@ do_bdr_register(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* check repmgr schema exists, skip if not */
|
/* check repmgr schema exists, skip if not */
|
||||||
other_node_extension_status = get_repmgr_extension_status(bdr_node_conn);
|
other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL);
|
||||||
|
|
||||||
if (other_node_extension_status != REPMGR_INSTALLED)
|
if (other_node_extension_status != REPMGR_INSTALLED)
|
||||||
{
|
{
|
||||||
@@ -304,9 +304,9 @@ do_bdr_register(void)
|
|||||||
node_info.active = true;
|
node_info.active = true;
|
||||||
node_info.priority = config_file_options.priority;
|
node_info.priority = config_file_options.priority;
|
||||||
|
|
||||||
strncpy(node_info.node_name, config_file_options.node_name, MAXLEN);
|
strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name));
|
||||||
strncpy(node_info.location, config_file_options.location, MAXLEN);
|
strncpy(node_info.location, config_file_options.location, sizeof(node_info.location));
|
||||||
strncpy(node_info.conninfo, config_file_options.conninfo, MAXLEN);
|
strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo));
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
if (record_status == RECORD_FOUND)
|
||||||
{
|
{
|
||||||
@@ -330,7 +330,7 @@ do_bdr_register(void)
|
|||||||
* name set when the node was registered.
|
* name set when the node was registered.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (strncmp(node_info.node_name, config_file_options.node_name, MAXLEN) != 0)
|
if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0)
|
||||||
{
|
{
|
||||||
log_error(_("a record for node %i is already registered with node_name \"%s\""),
|
log_error(_("a record for node %i is already registered with node_name \"%s\""),
|
||||||
config_file_options.node_id, node_info.node_name);
|
config_file_options.node_id, node_info.node_name);
|
||||||
@@ -442,7 +442,7 @@ do_bdr_unregister(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
extension_status = get_repmgr_extension_status(conn);
|
extension_status = get_repmgr_extension_status(conn, NULL);
|
||||||
if (extension_status != REPMGR_INSTALLED)
|
if (extension_status != REPMGR_INSTALLED)
|
||||||
{
|
{
|
||||||
log_error(_("repmgr is not installed on database \"%s\""), dbname);
|
log_error(_("repmgr is not installed on database \"%s\""), dbname);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-bdr.h
|
* repmgr-action-bdr.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements cluster information actions for the repmgr command line utility
|
* Implements cluster information actions for the repmgr command line utility
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
#include "repmgr-client-global.h"
|
#include "repmgr-client-global.h"
|
||||||
#include "repmgr-action-cluster.h"
|
#include "repmgr-action-cluster.h"
|
||||||
|
|
||||||
#define SHOW_HEADER_COUNT 7
|
#define SHOW_HEADER_COUNT 8
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
@@ -34,6 +34,7 @@ typedef enum
|
|||||||
SHOW_STATUS,
|
SHOW_STATUS,
|
||||||
SHOW_UPSTREAM_NAME,
|
SHOW_UPSTREAM_NAME,
|
||||||
SHOW_LOCATION,
|
SHOW_LOCATION,
|
||||||
|
SHOW_PRIORITY,
|
||||||
SHOW_CONNINFO
|
SHOW_CONNINFO
|
||||||
} ShowHeader;
|
} ShowHeader;
|
||||||
|
|
||||||
@@ -102,12 +103,19 @@ do_cluster_show(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Initialize column headers */
|
||||||
strncpy(headers_show[SHOW_ID].title, _("ID"), MAXLEN);
|
strncpy(headers_show[SHOW_ID].title, _("ID"), MAXLEN);
|
||||||
strncpy(headers_show[SHOW_NAME].title, _("Name"), MAXLEN);
|
strncpy(headers_show[SHOW_NAME].title, _("Name"), MAXLEN);
|
||||||
strncpy(headers_show[SHOW_ROLE].title, _("Role"), MAXLEN);
|
strncpy(headers_show[SHOW_ROLE].title, _("Role"), MAXLEN);
|
||||||
strncpy(headers_show[SHOW_STATUS].title, _("Status"), MAXLEN);
|
strncpy(headers_show[SHOW_STATUS].title, _("Status"), MAXLEN);
|
||||||
strncpy(headers_show[SHOW_UPSTREAM_NAME].title, _("Upstream"), MAXLEN);
|
strncpy(headers_show[SHOW_UPSTREAM_NAME].title, _("Upstream"), MAXLEN);
|
||||||
strncpy(headers_show[SHOW_LOCATION].title, _("Location"), MAXLEN);
|
strncpy(headers_show[SHOW_LOCATION].title, _("Location"), MAXLEN);
|
||||||
|
|
||||||
|
if (runtime_options.compact == true)
|
||||||
|
strncpy(headers_show[SHOW_PRIORITY].title, _("Prio."), MAXLEN);
|
||||||
|
else
|
||||||
|
strncpy(headers_show[SHOW_PRIORITY].title, _("Priority"), MAXLEN);
|
||||||
|
|
||||||
strncpy(headers_show[SHOW_CONNINFO].title, _("Connection string"), MAXLEN);
|
strncpy(headers_show[SHOW_CONNINFO].title, _("Connection string"), MAXLEN);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -117,12 +125,26 @@ do_cluster_show(void)
|
|||||||
|
|
||||||
for (i = 0; i < SHOW_HEADER_COUNT; i++)
|
for (i = 0; i < SHOW_HEADER_COUNT; i++)
|
||||||
{
|
{
|
||||||
headers_show[i].max_length = strlen(headers_show[i].title);
|
headers_show[i].display = true;
|
||||||
|
|
||||||
|
if (runtime_options.compact == true)
|
||||||
|
{
|
||||||
|
if (i == SHOW_CONNINFO)
|
||||||
|
{
|
||||||
|
headers_show[i].display = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (headers_show[i].display == true)
|
||||||
|
{
|
||||||
|
headers_show[i].max_length = strlen(headers_show[i].title);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
PQExpBufferData details;
|
PQExpBufferData details;
|
||||||
|
PQExpBufferData buf;
|
||||||
|
|
||||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||||
|
|
||||||
@@ -133,7 +155,12 @@ do_cluster_show(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cell->node_info->node_status = NODE_STATUS_DOWN;
|
/* check if node is reachable, but just not letting us in */
|
||||||
|
if (is_server_available_quiet(cell->node_info->conninfo))
|
||||||
|
cell->node_info->node_status = NODE_STATUS_REJECTED;
|
||||||
|
else
|
||||||
|
cell->node_info->node_status = NODE_STATUS_DOWN;
|
||||||
|
|
||||||
cell->node_info->recovery_type = RECTYPE_UNKNOWN;
|
cell->node_info->recovery_type = RECTYPE_UNKNOWN;
|
||||||
|
|
||||||
connection_error_found = true;
|
connection_error_found = true;
|
||||||
@@ -208,6 +235,19 @@ do_cluster_show(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* node is up but cannot connect */
|
||||||
|
else if (cell->node_info->node_status == NODE_STATUS_REJECTED)
|
||||||
|
{
|
||||||
|
if (cell->node_info->active == true)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details, "? running");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details, "! running");
|
||||||
|
error_found = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
/* node is unreachable */
|
/* node is unreachable */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -272,6 +312,27 @@ do_cluster_show(void)
|
|||||||
cell->node_info->node_name, cell->node_info->node_id);
|
cell->node_info->node_name, cell->node_info->node_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* warn about issue with paused WAL replay */
|
||||||
|
if (is_wal_replay_paused(cell->node_info->conn, true))
|
||||||
|
{
|
||||||
|
item_list_append_format(&warnings,
|
||||||
|
_("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"),
|
||||||
|
cell->node_info->node_name, cell->node_info->node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* node is up but cannot connect */
|
||||||
|
else if (cell->node_info->node_status == NODE_STATUS_REJECTED)
|
||||||
|
{
|
||||||
|
if (cell->node_info->active == true)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details, "? running");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details, "! running");
|
||||||
|
error_found = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* node is unreachable */
|
/* node is unreachable */
|
||||||
else
|
else
|
||||||
@@ -286,11 +347,12 @@ do_cluster_show(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(&details, "- failed");
|
appendPQExpBufferStr(&details, "- failed");
|
||||||
error_found = true;
|
error_found = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case WITNESS:
|
case WITNESS:
|
||||||
case BDR:
|
case BDR:
|
||||||
@@ -308,6 +370,20 @@ do_cluster_show(void)
|
|||||||
error_found = true;
|
error_found = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* node is up but cannot connect */
|
||||||
|
else if (cell->node_info->node_status == NODE_STATUS_REJECTED)
|
||||||
|
{
|
||||||
|
if (cell->node_info->active == true)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details, "? rejected");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details, "! failed");
|
||||||
|
error_found = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
/* node is unreachable */
|
/* node is unreachable */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -338,15 +414,35 @@ do_cluster_show(void)
|
|||||||
PQfinish(cell->node_info->conn);
|
PQfinish(cell->node_info->conn);
|
||||||
cell->node_info->conn = NULL;
|
cell->node_info->conn = NULL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&buf);
|
||||||
|
appendPQExpBuffer(&buf, "%i", cell->node_info->node_id);
|
||||||
|
headers_show[SHOW_ID].cur_length = strlen(buf.data);
|
||||||
|
termPQExpBuffer(&buf);
|
||||||
|
|
||||||
headers_show[SHOW_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
|
headers_show[SHOW_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
|
||||||
headers_show[SHOW_NAME].cur_length = strlen(cell->node_info->node_name);
|
headers_show[SHOW_NAME].cur_length = strlen(cell->node_info->node_name);
|
||||||
headers_show[SHOW_STATUS].cur_length = strlen(cell->node_info->details);
|
headers_show[SHOW_STATUS].cur_length = strlen(cell->node_info->details);
|
||||||
headers_show[SHOW_UPSTREAM_NAME].cur_length = strlen(cell->node_info->upstream_node_name);
|
headers_show[SHOW_UPSTREAM_NAME].cur_length = strlen(cell->node_info->upstream_node_name);
|
||||||
|
|
||||||
|
initPQExpBuffer(&buf);
|
||||||
|
appendPQExpBuffer(&buf, "%i", cell->node_info->priority);
|
||||||
|
headers_show[SHOW_PRIORITY].cur_length = strlen(buf.data);
|
||||||
|
termPQExpBuffer(&buf);
|
||||||
|
|
||||||
headers_show[SHOW_LOCATION].cur_length = strlen(cell->node_info->location);
|
headers_show[SHOW_LOCATION].cur_length = strlen(cell->node_info->location);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
headers_show[SHOW_CONNINFO].cur_length = strlen(cell->node_info->conninfo);
|
headers_show[SHOW_CONNINFO].cur_length = strlen(cell->node_info->conninfo);
|
||||||
|
|
||||||
for (i = 0; i < SHOW_HEADER_COUNT; i++)
|
for (i = 0; i < SHOW_HEADER_COUNT; i++)
|
||||||
{
|
{
|
||||||
|
if (runtime_options.compact == true)
|
||||||
|
{
|
||||||
|
if (headers_show[i].display == false)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (headers_show[i].cur_length > headers_show[i].max_length)
|
if (headers_show[i].cur_length > headers_show[i].max_length)
|
||||||
{
|
{
|
||||||
headers_show[i].max_length = headers_show[i].cur_length;
|
headers_show[i].max_length = headers_show[i].cur_length;
|
||||||
@@ -398,7 +494,14 @@ do_cluster_show(void)
|
|||||||
printf("| %-*s ", headers_show[SHOW_STATUS].max_length, cell->node_info->details);
|
printf("| %-*s ", headers_show[SHOW_STATUS].max_length, cell->node_info->details);
|
||||||
printf("| %-*s ", headers_show[SHOW_UPSTREAM_NAME].max_length, cell->node_info->upstream_node_name);
|
printf("| %-*s ", headers_show[SHOW_UPSTREAM_NAME].max_length, cell->node_info->upstream_node_name);
|
||||||
printf("| %-*s ", headers_show[SHOW_LOCATION].max_length, cell->node_info->location);
|
printf("| %-*s ", headers_show[SHOW_LOCATION].max_length, cell->node_info->location);
|
||||||
printf("| %-*s\n", headers_show[SHOW_CONNINFO].max_length, cell->node_info->conninfo);
|
printf("| %-*i ", headers_show[SHOW_PRIORITY].max_length, cell->node_info->priority);
|
||||||
|
|
||||||
|
if (headers_show[SHOW_CONNINFO].display == true)
|
||||||
|
{
|
||||||
|
printf("| %-*s", headers_show[SHOW_CONNINFO].max_length, cell->node_info->conninfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
puts("");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -960,7 +1063,9 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
||||||
|
|
||||||
matrix_rec_list[i]->node_id = cell->node_info->node_id;
|
matrix_rec_list[i]->node_id = cell->node_info->node_id;
|
||||||
strncpy(matrix_rec_list[i]->node_name, cell->node_info->node_name, MAXLEN);
|
strncpy(matrix_rec_list[i]->node_name,
|
||||||
|
cell->node_info->node_name,
|
||||||
|
sizeof(cell->node_info->node_name));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the maximum length of a node name
|
* Find the maximum length of a node name
|
||||||
@@ -1058,6 +1163,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
(void) remote_command(host,
|
(void) remote_command(host,
|
||||||
runtime_options.remote_user,
|
runtime_options.remote_user,
|
||||||
command.data,
|
command.data,
|
||||||
|
config_file_options.ssh_options,
|
||||||
&command_output);
|
&command_output);
|
||||||
|
|
||||||
p = command_output.data;
|
p = command_output.data;
|
||||||
@@ -1174,7 +1280,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, Item
|
|||||||
|
|
||||||
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
||||||
cube[h]->node_id = cell->node_info->node_id;
|
cube[h]->node_id = cell->node_info->node_id;
|
||||||
strncpy(cube[h]->node_name, cell->node_info->node_name, MAXLEN);
|
strncpy(cube[h]->node_name, cell->node_info->node_name, sizeof(cell->node_info->node_name));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the maximum length of a node name
|
* Find the maximum length of a node name
|
||||||
@@ -1196,7 +1302,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, Item
|
|||||||
/* we don't need the name here */
|
/* we don't need the name here */
|
||||||
cube[h]->matrix_list_rec[i]->node_name[0] = '\0';
|
cube[h]->matrix_list_rec[i]->node_name[0] = '\0';
|
||||||
|
|
||||||
cube[h]->matrix_list_rec[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * nodes.node_count);
|
cube[h]->matrix_list_rec[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec *) * nodes.node_count);
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
|
|
||||||
@@ -1270,6 +1376,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, Item
|
|||||||
(void) remote_command(host,
|
(void) remote_command(host,
|
||||||
runtime_options.remote_user,
|
runtime_options.remote_user,
|
||||||
quoted_command.data,
|
quoted_command.data,
|
||||||
|
config_file_options.ssh_options,
|
||||||
&command_output);
|
&command_output);
|
||||||
|
|
||||||
free_conninfo_params(&remote_conninfo);
|
free_conninfo_params(&remote_conninfo);
|
||||||
@@ -1492,6 +1599,7 @@ do_cluster_help(void)
|
|||||||
printf(_(" Configuration file or database connection required.\n"));
|
printf(_(" Configuration file or database connection required.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --csv emit output as CSV (with a subset of fields)\n"));
|
printf(_(" --csv emit output as CSV (with a subset of fields)\n"));
|
||||||
|
printf(_(" --compact display only a subset of fields\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_("CLUSTER MATRIX\n"));
|
printf(_("CLUSTER MATRIX\n"));
|
||||||
@@ -1527,7 +1635,7 @@ do_cluster_help(void)
|
|||||||
|
|
||||||
printf(_("CLUSTER CLEANUP\n"));
|
printf(_("CLUSTER CLEANUP\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" \"cluster cleanup\" purges records from the \"repmgr.monitor\" table.\n"));
|
printf(_(" \"cluster cleanup\" purges records from the \"repmgr.monitoring_history\" table.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" -k, --keep-history=VALUE retain indicated number of days of history (default: 0)\n"));
|
printf(_(" -k, --keep-history=VALUE retain indicated number of days of history (default: 0)\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-cluster.h
|
* repmgr-action-cluster.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -30,14 +30,14 @@ typedef struct
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
t_node_status_rec **node_status_list;
|
t_node_status_rec **node_status_list;
|
||||||
} t_node_matrix_rec;
|
} t_node_matrix_rec;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
t_node_matrix_rec **matrix_list_rec;
|
t_node_matrix_rec **matrix_list_rec;
|
||||||
} t_node_status_cube;
|
} t_node_status_cube;
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* repmgr-action-daemon.c
|
* repmgr-action-daemon.c
|
||||||
*
|
*
|
||||||
* Implements repmgrd actions for the repmgr command line utility
|
* Implements repmgrd actions for the repmgr command line utility
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -18,12 +18,17 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <signal.h>
|
||||||
|
#include <sys/stat.h> /* for stat() */
|
||||||
|
|
||||||
#include "repmgr.h"
|
#include "repmgr.h"
|
||||||
|
|
||||||
#include "repmgr-client-global.h"
|
#include "repmgr-client-global.h"
|
||||||
#include "repmgr-action-daemon.h"
|
#include "repmgr-action-daemon.h"
|
||||||
|
|
||||||
|
#define REPMGR_DAEMON_STOP_START_WAIT 15
|
||||||
|
#define REPMGR_DAEMON_STATUS_START_HINT _("use \"repmgr daemon status\" to confirm that repmgrd was successfully started")
|
||||||
|
#define REPMGR_DAEMON_STATUS_STOP_HINT _("use \"repmgr daemon status\" to confirm that repmgrd was successfully stopped")
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Possibly also show:
|
* Possibly also show:
|
||||||
@@ -38,13 +43,15 @@ typedef enum
|
|||||||
STATUS_ID = 0,
|
STATUS_ID = 0,
|
||||||
STATUS_NAME,
|
STATUS_NAME,
|
||||||
STATUS_ROLE,
|
STATUS_ROLE,
|
||||||
|
STATUS_PRIORITY,
|
||||||
STATUS_PG,
|
STATUS_PG,
|
||||||
STATUS_RUNNING,
|
STATUS_RUNNING,
|
||||||
STATUS_PID,
|
STATUS_PID,
|
||||||
STATUS_PAUSED
|
STATUS_PAUSED,
|
||||||
|
STATUS_UPSTREAM_LAST_SEEN
|
||||||
} StatusHeader;
|
} StatusHeader;
|
||||||
|
|
||||||
#define STATUS_HEADER_COUNT 7
|
#define STATUS_HEADER_COUNT 9
|
||||||
|
|
||||||
struct ColHeader headers_status[STATUS_HEADER_COUNT];
|
struct ColHeader headers_status[STATUS_HEADER_COUNT];
|
||||||
|
|
||||||
@@ -61,6 +68,7 @@ do_daemon_status(void)
|
|||||||
int i;
|
int i;
|
||||||
RepmgrdInfo **repmgrd_info;
|
RepmgrdInfo **repmgrd_info;
|
||||||
ItemList warnings = {NULL, NULL};
|
ItemList warnings = {NULL, NULL};
|
||||||
|
bool connection_error_found = false;
|
||||||
|
|
||||||
/* Connect to local database to obtain cluster connection data */
|
/* Connect to local database to obtain cluster connection data */
|
||||||
log_verbose(LOG_INFO, _("connecting to database"));
|
log_verbose(LOG_INFO, _("connecting to database"));
|
||||||
@@ -83,14 +91,27 @@ do_daemon_status(void)
|
|||||||
strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN);
|
strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN);
|
||||||
strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN);
|
strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN);
|
||||||
strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN);
|
strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN);
|
||||||
|
|
||||||
|
if (runtime_options.compact == true)
|
||||||
|
strncpy(headers_status[STATUS_PRIORITY].title, _("Prio."), MAXLEN);
|
||||||
|
else
|
||||||
|
strncpy(headers_status[STATUS_PRIORITY].title, _("Priority"), MAXLEN);
|
||||||
|
|
||||||
strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN);
|
strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN);
|
||||||
strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN);
|
strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN);
|
||||||
strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN);
|
strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN);
|
||||||
strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN);
|
strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN);
|
||||||
|
|
||||||
|
if (runtime_options.compact == true)
|
||||||
|
strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstr. last"), MAXLEN);
|
||||||
|
else
|
||||||
|
strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstream last seen"), MAXLEN);
|
||||||
|
|
||||||
|
|
||||||
for (i = 0; i < STATUS_HEADER_COUNT; i++)
|
for (i = 0; i < STATUS_HEADER_COUNT; i++)
|
||||||
{
|
{
|
||||||
headers_status[i].max_length = strlen(headers_status[i].title);
|
headers_status[i].max_length = strlen(headers_status[i].title);
|
||||||
|
headers_status[i].display = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
@@ -98,18 +119,24 @@ do_daemon_status(void)
|
|||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
|
PQExpBufferData buf;
|
||||||
|
|
||||||
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
||||||
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
||||||
repmgrd_info[i]->pid = UNKNOWN_PID;
|
repmgrd_info[i]->pid = UNKNOWN_PID;
|
||||||
|
repmgrd_info[i]->recovery_type = RECTYPE_UNKNOWN;
|
||||||
repmgrd_info[i]->paused = false;
|
repmgrd_info[i]->paused = false;
|
||||||
repmgrd_info[i]->running = false;
|
repmgrd_info[i]->running = false;
|
||||||
repmgrd_info[i]->pg_running = true;
|
repmgrd_info[i]->pg_running = true;
|
||||||
|
repmgrd_info[i]->wal_paused_pending_wal = false;
|
||||||
|
repmgrd_info[i]->upstream_last_seen = -1;
|
||||||
|
|
||||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
connection_error_found = true;
|
||||||
|
|
||||||
if (runtime_options.verbose)
|
if (runtime_options.verbose)
|
||||||
{
|
{
|
||||||
char error[MAXLEN];
|
char error[MAXLEN];
|
||||||
@@ -160,16 +187,55 @@ do_daemon_status(void)
|
|||||||
|
|
||||||
repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
|
repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
|
||||||
|
|
||||||
|
repmgrd_info[i]->recovery_type = get_recovery_type(cell->node_info->conn);
|
||||||
|
|
||||||
|
if (repmgrd_info[i]->recovery_type == RECTYPE_STANDBY)
|
||||||
|
{
|
||||||
|
repmgrd_info[i]->wal_paused_pending_wal = is_wal_replay_paused(cell->node_info->conn, true);
|
||||||
|
|
||||||
|
if (repmgrd_info[i]->wal_paused_pending_wal == true)
|
||||||
|
{
|
||||||
|
item_list_append_format(&warnings,
|
||||||
|
_("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"),
|
||||||
|
cell->node_info->node_name, cell->node_info->node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
repmgrd_info[i]->upstream_last_seen = get_upstream_last_seen(cell->node_info->conn, cell->node_info->type);
|
||||||
|
if (repmgrd_info[i]->upstream_last_seen < 0)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, "%s", _("n/a"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (runtime_options.compact == true)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i sec(s) ago"), repmgrd_info[i]->upstream_last_seen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i second(s) ago"), repmgrd_info[i]->upstream_last_seen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
PQfinish(cell->node_info->conn);
|
PQfinish(cell->node_info->conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name);
|
headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name);
|
||||||
headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
|
headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
|
||||||
|
|
||||||
|
initPQExpBuffer(&buf);
|
||||||
|
appendPQExpBuffer(&buf, "%i", cell->node_info->priority);
|
||||||
|
headers_status[STATUS_PRIORITY].cur_length = strlen(buf.data);
|
||||||
|
termPQExpBuffer(&buf);
|
||||||
|
|
||||||
headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text);
|
headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text);
|
||||||
headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running);
|
headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running);
|
||||||
headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
|
headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
|
||||||
|
|
||||||
|
headers_status[STATUS_UPSTREAM_LAST_SEEN].cur_length = strlen(repmgrd_info[i]->upstream_last_seen_text);
|
||||||
|
|
||||||
for (j = 0; j < STATUS_HEADER_COUNT; j++)
|
for (j = 0; j < STATUS_HEADER_COUNT; j++)
|
||||||
{
|
{
|
||||||
if (headers_status[j].cur_length > headers_status[j].max_length)
|
if (headers_status[j].cur_length > headers_status[j].max_length)
|
||||||
@@ -193,38 +259,61 @@ do_daemon_status(void)
|
|||||||
{
|
{
|
||||||
if (runtime_options.output_mode == OM_CSV)
|
if (runtime_options.output_mode == OM_CSV)
|
||||||
{
|
{
|
||||||
printf("%i,%s,%s,%i,%i,%i,%i\n",
|
int running = repmgrd_info[i]->running ? 1 : 0;
|
||||||
|
int paused = repmgrd_info[i]->paused ? 1 : 0;
|
||||||
|
|
||||||
|
/* If PostgreSQL is not running, repmgrd status is unknown */
|
||||||
|
if (repmgrd_info[i]->pg_running == false)
|
||||||
|
{
|
||||||
|
running = -1;
|
||||||
|
paused = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("%i,%s,%s,%i,%i,%i,%i,%i,%i\n",
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
cell->node_info->node_name,
|
cell->node_info->node_name,
|
||||||
get_node_type_string(cell->node_info->type),
|
get_node_type_string(cell->node_info->type),
|
||||||
repmgrd_info[i]->pg_running ? 1 : 0,
|
repmgrd_info[i]->pg_running ? 1 : 0,
|
||||||
repmgrd_info[i]->running ? 1 : 0,
|
running,
|
||||||
repmgrd_info[i]->pid,
|
repmgrd_info[i]->pid,
|
||||||
repmgrd_info[i]->paused ? 1 : 0);
|
paused,
|
||||||
|
cell->node_info->priority,
|
||||||
|
repmgrd_info[i]->pid == UNKNOWN_PID
|
||||||
|
? -1
|
||||||
|
: repmgrd_info[i]->upstream_last_seen);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id);
|
printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id);
|
||||||
printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name);
|
printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name);
|
||||||
printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type));
|
printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type));
|
||||||
|
printf("| %-*i ", headers_status[STATUS_PRIORITY].max_length, cell->node_info->priority);
|
||||||
|
|
||||||
printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text);
|
printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text);
|
||||||
printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running);
|
printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running);
|
||||||
printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text);
|
printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text);
|
||||||
|
|
||||||
if (repmgrd_info[i]->pid == UNKNOWN_PID)
|
if (repmgrd_info[i]->pid == UNKNOWN_PID)
|
||||||
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, "n/a");
|
{
|
||||||
|
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, _("n/a"));
|
||||||
|
printf("| %-*s ", headers_status[STATUS_UPSTREAM_LAST_SEEN].max_length, _("n/a"));
|
||||||
|
|
||||||
|
}
|
||||||
else
|
else
|
||||||
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? "yes" : "no");
|
{
|
||||||
|
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? _("yes") : _("no"));
|
||||||
|
|
||||||
|
printf("| %-*s ", headers_status[STATUS_UPSTREAM_LAST_SEEN].max_length, repmgrd_info[i]->upstream_last_seen_text);
|
||||||
|
}
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
free(repmgrd_info[i]);
|
pfree(repmgrd_info[i]);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
free(repmgrd_info);
|
pfree(repmgrd_info);
|
||||||
|
|
||||||
/* emit any warnings */
|
/* emit any warnings */
|
||||||
|
|
||||||
@@ -238,7 +327,7 @@ do_daemon_status(void)
|
|||||||
printf(_(" - %s\n"), cell->string);
|
printf(_(" - %s\n"), cell->string);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime_options.verbose == false)
|
if (runtime_options.verbose == false && connection_error_found == true)
|
||||||
{
|
{
|
||||||
log_hint(_("execute with --verbose option to see connection error messages"));
|
log_hint(_("execute with --verbose option to see connection error messages"));
|
||||||
}
|
}
|
||||||
@@ -264,18 +353,9 @@ _do_repmgr_pause(bool pause)
|
|||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
NodeInfoListCell *cell = NULL;
|
NodeInfoListCell *cell = NULL;
|
||||||
RepmgrdInfo **repmgrd_info;
|
|
||||||
int i;
|
int i;
|
||||||
int error_nodes = 0;
|
int error_nodes = 0;
|
||||||
|
|
||||||
repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * nodes.node_count);
|
|
||||||
|
|
||||||
if (repmgrd_info == NULL)
|
|
||||||
{
|
|
||||||
log_error(_("unable to allocate memory"));
|
|
||||||
exit(ERR_OUT_OF_MEMORY);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Connect to local database to obtain cluster connection data */
|
/* Connect to local database to obtain cluster connection data */
|
||||||
log_verbose(LOG_INFO, _("connecting to database"));
|
log_verbose(LOG_INFO, _("connecting to database"));
|
||||||
|
|
||||||
@@ -290,9 +370,6 @@ _do_repmgr_pause(bool pause)
|
|||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
|
||||||
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "pausing node %i (%s)",
|
log_verbose(LOG_DEBUG, "pausing node %i (%s)",
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
cell->node_info->node_name);
|
cell->node_info->node_name);
|
||||||
@@ -383,6 +460,285 @@ fetch_node_records(PGconn *conn, NodeInfoList *node_list)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
do_daemon_start(void)
|
||||||
|
{
|
||||||
|
PGconn *conn = NULL;
|
||||||
|
PQExpBufferData repmgrd_command;
|
||||||
|
PQExpBufferData output_buf;
|
||||||
|
bool success;
|
||||||
|
|
||||||
|
if (config_file_options.repmgrd_service_start_command[0] == '\0')
|
||||||
|
{
|
||||||
|
log_error(_("\"repmgrd_service_start_command\" is not set"));
|
||||||
|
log_hint(_("set \"repmgrd_service_start_command\" in \"repmgr.conf\""));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_INFO, _("connecting to local node"));
|
||||||
|
|
||||||
|
conn = establish_db_connection(config_file_options.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
/* TODO: if PostgreSQL is not available, have repmgrd loop and retry connection */
|
||||||
|
log_error(_("unable to connect to local node"));
|
||||||
|
log_detail(_("PostgreSQL must be running before \"repmgrd\" can be started"));
|
||||||
|
exit(ERR_DB_CONN);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if local connection available, check if repmgr.so is installed, and
|
||||||
|
* whether repmgrd is running
|
||||||
|
*/
|
||||||
|
check_shared_library(conn);
|
||||||
|
|
||||||
|
if (is_repmgrd_running(conn) == true)
|
||||||
|
{
|
||||||
|
pid_t pid = UNKNOWN_PID;
|
||||||
|
|
||||||
|
log_error(_("repmgrd appears to be running already"));
|
||||||
|
|
||||||
|
pid = repmgrd_get_pid(conn);
|
||||||
|
|
||||||
|
if (pid != UNKNOWN_PID)
|
||||||
|
log_detail(_("repmgrd PID is %i"), pid);
|
||||||
|
else
|
||||||
|
log_warning(_("unable to determine repmgrd PID"));
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
|
||||||
|
|
||||||
|
initPQExpBuffer(&repmgrd_command);
|
||||||
|
appendPQExpBufferStr(&repmgrd_command,
|
||||||
|
config_file_options.repmgrd_service_start_command);
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("prerequisites for starting repmgrd met"));
|
||||||
|
log_detail("following command would be executed:\n %s", repmgrd_command.data);
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_notice(_("executing: \"%s\""), repmgrd_command.data);
|
||||||
|
|
||||||
|
initPQExpBuffer(&output_buf);
|
||||||
|
|
||||||
|
success = local_command(repmgrd_command.data, &output_buf);
|
||||||
|
termPQExpBuffer(&repmgrd_command);
|
||||||
|
|
||||||
|
if (success == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to start repmgrd"));
|
||||||
|
if (output_buf.data[0] != '\0')
|
||||||
|
log_detail("%s", output_buf.data);
|
||||||
|
termPQExpBuffer(&output_buf);
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&output_buf);
|
||||||
|
|
||||||
|
if (runtime_options.no_wait == true || runtime_options.wait == 0)
|
||||||
|
{
|
||||||
|
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
int timeout = REPMGR_DAEMON_STOP_START_WAIT;
|
||||||
|
|
||||||
|
if (runtime_options.wait_provided)
|
||||||
|
timeout = runtime_options.wait;
|
||||||
|
|
||||||
|
conn = establish_db_connection(config_file_options.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_notice(_("unable to connect to local node"));
|
||||||
|
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||||
|
exit(ERR_DB_CONN);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (is_repmgrd_running(conn) == true)
|
||||||
|
{
|
||||||
|
log_notice(_("repmgrd was successfully started"));
|
||||||
|
PQfinish(conn);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == timeout)
|
||||||
|
{
|
||||||
|
PQfinish(conn);
|
||||||
|
log_error(_("repmgrd does not appear to have started after %i seconds"),
|
||||||
|
timeout);
|
||||||
|
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("sleeping 1 second; %i of %i attempts to determine if repmgrd is running",
|
||||||
|
i, runtime_options.wait);
|
||||||
|
sleep(1);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void do_daemon_stop(void)
|
||||||
|
{
|
||||||
|
PGconn *conn = NULL;
|
||||||
|
PQExpBufferData repmgrd_command;
|
||||||
|
PQExpBufferData output_buf;
|
||||||
|
bool success;
|
||||||
|
bool have_db_connection = true;
|
||||||
|
pid_t pid = UNKNOWN_PID;
|
||||||
|
|
||||||
|
if (config_file_options.repmgrd_service_stop_command[0] == '\0')
|
||||||
|
{
|
||||||
|
log_error(_("\"repmgrd_service_stop_command\" is not set"));
|
||||||
|
log_hint(_("set \"repmgrd_service_stop_command\" in \"repmgr.conf\""));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if local connection available, check if repmgr.so is installed, and
|
||||||
|
* whether repmgrd is running
|
||||||
|
*/
|
||||||
|
log_verbose(LOG_INFO, _("connecting to local node"));
|
||||||
|
|
||||||
|
conn = establish_db_connection(config_file_options.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* a PostgreSQL connection is not required to stop repmgrd,
|
||||||
|
*/
|
||||||
|
log_warning(_("unable to connect to local node"));
|
||||||
|
have_db_connection = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
check_shared_library(conn);
|
||||||
|
|
||||||
|
if (is_repmgrd_running(conn) == false)
|
||||||
|
{
|
||||||
|
log_error(_("repmgrd appears to be stopped already"));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Attempt to fetch the PID, in case we need it later */
|
||||||
|
pid = repmgrd_get_pid(conn);
|
||||||
|
log_debug("retrieved pid is %i", pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
|
||||||
|
initPQExpBuffer(&repmgrd_command);
|
||||||
|
|
||||||
|
appendPQExpBufferStr(&repmgrd_command,
|
||||||
|
config_file_options.repmgrd_service_stop_command);
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("prerequisites for stopping repmgrd met"));
|
||||||
|
log_detail("following command would be executed:\n %s", repmgrd_command.data);
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_notice(_("executing: \"%s\""), repmgrd_command.data);
|
||||||
|
|
||||||
|
initPQExpBuffer(&output_buf);
|
||||||
|
|
||||||
|
success = local_command(repmgrd_command.data, &output_buf);
|
||||||
|
termPQExpBuffer(&repmgrd_command);
|
||||||
|
|
||||||
|
if (success == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to stop repmgrd"));
|
||||||
|
if (output_buf.data[0] != '\0')
|
||||||
|
log_detail("%s", output_buf.data);
|
||||||
|
termPQExpBuffer(&output_buf);
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&output_buf);
|
||||||
|
|
||||||
|
if (runtime_options.no_wait == true || runtime_options.wait == 0)
|
||||||
|
{
|
||||||
|
if (have_db_connection == true)
|
||||||
|
log_hint(REPMGR_DAEMON_STATUS_STOP_HINT);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
int timeout = REPMGR_DAEMON_STOP_START_WAIT;
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
if (pid == UNKNOWN_PID)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* XXX attempt to get pidfile from config
|
||||||
|
* and get contents
|
||||||
|
* ( see check_and_create_pid_file() )
|
||||||
|
* if PID still unknown, exit here
|
||||||
|
*/
|
||||||
|
log_warning(_("unable to determine repmgrd PID"));
|
||||||
|
|
||||||
|
if (have_db_connection == true)
|
||||||
|
log_hint(REPMGR_DAEMON_STATUS_STOP_HINT);
|
||||||
|
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.wait_provided)
|
||||||
|
timeout = runtime_options.wait;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (kill(pid, 0) == -1)
|
||||||
|
{
|
||||||
|
if (errno == ESRCH)
|
||||||
|
{
|
||||||
|
log_notice(_("repmgrd was successfully stopped"));
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_error(_("unable to determine status of process with PID %i"), pid);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (i == timeout)
|
||||||
|
{
|
||||||
|
log_error(_("repmgrd does not appear to have stopped after %i seconds"),
|
||||||
|
timeout);
|
||||||
|
|
||||||
|
if (have_db_connection == true)
|
||||||
|
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||||
|
|
||||||
|
exit(ERR_REPMGRD_SERVICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("sleeping 1 second; %i of %i attempts to determine if repmgrd with PID %i is running",
|
||||||
|
i, timeout, pid);
|
||||||
|
sleep(1);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void do_daemon_help(void)
|
void do_daemon_help(void)
|
||||||
{
|
{
|
||||||
print_help_header();
|
print_help_header();
|
||||||
@@ -391,6 +747,8 @@ void do_daemon_help(void)
|
|||||||
printf(_(" %s [OPTIONS] daemon status\n"), progname());
|
printf(_(" %s [OPTIONS] daemon status\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] daemon pause\n"), progname());
|
printf(_(" %s [OPTIONS] daemon pause\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] daemon unpause\n"), progname());
|
printf(_(" %s [OPTIONS] daemon unpause\n"), progname());
|
||||||
|
printf(_(" %s [OPTIONS] daemon start\n"), progname());
|
||||||
|
printf(_(" %s [OPTIONS] daemon stop\n"), progname());
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_("DAEMON STATUS\n"));
|
printf(_("DAEMON STATUS\n"));
|
||||||
@@ -401,6 +759,24 @@ void do_daemon_help(void)
|
|||||||
printf(_(" --verbose show text of database connection error messages\n"));
|
printf(_(" --verbose show text of database connection error messages\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
|
printf(_("DAEMON START\n"));
|
||||||
|
puts("");
|
||||||
|
printf(_(" \"daemon start\" attempts to start repmgrd\n"));
|
||||||
|
puts("");
|
||||||
|
printf(_(" --dry-run check prerequisites but don't start repmgrd\n"));
|
||||||
|
printf(_(" -w/--wait wait for repmgrd to start (default: %i seconds)\n"), REPMGR_DAEMON_STOP_START_WAIT);
|
||||||
|
printf(_(" --no-wait don't wait for repmgrd to start\n"));
|
||||||
|
puts("");
|
||||||
|
|
||||||
|
printf(_("DAEMON STOP\n"));
|
||||||
|
puts("");
|
||||||
|
printf(_(" \"daemon stop\" attempts to stop repmgrd\n"));
|
||||||
|
puts("");
|
||||||
|
printf(_(" --dry-run check prerequisites but don't stop repmgrd\n"));
|
||||||
|
printf(_(" -w/--wait wait for repmgrd to stop (default: %i seconds)\n"), REPMGR_DAEMON_STOP_START_WAIT);
|
||||||
|
printf(_(" --no-wait don't wait for repmgrd to stop\n"));
|
||||||
|
puts("");
|
||||||
|
|
||||||
printf(_("DAEMON PAUSE\n"));
|
printf(_("DAEMON PAUSE\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" \"daemon pause\" instructs repmgrd on each node to pause failover detection\n"));
|
printf(_(" \"daemon pause\" instructs repmgrd on each node to pause failover detection\n"));
|
||||||
@@ -408,13 +784,12 @@ void do_daemon_help(void)
|
|||||||
printf(_(" --dry-run check if nodes are reachable but don't pause repmgrd\n"));
|
printf(_(" --dry-run check if nodes are reachable but don't pause repmgrd\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_("DAEMON PAUSE\n"));
|
printf(_("DAEMON UNPAUSE\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" \"daemon unpause\" instructs repmgrd on each node to resume failover detection\n"));
|
printf(_(" \"daemon unpause\" instructs repmgrd on each node to resume failover detection\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --dry-run check if nodes are reachable but don't unpause repmgrd\n"));
|
printf(_(" --dry-run check if nodes are reachable but don't unpause repmgrd\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-daemon.h
|
* repmgr-action-daemon.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -23,6 +23,8 @@
|
|||||||
extern void do_daemon_status(void);
|
extern void do_daemon_status(void);
|
||||||
extern void do_daemon_pause(void);
|
extern void do_daemon_pause(void);
|
||||||
extern void do_daemon_unpause(void);
|
extern void do_daemon_unpause(void);
|
||||||
|
extern void do_daemon_start(void);
|
||||||
|
extern void do_daemon_stop(void);
|
||||||
|
|
||||||
extern void do_daemon_help(void);
|
extern void do_daemon_help(void);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements actions available for any kind of node
|
* Implements actions available for any kind of node
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -48,6 +48,7 @@ static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode,
|
|||||||
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
|
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NODE STATUS
|
* NODE STATUS
|
||||||
@@ -66,7 +67,6 @@ do_node_status(void)
|
|||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
|
|
||||||
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
||||||
char server_version[MAXLEN];
|
|
||||||
char cluster_size[MAXLEN];
|
char cluster_size[MAXLEN];
|
||||||
PQExpBufferData output;
|
PQExpBufferData output;
|
||||||
|
|
||||||
@@ -76,21 +76,29 @@ do_node_status(void)
|
|||||||
|
|
||||||
ItemList warnings = {NULL, NULL};
|
ItemList warnings = {NULL, NULL};
|
||||||
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
||||||
ReplInfo replication_info = T_REPLINFO_INTIALIZER;
|
ReplInfo replication_info;
|
||||||
t_recovery_conf recovery_conf = T_RECOVERY_CONF_INITIALIZER;
|
t_recovery_conf recovery_conf = T_RECOVERY_CONF_INITIALIZER;
|
||||||
|
|
||||||
char data_dir[MAXPGPATH] = "";
|
char data_dir[MAXPGPATH] = "";
|
||||||
|
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
||||||
|
char server_version_str[MAXVERSIONSTR] = "";
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A database connection is *not* required for this check
|
||||||
|
*/
|
||||||
if (runtime_options.is_shutdown_cleanly == true)
|
if (runtime_options.is_shutdown_cleanly == true)
|
||||||
{
|
{
|
||||||
return _do_node_status_is_shutdown_cleanly();
|
return _do_node_status_is_shutdown_cleanly();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
init_replication_info(&replication_info);
|
||||||
|
|
||||||
|
|
||||||
/* config file required, so we should have "conninfo" and "data_directory" */
|
/* config file required, so we should have "conninfo" and "data_directory" */
|
||||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
strncpy(data_dir, config_file_options.data_directory, MAXPGPATH);
|
strncpy(data_dir, config_file_options.data_directory, MAXPGPATH);
|
||||||
|
|
||||||
server_version_num = get_server_version(conn, NULL);
|
server_version_num = get_server_version(conn, server_version_str);
|
||||||
|
|
||||||
/* check node exists */
|
/* check node exists */
|
||||||
|
|
||||||
@@ -101,18 +109,16 @@ do_node_status(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) get_server_version(conn, server_version);
|
|
||||||
|
|
||||||
if (get_cluster_size(conn, cluster_size) == false)
|
if (get_cluster_size(conn, cluster_size) == false)
|
||||||
strncpy(cluster_size, _("unknown"), MAXLEN);
|
strncpy(cluster_size, _("unknown"), MAXLEN);
|
||||||
|
|
||||||
recovery_type = get_recovery_type(conn);
|
recovery_type = get_recovery_type(conn);
|
||||||
|
|
||||||
get_node_replication_stats(conn, server_version_num, &node_info);
|
get_node_replication_stats(conn, &node_info);
|
||||||
|
|
||||||
key_value_list_set(&node_status,
|
key_value_list_set(&node_status,
|
||||||
"PostgreSQL version",
|
"PostgreSQL version",
|
||||||
server_version);
|
server_version_str);
|
||||||
|
|
||||||
key_value_list_set(&node_status,
|
key_value_list_set(&node_status,
|
||||||
"Total data size",
|
"Total data size",
|
||||||
@@ -219,19 +225,27 @@ do_node_status(void)
|
|||||||
|
|
||||||
ready_files = get_ready_archive_files(conn, data_dir);
|
ready_files = get_ready_archive_files(conn, data_dir);
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_CSV)
|
if (ready_files == ARCHIVE_STATUS_DIR_ERROR)
|
||||||
{
|
{
|
||||||
key_value_list_set_format(&node_status,
|
item_list_append_format(&warnings,
|
||||||
"WALs pending archiving",
|
"- unable to check archive_status directory\n");
|
||||||
"%i",
|
|
||||||
ready_files);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
key_value_list_set_format(&node_status,
|
if (runtime_options.output_mode == OM_CSV)
|
||||||
"WALs pending archiving",
|
{
|
||||||
"%i pending files",
|
key_value_list_set_format(&node_status,
|
||||||
ready_files);
|
"WALs pending archiving",
|
||||||
|
"%i",
|
||||||
|
ready_files);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
key_value_list_set_format(&node_status,
|
||||||
|
"WALs pending archiving",
|
||||||
|
"%i pending files",
|
||||||
|
ready_files);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (guc_set(conn, "archive_mode", "=", "off"))
|
if (guc_set(conn, "archive_mode", "=", "off"))
|
||||||
@@ -354,7 +368,7 @@ do_node_status(void)
|
|||||||
initPQExpBuffer(&slotinfo);
|
initPQExpBuffer(&slotinfo);
|
||||||
|
|
||||||
appendPQExpBuffer(&slotinfo,
|
appendPQExpBuffer(&slotinfo,
|
||||||
"%i (of maximal %i; %i missing)",
|
"%i physical (of maximal %i; %i missing)",
|
||||||
node_info.active_replication_slots + node_info.inactive_replication_slots,
|
node_info.active_replication_slots + node_info.inactive_replication_slots,
|
||||||
node_info.max_replication_slots,
|
node_info.max_replication_slots,
|
||||||
missing_slots.node_count);
|
missing_slots.node_count);
|
||||||
@@ -371,13 +385,13 @@ do_node_status(void)
|
|||||||
node_info.inactive_replication_slots);
|
node_info.inactive_replication_slots);
|
||||||
|
|
||||||
item_list_append_format(&warnings,
|
item_list_append_format(&warnings,
|
||||||
_("- node has %i inactive replication slots"),
|
_("- node has %i inactive physical replication slots"),
|
||||||
node_info.inactive_replication_slots);
|
node_info.inactive_replication_slots);
|
||||||
|
|
||||||
for (cell = inactive_replication_slots.head; cell; cell = cell->next)
|
for (cell = inactive_replication_slots.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
item_list_append_format(&warnings,
|
item_list_append_format(&warnings,
|
||||||
" - %s (%s)", cell->key, cell->value);
|
" - %s", cell->key);
|
||||||
}
|
}
|
||||||
|
|
||||||
key_value_list_free(&inactive_replication_slots);
|
key_value_list_free(&inactive_replication_slots);
|
||||||
@@ -399,7 +413,7 @@ do_node_status(void)
|
|||||||
node_info.upstream_node_name,
|
node_info.upstream_node_name,
|
||||||
node_info.upstream_node_id);
|
node_info.upstream_node_id);
|
||||||
|
|
||||||
get_replication_info(conn, &replication_info);
|
get_replication_info(conn, node_info.type, &replication_info);
|
||||||
|
|
||||||
key_value_list_set_format(&node_status,
|
key_value_list_set_format(&node_status,
|
||||||
"Replication lag",
|
"Replication lag",
|
||||||
@@ -654,27 +668,17 @@ _do_node_status_is_shutdown_cleanly(void)
|
|||||||
node_status = NODE_STATUS_DOWN;
|
node_status = NODE_STATUS_DOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "node status determined as: %s", print_node_status(node_status));
|
log_verbose(LOG_DEBUG, "node status determined as: %s",
|
||||||
|
print_node_status(node_status));
|
||||||
|
|
||||||
switch (node_status)
|
appendPQExpBuffer(&output,
|
||||||
|
"%s", print_node_status(node_status));
|
||||||
|
|
||||||
|
if (node_status == NODE_STATUS_DOWN)
|
||||||
{
|
{
|
||||||
case NODE_STATUS_UP:
|
appendPQExpBuffer(&output,
|
||||||
appendPQExpBufferStr(&output, "RUNNING");
|
" --last-checkpoint-lsn=%X/%X",
|
||||||
break;
|
format_lsn(checkPoint));
|
||||||
case NODE_STATUS_SHUTTING_DOWN:
|
|
||||||
appendPQExpBufferStr(&output, "SHUTTING_DOWN");
|
|
||||||
break;
|
|
||||||
case NODE_STATUS_DOWN:
|
|
||||||
appendPQExpBuffer(&output,
|
|
||||||
"SHUTDOWN --last-checkpoint-lsn=%X/%X",
|
|
||||||
format_lsn(checkPoint));
|
|
||||||
break;
|
|
||||||
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
|
||||||
appendPQExpBufferStr(&output, "UNCLEAN_SHUTDOWN");
|
|
||||||
break;
|
|
||||||
case NODE_STATUS_UNKNOWN:
|
|
||||||
appendPQExpBufferStr(&output, "UNKNOWN");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("%s\n", output.data);
|
printf("%s\n", output.data);
|
||||||
@@ -725,10 +729,8 @@ do_node_check(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
server_version_num = get_server_version(conn, NULL);
|
|
||||||
|
|
||||||
/* add replication statistics to node record */
|
/* add replication statistics to node record */
|
||||||
get_node_replication_stats(conn, server_version_num, &node_info);
|
get_node_replication_stats(conn, &node_info);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* handle specific checks ======================
|
* handle specific checks ======================
|
||||||
@@ -792,6 +794,16 @@ do_node_check(void)
|
|||||||
exit(return_code);
|
exit(return_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (runtime_options.data_directory_config == true)
|
||||||
|
{
|
||||||
|
return_code = do_node_check_data_directory(conn,
|
||||||
|
runtime_options.output_mode,
|
||||||
|
&node_info,
|
||||||
|
NULL);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(return_code);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_NAGIOS)
|
if (runtime_options.output_mode == OM_NAGIOS)
|
||||||
{
|
{
|
||||||
@@ -824,6 +836,9 @@ do_node_check(void)
|
|||||||
if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
issue_detected = true;
|
issue_detected = true;
|
||||||
|
|
||||||
|
if (do_node_check_data_directory(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
|
issue_detected = true;
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_CSV)
|
if (runtime_options.output_mode == OM_CSV)
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(&output,
|
appendPQExpBuffer(&output,
|
||||||
@@ -1393,7 +1408,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (lag_seconds < 0)
|
else if (lag_seconds == UNKNOWN_REPLICATION_LAG)
|
||||||
{
|
{
|
||||||
status = CHECK_STATUS_UNKNOWN;
|
status = CHECK_STATUS_UNKNOWN;
|
||||||
|
|
||||||
@@ -1445,11 +1460,9 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
|||||||
switch (mode)
|
switch (mode)
|
||||||
{
|
{
|
||||||
case OM_OPTFORMAT:
|
case OM_OPTFORMAT:
|
||||||
{
|
printf("--status=%s %s\n",
|
||||||
printf("--status=%s %s\n",
|
output_check_status(status),
|
||||||
output_check_status(status),
|
details.data);
|
||||||
details.data);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case OM_NAGIOS:
|
case OM_NAGIOS:
|
||||||
printf("REPMGR_REPLICATION_LAG %s: %s\n",
|
printf("REPMGR_REPLICATION_LAG %s: %s\n",
|
||||||
@@ -1618,7 +1631,7 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
|
|||||||
|
|
||||||
initPQExpBuffer(&details);
|
initPQExpBuffer(&details);
|
||||||
|
|
||||||
if (server_version_num < 90400)
|
if (PQserverVersion(conn) < 90400)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(&details,
|
appendPQExpBufferStr(&details,
|
||||||
_("replication slots not available for this PostgreSQL version"));
|
_("replication slots not available for this PostgreSQL version"));
|
||||||
@@ -1626,12 +1639,12 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
|
|||||||
else if (node_info->total_replication_slots == 0)
|
else if (node_info->total_replication_slots == 0)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(&details,
|
appendPQExpBufferStr(&details,
|
||||||
_("node has no replication slots"));
|
_("node has no physical replication slots"));
|
||||||
}
|
}
|
||||||
else if (node_info->inactive_replication_slots == 0)
|
else if (node_info->inactive_replication_slots == 0)
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(&details,
|
appendPQExpBuffer(&details,
|
||||||
_("%i of %i replication slots are active"),
|
_("%i of %i physical replication slots are active"),
|
||||||
node_info->total_replication_slots,
|
node_info->total_replication_slots,
|
||||||
node_info->total_replication_slots);
|
node_info->total_replication_slots);
|
||||||
}
|
}
|
||||||
@@ -1640,7 +1653,7 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
|
|||||||
status = CHECK_STATUS_CRITICAL;
|
status = CHECK_STATUS_CRITICAL;
|
||||||
|
|
||||||
appendPQExpBuffer(&details,
|
appendPQExpBuffer(&details,
|
||||||
_("%i of %i replication slots are inactive"),
|
_("%i of %i physical replication slots are inactive"),
|
||||||
node_info->inactive_replication_slots,
|
node_info->inactive_replication_slots,
|
||||||
node_info->total_replication_slots);
|
node_info->total_replication_slots);
|
||||||
}
|
}
|
||||||
@@ -1694,7 +1707,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
|||||||
|
|
||||||
initPQExpBuffer(&details);
|
initPQExpBuffer(&details);
|
||||||
|
|
||||||
if (server_version_num < 90400)
|
if (PQserverVersion(conn) < 90400)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(&details,
|
appendPQExpBufferStr(&details,
|
||||||
_("replication slots not available for this PostgreSQL version"));
|
_("replication slots not available for this PostgreSQL version"));
|
||||||
@@ -1708,7 +1721,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
|||||||
if (missing_slots.node_count == 0)
|
if (missing_slots.node_count == 0)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(&details,
|
appendPQExpBufferStr(&details,
|
||||||
_("node has no missing replication slots"));
|
_("node has no missing physical replication slots"));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -1718,7 +1731,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
|||||||
status = CHECK_STATUS_CRITICAL;
|
status = CHECK_STATUS_CRITICAL;
|
||||||
|
|
||||||
appendPQExpBuffer(&details,
|
appendPQExpBuffer(&details,
|
||||||
_("%i replication slots are missing"),
|
_("%i physical replication slots are missing"),
|
||||||
missing_slots.node_count);
|
missing_slots.node_count);
|
||||||
|
|
||||||
if (missing_slots.node_count)
|
if (missing_slots.node_count)
|
||||||
@@ -1779,7 +1792,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
|||||||
if (list_output != NULL)
|
if (list_output != NULL)
|
||||||
{
|
{
|
||||||
check_status_list_set(list_output,
|
check_status_list_set(list_output,
|
||||||
"Missing replication slots",
|
"Missing physical replication slots",
|
||||||
status,
|
status,
|
||||||
details.data);
|
details.data);
|
||||||
}
|
}
|
||||||
@@ -1800,6 +1813,135 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
CheckStatus
|
||||||
|
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
|
{
|
||||||
|
CheckStatus status = CHECK_STATUS_OK;
|
||||||
|
char actual_data_directory[MAXPGPATH] = "";
|
||||||
|
PQExpBufferData details;
|
||||||
|
|
||||||
|
if (mode == OM_CSV && list_output == NULL)
|
||||||
|
{
|
||||||
|
log_error(_("--csv output not provided with --data-directory-config option"));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
initPQExpBuffer(&details);
|
||||||
|
/*
|
||||||
|
* Check actual data directory matches that in repmgr.conf; note this requires
|
||||||
|
* a superuser connection
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (is_superuser_connection(conn, NULL) == true)
|
||||||
|
{
|
||||||
|
/* we expect to have a database connection */
|
||||||
|
if (get_pg_setting(conn, "data_directory", actual_data_directory) == false)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("unable to determine current \"data_directory\""));
|
||||||
|
status = CHECK_STATUS_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strncmp(actual_data_directory, config_file_options.data_directory, MAXPGPATH) != 0)
|
||||||
|
{
|
||||||
|
if (mode != OM_NAGIOS)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("configured \"data_directory\" is \"%s\"; "),
|
||||||
|
config_file_options.data_directory);
|
||||||
|
}
|
||||||
|
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
"actual data directory is \"%s\"",
|
||||||
|
actual_data_directory);
|
||||||
|
|
||||||
|
status = CHECK_STATUS_CRITICAL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("configured \"data_directory\" is \"%s\""),
|
||||||
|
config_file_options.data_directory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* If no superuser connection available, sanity-check that the configuration directory looks
|
||||||
|
* like a PostgreSQL directory and hope it's the right one.
|
||||||
|
*/
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (mode == OM_TEXT)
|
||||||
|
{
|
||||||
|
log_info(_("connection is not a superuser connection, falling back to simple check"));
|
||||||
|
|
||||||
|
/* XXX add -S/--superuser option */
|
||||||
|
if (PQserverVersion(conn) >= 100000)
|
||||||
|
{
|
||||||
|
log_hint(_("add the \"%s\" user to group \"pg_read_all_settings\""),
|
||||||
|
PQuser(conn));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_pg_dir(config_file_options.data_directory) == false)
|
||||||
|
{
|
||||||
|
if (mode == OM_NAGIOS)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details,
|
||||||
|
_("configured \"data_directory\" is not a PostgreSQL data directory"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("configured \"data_directory\" \"%s\" is not a PostgreSQL data directory"),
|
||||||
|
actual_data_directory);
|
||||||
|
}
|
||||||
|
|
||||||
|
status = CHECK_STATUS_CRITICAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (mode)
|
||||||
|
{
|
||||||
|
case OM_OPTFORMAT:
|
||||||
|
printf("--configured-data-directory=%s\n",
|
||||||
|
output_check_status(status));
|
||||||
|
break;
|
||||||
|
case OM_NAGIOS:
|
||||||
|
printf("REPMGR_DATA_DIRECTORY %s: %s",
|
||||||
|
output_check_status(status),
|
||||||
|
config_file_options.data_directory);
|
||||||
|
|
||||||
|
if (status == CHECK_STATUS_CRITICAL)
|
||||||
|
{
|
||||||
|
printf(" | %s", details.data);
|
||||||
|
}
|
||||||
|
puts("");
|
||||||
|
break;
|
||||||
|
case OM_CSV:
|
||||||
|
case OM_TEXT:
|
||||||
|
if (list_output != NULL)
|
||||||
|
{
|
||||||
|
check_status_list_set(list_output,
|
||||||
|
"Configured data directory",
|
||||||
|
status,
|
||||||
|
details.data);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("%s (%s)\n",
|
||||||
|
output_check_status(status),
|
||||||
|
details.data);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&details);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
do_node_service(void)
|
do_node_service(void)
|
||||||
@@ -1993,7 +2135,9 @@ void
|
|||||||
do_node_rejoin(void)
|
do_node_rejoin(void)
|
||||||
{
|
{
|
||||||
PGconn *upstream_conn = NULL;
|
PGconn *upstream_conn = NULL;
|
||||||
RecoveryType upstream_recovery_type = RECTYPE_UNKNOWN;
|
RecoveryType primary_recovery_type = RECTYPE_UNKNOWN;
|
||||||
|
PGconn *primary_conn = NULL;
|
||||||
|
|
||||||
DBState db_state;
|
DBState db_state;
|
||||||
PGPing status;
|
PGPing status;
|
||||||
bool is_shutdown = true;
|
bool is_shutdown = true;
|
||||||
@@ -2005,11 +2149,9 @@ do_node_rejoin(void)
|
|||||||
t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
bool success = true;
|
bool success = true;
|
||||||
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
|
||||||
int follow_error_code = SUCCESS;
|
int follow_error_code = SUCCESS;
|
||||||
|
|
||||||
/* check node is not actually running */
|
/* check node is not actually running */
|
||||||
|
|
||||||
status = PQping(config_file_options.conninfo);
|
status = PQping(config_file_options.conninfo);
|
||||||
|
|
||||||
switch (status)
|
switch (status)
|
||||||
@@ -2035,7 +2177,7 @@ do_node_rejoin(void)
|
|||||||
log_error(_("database is still running in state \"%s\""),
|
log_error(_("database is still running in state \"%s\""),
|
||||||
describe_db_state(db_state));
|
describe_db_state(db_state));
|
||||||
log_hint(_("\"repmgr node rejoin\" cannot be executed on a running node"));
|
log_hint(_("\"repmgr node rejoin\" cannot be executed on a running node"));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_REJOIN_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if cleanly shut down */
|
/* check if cleanly shut down */
|
||||||
@@ -2054,18 +2196,15 @@ do_node_rejoin(void)
|
|||||||
log_detail(_("pg_rewind will not be able to run"));
|
log_detail(_("pg_rewind will not be able to run"));
|
||||||
}
|
}
|
||||||
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
|
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_REJOIN_FAIL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* check provided upstream connection */
|
/* check provided upstream connection */
|
||||||
upstream_conn = establish_db_connection_by_params(&source_conninfo, true);
|
upstream_conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||||
|
|
||||||
/* sanity checks for 9.3 */
|
/* sanity checks for 9.3 */
|
||||||
server_version_num = get_server_version(upstream_conn, NULL);
|
if (PQserverVersion(upstream_conn) < 90400)
|
||||||
|
|
||||||
if (server_version_num < 90400)
|
|
||||||
check_93_config();
|
check_93_config();
|
||||||
|
|
||||||
if (get_primary_node_record(upstream_conn, &primary_node_record) == false)
|
if (get_primary_node_record(upstream_conn, &primary_node_record) == false)
|
||||||
@@ -2076,40 +2215,85 @@ do_node_rejoin(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(upstream_conn);
|
|
||||||
|
|
||||||
/* connect to registered primary and check it's not in recovery */
|
/* connect to registered primary and check it's not in recovery */
|
||||||
upstream_conn = establish_db_connection(primary_node_record.conninfo, false);
|
primary_conn = establish_db_connection(primary_node_record.conninfo, false);
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to connect to current primary \"%s\" (node ID: %i)"),
|
RecoveryType upstream_recovery_type = get_recovery_type(upstream_conn);
|
||||||
|
|
||||||
|
log_error(_("unable to connect to current registered primary \"%s\" (node ID: %i)"),
|
||||||
primary_node_record.node_name,
|
primary_node_record.node_name,
|
||||||
primary_node_record.node_id);
|
primary_node_record.node_id);
|
||||||
log_detail(_("primay node conninfo is: \"%s\""),
|
log_detail(_("registered primary node conninfo is: \"%s\""),
|
||||||
primary_node_record.conninfo);
|
primary_node_record.conninfo);
|
||||||
|
/*
|
||||||
|
* Catch case where provided upstream is not in recovery, but is also
|
||||||
|
* not registered as primary
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (upstream_recovery_type == RECTYPE_PRIMARY)
|
||||||
|
{
|
||||||
|
log_warning(_("provided upstream connection string is for a server which is not in recovery, but not registered as primary"));
|
||||||
|
log_hint(_("fix repmgr metadata configuration before continuing"));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(upstream_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
upstream_recovery_type = get_recovery_type(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
|
|
||||||
if (upstream_recovery_type != RECTYPE_PRIMARY)
|
primary_recovery_type = get_recovery_type(primary_conn);
|
||||||
|
|
||||||
|
if (primary_recovery_type != RECTYPE_PRIMARY)
|
||||||
{
|
{
|
||||||
log_error(_("primary server is registered node \"%s\" (ID: %i), but server is not a primary"),
|
log_error(_("primary server is registered as node \"%s\" (ID: %i), but server is not a primary"),
|
||||||
primary_node_record.node_name,
|
primary_node_record.node_name,
|
||||||
primary_node_record.node_id);
|
primary_node_record.node_id);
|
||||||
/* TODO: hint about checking cluster */
|
/* TODO: hint about checking cluster */
|
||||||
PQfinish(upstream_conn);
|
PQfinish(primary_conn);
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sanity-check that it will actually be possible to stream from the new upstream
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
bool can_follow;
|
||||||
|
TimeLineID tli = get_min_recovery_end_timeline(config_file_options.data_directory);
|
||||||
|
XLogRecPtr min_recovery_location = get_min_recovery_location(config_file_options.data_directory);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's possible this was a former primary, so the minRecoveryPoint*
|
||||||
|
* fields may be empty.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (min_recovery_location == InvalidXLogRecPtr)
|
||||||
|
min_recovery_location = get_latest_checkpoint_location(config_file_options.data_directory);
|
||||||
|
if (tli == 0)
|
||||||
|
tli = get_timeline(config_file_options.data_directory);
|
||||||
|
|
||||||
|
can_follow = check_node_can_attach(tli,
|
||||||
|
min_recovery_location,
|
||||||
|
primary_conn,
|
||||||
|
&primary_node_record,
|
||||||
|
true);
|
||||||
|
|
||||||
|
if (can_follow == false)
|
||||||
|
{
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_REJOIN_FAIL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* --force-rewind specified - check prerequisites, and attempt to execute
|
* --force-rewind specified - check prerequisites, and attempt to execute
|
||||||
* (if --dry-run provided, just output the command which would be executed)
|
* (if --dry-run provided, just output the command which would be executed)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
if (runtime_options.force_rewind_used == true)
|
if (runtime_options.force_rewind_used == true)
|
||||||
{
|
{
|
||||||
PQExpBufferData msg;
|
PQExpBufferData msg;
|
||||||
@@ -2122,12 +2306,12 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
initPQExpBuffer(&msg);
|
initPQExpBuffer(&msg);
|
||||||
|
|
||||||
if (can_use_pg_rewind(upstream_conn, config_file_options.data_directory, &msg) == false)
|
if (can_use_pg_rewind(primary_conn, config_file_options.data_directory, &msg) == false)
|
||||||
{
|
{
|
||||||
log_error(_("--force-rewind specified but pg_rewind cannot be used"));
|
log_error(_("--force-rewind specified but pg_rewind cannot be used"));
|
||||||
log_detail("%s", msg.data);
|
log_detail("%s", msg.data);
|
||||||
termPQExpBuffer(&msg);
|
termPQExpBuffer(&msg);
|
||||||
PQfinish(upstream_conn);
|
PQfinish(primary_conn);
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -2186,8 +2370,8 @@ do_node_rejoin(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_notice(_("executing pg_rewind"));
|
log_notice(_("executing pg_rewind"));
|
||||||
log_debug("pg_rewind command is:\n %s",
|
log_detail(_("pg_rewind command is \"%s\""),
|
||||||
command.data);
|
command.data);
|
||||||
|
|
||||||
initPQExpBuffer(&command_output);
|
initPQExpBuffer(&command_output);
|
||||||
|
|
||||||
@@ -2203,7 +2387,7 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
termPQExpBuffer(&command_output);
|
termPQExpBuffer(&command_output);
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_REJOIN_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
termPQExpBuffer(&command_output);
|
termPQExpBuffer(&command_output);
|
||||||
@@ -2292,6 +2476,8 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
termPQExpBuffer(&slotdir_ent_path);
|
termPQExpBuffer(&slotdir_ent_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
closedir(slotdir);
|
||||||
}
|
}
|
||||||
termPQExpBuffer(&slotdir_path);
|
termPQExpBuffer(&slotdir_path);
|
||||||
}
|
}
|
||||||
@@ -2306,26 +2492,34 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
initPQExpBuffer(&follow_output);
|
initPQExpBuffer(&follow_output);
|
||||||
|
|
||||||
success = do_standby_follow_internal(upstream_conn,
|
/*
|
||||||
|
* do_standby_follow_internal() can handle situations where the follow
|
||||||
|
* target is not the primary, so requires database handles to both
|
||||||
|
* (even if they point to the same node). For the time being,
|
||||||
|
* "node rejoin" will only attach a standby to the primary.
|
||||||
|
*/
|
||||||
|
success = do_standby_follow_internal(primary_conn,
|
||||||
|
primary_conn,
|
||||||
&primary_node_record,
|
&primary_node_record,
|
||||||
&follow_output,
|
&follow_output,
|
||||||
|
ERR_REJOIN_FAIL,
|
||||||
&follow_error_code);
|
&follow_error_code);
|
||||||
|
|
||||||
if (success == false)
|
if (success == false)
|
||||||
{
|
{
|
||||||
log_notice(_("NODE REJOIN failed"));
|
log_error(_("NODE REJOIN failed"));
|
||||||
|
|
||||||
if (strlen(follow_output.data))
|
if (strlen(follow_output.data))
|
||||||
log_detail("%s", follow_output.data);
|
log_detail("%s", follow_output.data);
|
||||||
|
|
||||||
create_event_notification(upstream_conn,
|
create_event_notification(primary_conn,
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
config_file_options.node_id,
|
config_file_options.node_id,
|
||||||
"node_rejoin",
|
"node_rejoin",
|
||||||
success,
|
success,
|
||||||
follow_output.data);
|
follow_output.data);
|
||||||
|
|
||||||
PQfinish(upstream_conn);
|
PQfinish(primary_conn);
|
||||||
|
|
||||||
termPQExpBuffer(&follow_output);
|
termPQExpBuffer(&follow_output);
|
||||||
exit(follow_error_code);
|
exit(follow_error_code);
|
||||||
@@ -2368,7 +2562,7 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
for (; i < config_file_options.node_rejoin_timeout; i++)
|
for (; i < config_file_options.node_rejoin_timeout; i++)
|
||||||
{
|
{
|
||||||
success = is_downstream_node_attached(upstream_conn, config_file_options.node_name);
|
success = is_downstream_node_attached(primary_conn, config_file_options.node_name);
|
||||||
|
|
||||||
if (success == true)
|
if (success == true)
|
||||||
{
|
{
|
||||||
@@ -2379,9 +2573,13 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
if (i % 5 == 0)
|
if (i % 5 == 0)
|
||||||
{
|
{
|
||||||
log_info(_("waiting for node %i to connect to new primary; %i of max %i attempts"),
|
log_info(_("waiting for node \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts"),
|
||||||
|
config_file_options.node_name,
|
||||||
config_file_options.node_id,
|
config_file_options.node_id,
|
||||||
i + 1, config_file_options.node_rejoin_timeout);
|
i + 1, config_file_options.node_rejoin_timeout);
|
||||||
|
log_detail(_("checking for record in node \"%s\"'s \"pg_stat_replication\" table where \"application_name\" is \"%s\""),
|
||||||
|
primary_node_record.node_name,
|
||||||
|
config_file_options.node_name);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -2393,7 +2591,7 @@ do_node_rejoin(void)
|
|||||||
sleep(1);
|
sleep(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
create_event_notification(upstream_conn,
|
create_event_notification(primary_conn,
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
config_file_options.node_id,
|
config_file_options.node_id,
|
||||||
"node_rejoin",
|
"node_rejoin",
|
||||||
@@ -2403,13 +2601,18 @@ do_node_rejoin(void)
|
|||||||
if (success == false)
|
if (success == false)
|
||||||
{
|
{
|
||||||
termPQExpBuffer(&follow_output);
|
termPQExpBuffer(&follow_output);
|
||||||
log_notice(_("NODE REJOIN failed"));
|
log_error(_("NODE REJOIN failed"));
|
||||||
|
log_detail(_("no record for local node \"%s\" found in node \"%s\"'s \"pg_stat_replication\" table"),
|
||||||
|
config_file_options.node_name,
|
||||||
|
primary_node_record.node_name);
|
||||||
|
log_hint(_("check the PostgreSQL log on the local node"));
|
||||||
exit(ERR_REJOIN_FAIL);
|
exit(ERR_REJOIN_FAIL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
success = is_downstream_node_attached(upstream_conn, config_file_options.node_name);
|
/* -W/--no-wait provided - check once */
|
||||||
|
success = is_downstream_node_attached(primary_conn, config_file_options.node_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -2480,6 +2683,48 @@ do_node_rejoin(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Currently for testing purposes only, not documented;
|
||||||
|
* use at own risk!
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
do_node_control(void)
|
||||||
|
{
|
||||||
|
PGconn *conn = NULL;
|
||||||
|
pid_t wal_receiver_pid = UNKNOWN_PID;
|
||||||
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
|
|
||||||
|
if (runtime_options.disable_wal_receiver == true)
|
||||||
|
{
|
||||||
|
wal_receiver_pid = disable_wal_receiver(conn);
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
|
||||||
|
if (wal_receiver_pid == UNKNOWN_PID)
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.enable_wal_receiver == true)
|
||||||
|
{
|
||||||
|
wal_receiver_pid = enable_wal_receiver(conn, true);
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
|
||||||
|
if (wal_receiver_pid == UNKNOWN_PID)
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_error(_("no option provided"));
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For "internal" use by `node rejoin` on the local node when
|
* For "internal" use by `node rejoin` on the local node when
|
||||||
* called by "standby switchover" from the remote node.
|
* called by "standby switchover" from the remote node.
|
||||||
@@ -2541,6 +2786,7 @@ _do_node_archive_config(void)
|
|||||||
|
|
||||||
arcdir = opendir(archive_dir.data);
|
arcdir = opendir(archive_dir.data);
|
||||||
|
|
||||||
|
/* always attempt to open the directory */
|
||||||
if (arcdir == NULL)
|
if (arcdir == NULL)
|
||||||
{
|
{
|
||||||
log_error(_("unable to open archive directory \"%s\""),
|
log_error(_("unable to open archive directory \"%s\""),
|
||||||
@@ -2586,10 +2832,11 @@ _do_node_archive_config(void)
|
|||||||
|
|
||||||
termPQExpBuffer(&arcdir_ent_path);
|
termPQExpBuffer(&arcdir_ent_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
closedir(arcdir);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
closedir(arcdir);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* extract list of config files from --config-files
|
* extract list of config files from --config-files
|
||||||
*/
|
*/
|
||||||
@@ -2861,11 +3108,12 @@ copy_file(const char *src_file, const char *dest_file)
|
|||||||
int a = 0;
|
int a = 0;
|
||||||
|
|
||||||
ptr_old = fopen(src_file, "r");
|
ptr_old = fopen(src_file, "r");
|
||||||
ptr_new = fopen(dest_file, "w");
|
|
||||||
|
|
||||||
if (ptr_old == NULL)
|
if (ptr_old == NULL)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
ptr_new = fopen(dest_file, "w");
|
||||||
|
|
||||||
if (ptr_new == NULL)
|
if (ptr_new == NULL)
|
||||||
{
|
{
|
||||||
fclose(ptr_old);
|
fclose(ptr_old);
|
||||||
@@ -2922,8 +3170,8 @@ do_node_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" Configuration file required, runs on local node only.\n"));
|
printf(_(" Configuration file required, runs on local node only.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --csv emit output as CSV\n"));
|
printf(_(" --csv emit output as CSV (not available for individual check output)\n"));
|
||||||
printf(_(" --nagios emit output in Nagios format (individual status output only)\n"));
|
printf(_(" --nagios emit output in Nagios format (individual check output only)\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" Following options check an individual status:\n"));
|
printf(_(" Following options check an individual status:\n"));
|
||||||
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
|
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
|
||||||
@@ -2932,6 +3180,7 @@ do_node_help(void)
|
|||||||
printf(_(" --role check node has expected role\n"));
|
printf(_(" --role check node has expected role\n"));
|
||||||
printf(_(" --slots check for inactive replication slots\n"));
|
printf(_(" --slots check for inactive replication slots\n"));
|
||||||
printf(_(" --missing-slots check for missing replication slots\n"));
|
printf(_(" --missing-slots check for missing replication slots\n"));
|
||||||
|
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
@@ -2963,6 +3212,7 @@ do_node_help(void)
|
|||||||
printf(_(" --dry-run show what action would be performed, but don't execute it\n"));
|
printf(_(" --dry-run show what action would be performed, but don't execute it\n"));
|
||||||
printf(_(" --action action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
|
printf(_(" --action action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
|
||||||
printf(_(" --list-actions show what command would be performed for each action\n"));
|
printf(_(" --list-actions show what command would be performed for each action\n"));
|
||||||
|
printf(_(" --checkpoint issue a CHECKPOINT before stopping or restarting the node\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-node.h
|
* repmgr-action-node.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -24,6 +24,7 @@ extern void do_node_check(void);
|
|||||||
|
|
||||||
extern void do_node_rejoin(void);
|
extern void do_node_rejoin(void);
|
||||||
extern void do_node_service(void);
|
extern void do_node_service(void);
|
||||||
|
extern void do_node_control(void);
|
||||||
|
|
||||||
extern void do_node_help(void);
|
extern void do_node_help(void);
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements primary actions for the repmgr command line utility
|
* Implements primary actions for the repmgr command line utility
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -251,6 +251,7 @@ do_primary_unregister(void)
|
|||||||
PGconn *primary_conn = NULL;
|
PGconn *primary_conn = NULL;
|
||||||
PGconn *local_conn = NULL;
|
PGconn *local_conn = NULL;
|
||||||
t_node_info local_node_info = T_NODE_INFO_INITIALIZER;
|
t_node_info local_node_info = T_NODE_INFO_INITIALIZER;
|
||||||
|
t_node_info primary_node_info = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
t_node_info *target_node_info_ptr = NULL;
|
t_node_info *target_node_info_ptr = NULL;
|
||||||
PGconn *target_node_conn = NULL;
|
PGconn *target_node_conn = NULL;
|
||||||
@@ -271,8 +272,6 @@ do_primary_unregister(void)
|
|||||||
|
|
||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
t_node_info primary_node_info = T_NODE_INFO_INITIALIZER;
|
|
||||||
|
|
||||||
log_error(_("unable to connect to primary server"));
|
log_error(_("unable to connect to primary server"));
|
||||||
|
|
||||||
if (get_primary_node_record(local_conn, &primary_node_info) == true)
|
if (get_primary_node_record(local_conn, &primary_node_info) == true)
|
||||||
@@ -291,10 +290,19 @@ do_primary_unregister(void)
|
|||||||
/* Local connection no longer required */
|
/* Local connection no longer required */
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
|
|
||||||
|
if (get_primary_node_record(primary_conn, &primary_node_info) == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve record for primary node"));
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
/* Target node is local node? */
|
/* Target node is local node? */
|
||||||
if (target_node_info.node_id == UNKNOWN_NODE_ID
|
if (target_node_info.node_id == UNKNOWN_NODE_ID)
|
||||||
|| target_node_info.node_id == config_file_options.node_id)
|
{
|
||||||
|
target_node_info_ptr = &primary_node_info;
|
||||||
|
}
|
||||||
|
else if (target_node_info.node_id == config_file_options.node_id)
|
||||||
{
|
{
|
||||||
target_node_info_ptr = &local_node_info;
|
target_node_info_ptr = &local_node_info;
|
||||||
}
|
}
|
||||||
@@ -304,6 +312,24 @@ do_primary_unregister(void)
|
|||||||
target_node_info_ptr = &target_node_info;
|
target_node_info_ptr = &target_node_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sanity-check the target node is not a witness
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (target_node_info_ptr->type == WITNESS)
|
||||||
|
{
|
||||||
|
log_error(_("node %s (id: %i) is a witness server, unable to unregister"),
|
||||||
|
target_node_info_ptr->node_name,
|
||||||
|
target_node_info_ptr->node_id);
|
||||||
|
if (target_node_info_ptr->type == STANDBY)
|
||||||
|
{
|
||||||
|
log_hint(_("the node can be unregistered with \"repmgr witness unregister\""));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for downstream nodes - if any still defined, we won't be able to
|
* Check for downstream nodes - if any still defined, we won't be able to
|
||||||
* delete the node record due to foreign key constraints.
|
* delete the node record due to foreign key constraints.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-primary.h
|
* repmgr-action-primary.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-standby.h
|
* repmgr-action-standby.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -28,7 +28,7 @@ extern void do_standby_switchover(void);
|
|||||||
|
|
||||||
extern void do_standby_help(void);
|
extern void do_standby_help(void);
|
||||||
|
|
||||||
extern bool do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output, int *error_code);
|
extern bool do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_node_info *follow_target_node_record, PQExpBufferData *output, int general_error_code, int *error_code);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements witness actions for the repmgr command line utility
|
* Implements witness actions for the repmgr command line utility
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -36,9 +36,12 @@ do_witness_register(void)
|
|||||||
{
|
{
|
||||||
PGconn *witness_conn = NULL;
|
PGconn *witness_conn = NULL;
|
||||||
PGconn *primary_conn = NULL;
|
PGconn *primary_conn = NULL;
|
||||||
|
int primary_node_id = UNKNOWN_NODE_ID;
|
||||||
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
||||||
|
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
||||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
bool record_created = false;
|
bool record_created = false;
|
||||||
|
|
||||||
@@ -53,8 +56,7 @@ do_witness_register(void)
|
|||||||
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s",
|
log_detail("\n%s", PQerrorMessage(witness_conn));
|
||||||
PQerrorMessage(witness_conn));
|
|
||||||
log_hint(_("the witness node must be running before it can be registered"));
|
log_hint(_("the witness node must be running before it can be registered"));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -124,6 +126,59 @@ do_witness_register(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* check we can determine the primary node */
|
||||||
|
primary_node_id = get_primary_node_id(primary_conn);
|
||||||
|
|
||||||
|
if (primary_node_id == UNKNOWN_NODE_ID)
|
||||||
|
{
|
||||||
|
log_error(_("unable to determine the cluster's primary node"));
|
||||||
|
log_hint(_("ensure the primary node connection details are correct and that it is registered"));
|
||||||
|
PQfinish(witness_conn);
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
record_status = get_node_record(primary_conn, primary_node_id, &primary_node_record);
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve record for primary node %i"),
|
||||||
|
primary_node_id);
|
||||||
|
|
||||||
|
PQfinish(witness_conn);
|
||||||
|
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reconnect to the primary node's conninfo - this will
|
||||||
|
* protect against the situation where the witness connection
|
||||||
|
* details were provided, and we're actually connected to the
|
||||||
|
* witness server.
|
||||||
|
*/
|
||||||
|
|
||||||
|
primary_conn = establish_db_connection_quiet(primary_node_record.conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to reconnect to the primary node (node %i)"), primary_node_id);
|
||||||
|
log_detail(_("primary node's conninfo is \"%s\""), primary_node_record.conninfo);
|
||||||
|
|
||||||
|
PQfinish(witness_conn);
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: sanity check witness node is not part of main cluster; we could
|
||||||
|
* add a random application_name to the respective connections,
|
||||||
|
* and do a simple check of pg_stat_activity
|
||||||
|
*/
|
||||||
|
|
||||||
/* check that primary node is not a BDR node */
|
/* check that primary node is not a BDR node */
|
||||||
if (is_bdr_db_quiet(primary_conn) == true)
|
if (is_bdr_db_quiet(primary_conn) == true)
|
||||||
{
|
{
|
||||||
@@ -136,11 +191,6 @@ do_witness_register(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO: sanity check witness node is not part of main cluster; we could
|
|
||||||
* add a random application_name to the respective connections,
|
|
||||||
* and do a simple check of pg_stat_activity
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* create repmgr extension, if does not exist */
|
/* create repmgr extension, if does not exist */
|
||||||
if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn))
|
if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn))
|
||||||
@@ -214,33 +264,45 @@ do_witness_register(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extension_status = get_repmgr_extension_status(witness_conn, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if repmgr.nodes contains entries, delete if -F/--force provided,
|
* Check if the witness database already contains node records;
|
||||||
* otherwise exit with error
|
* only do this if the extension is actually installed.
|
||||||
*/
|
*/
|
||||||
if (get_all_node_records(witness_conn, &nodes) == false)
|
if (extension_status == REPMGR_INSTALLED
|
||||||
|
|| extension_status == REPMGR_OLD_VERSION_INSTALLED)
|
||||||
{
|
{
|
||||||
/* get_all_node_records() will display the error */
|
/*
|
||||||
PQfinish(witness_conn);
|
* if repmgr.nodes contains entries, exit with error unless
|
||||||
PQfinish(primary_conn);
|
* -F/--force provided (which will cause the existing records
|
||||||
exit(ERR_BAD_CONFIG);
|
* to be overwritten)
|
||||||
}
|
*/
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
|
if (get_all_node_records(witness_conn, &nodes) == false)
|
||||||
|
|
||||||
if (nodes.node_count > 0)
|
|
||||||
{
|
|
||||||
if (!runtime_options.force)
|
|
||||||
{
|
{
|
||||||
log_error(_("witness node is already initialised and contains node records"));
|
/* get_all_node_records() will display the error */
|
||||||
log_hint(_("use option -F/--force to reinitialise the node"));
|
|
||||||
PQfinish(primary_conn);
|
|
||||||
PQfinish(witness_conn);
|
PQfinish(witness_conn);
|
||||||
|
PQfinish(primary_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
clear_node_info_list(&nodes);
|
log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
|
||||||
|
|
||||||
|
if (nodes.node_count > 0)
|
||||||
|
{
|
||||||
|
if (!runtime_options.force)
|
||||||
|
{
|
||||||
|
log_error(_("witness node is already initialised and contains node records"));
|
||||||
|
log_hint(_("use option -F/--force to reinitialise the node"));
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
PQfinish(witness_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clear_node_info_list(&nodes);
|
||||||
|
}
|
||||||
|
|
||||||
if (runtime_options.dry_run == true)
|
if (runtime_options.dry_run == true)
|
||||||
{
|
{
|
||||||
@@ -262,7 +324,7 @@ do_witness_register(void)
|
|||||||
/* these values are mandatory, setting them to anything else has no point */
|
/* these values are mandatory, setting them to anything else has no point */
|
||||||
node_record.type = WITNESS;
|
node_record.type = WITNESS;
|
||||||
node_record.priority = 0;
|
node_record.priority = 0;
|
||||||
node_record.upstream_node_id = get_primary_node_id(primary_conn);
|
node_record.upstream_node_id = primary_node_id;
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
if (record_status == RECORD_FOUND)
|
||||||
{
|
{
|
||||||
@@ -348,7 +410,7 @@ do_witness_unregister(void)
|
|||||||
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -374,7 +436,7 @@ do_witness_unregister(void)
|
|||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to connect to primary"));
|
log_error(_("unable to connect to primary"));
|
||||||
log_detail("%s", PQerrorMessage(primary_conn));
|
log_detail("\n%s", PQerrorMessage(primary_conn));
|
||||||
|
|
||||||
if (local_node_available == true)
|
if (local_node_available == true)
|
||||||
{
|
{
|
||||||
@@ -471,13 +533,15 @@ void do_witness_help(void)
|
|||||||
printf(_("Usage:\n"));
|
printf(_("Usage:\n"));
|
||||||
printf(_(" %s [OPTIONS] witness register\n"), progname());
|
printf(_(" %s [OPTIONS] witness register\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] witness unregister\n"), progname());
|
printf(_(" %s [OPTIONS] witness unregister\n"), progname());
|
||||||
|
puts("");
|
||||||
printf(_("WITNESS REGISTER\n"));
|
printf(_("WITNESS REGISTER\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" \"witness register\" registers a witness node.\n"));
|
printf(_(" \"witness register\" registers a witness node.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" Requires provision of connection information for the primary\n"));
|
printf(_(" Requires provision of connection information for the primary node,\n"));
|
||||||
|
printf(_(" typically usually just the host name.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
printf(_(" -h/--host host name of the primary node\n"));
|
||||||
printf(_(" --dry-run check prerequisites but don't make any changes\n"));
|
printf(_(" --dry-run check prerequisites but don't make any changes\n"));
|
||||||
printf(_(" -F, --force overwrite an existing node record\n"));
|
printf(_(" -F, --force overwrite an existing node record\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-witness.h
|
* repmgr-action-witness.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-client-global.h
|
* repmgr-client-global.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -35,14 +35,16 @@ typedef struct
|
|||||||
bool connection_param_provided;
|
bool connection_param_provided;
|
||||||
bool host_param_provided;
|
bool host_param_provided;
|
||||||
bool limit_provided;
|
bool limit_provided;
|
||||||
|
bool wait_provided;
|
||||||
|
|
||||||
/* general configuration options */
|
/* general configuration options */
|
||||||
char config_file[MAXPGPATH];
|
char config_file[MAXPGPATH];
|
||||||
bool dry_run;
|
bool dry_run;
|
||||||
bool force;
|
bool force;
|
||||||
char pg_bindir[MAXLEN]; /* overrides setting in repmgr.conf */
|
char pg_bindir[MAXLEN]; /* overrides setting in repmgr.conf */
|
||||||
bool wait;
|
int wait;
|
||||||
bool no_wait;
|
bool no_wait;
|
||||||
|
bool compact;
|
||||||
|
|
||||||
/* logging options */
|
/* logging options */
|
||||||
char log_level[MAXLEN]; /* overrides setting in repmgr.conf */
|
char log_level[MAXLEN]; /* overrides setting in repmgr.conf */
|
||||||
@@ -68,7 +70,7 @@ typedef struct
|
|||||||
|
|
||||||
/* general node options */
|
/* general node options */
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
char data_dir[MAXPGPATH];
|
char data_dir[MAXPGPATH];
|
||||||
int remote_node_id;
|
int remote_node_id;
|
||||||
|
|
||||||
@@ -111,8 +113,9 @@ typedef struct
|
|||||||
bool missing_slots;
|
bool missing_slots;
|
||||||
bool has_passfile;
|
bool has_passfile;
|
||||||
bool replication_connection;
|
bool replication_connection;
|
||||||
|
bool data_directory_config;
|
||||||
|
|
||||||
/* "node join" options */
|
/* "node rejoin" options */
|
||||||
char config_files[MAXLEN];
|
char config_files[MAXLEN];
|
||||||
|
|
||||||
/* "node service" options */
|
/* "node service" options */
|
||||||
@@ -132,13 +135,15 @@ typedef struct
|
|||||||
/* following options for internal use */
|
/* following options for internal use */
|
||||||
char config_archive_dir[MAXPGPATH];
|
char config_archive_dir[MAXPGPATH];
|
||||||
OutputMode output_mode;
|
OutputMode output_mode;
|
||||||
|
bool disable_wal_receiver;
|
||||||
|
bool enable_wal_receiver;
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define T_RUNTIME_OPTIONS_INITIALIZER { \
|
#define T_RUNTIME_OPTIONS_INITIALIZER { \
|
||||||
/* configuration metadata */ \
|
/* configuration metadata */ \
|
||||||
false, false, false, false, \
|
false, false, false, false, false, \
|
||||||
/* general configuration options */ \
|
/* general configuration options */ \
|
||||||
"", false, false, "", false, false, \
|
"", false, false, "", -1, false, false, \
|
||||||
/* logging options */ \
|
/* logging options */ \
|
||||||
"", false, false, false, false, \
|
"", false, false, false, false, \
|
||||||
/* output options */ \
|
/* output options */ \
|
||||||
@@ -161,8 +166,8 @@ typedef struct
|
|||||||
/* "node status" options */ \
|
/* "node status" options */ \
|
||||||
false, \
|
false, \
|
||||||
/* "node check" options */ \
|
/* "node check" options */ \
|
||||||
false, false, false, false, false, false, false, false, \
|
false, false, false, false, false, false, false, false, false, \
|
||||||
/* "node join" options */ \
|
/* "node rejoin" options */ \
|
||||||
"", \
|
"", \
|
||||||
/* "node service" options */ \
|
/* "node service" options */ \
|
||||||
"", false, false, false, \
|
"", false, false, false, \
|
||||||
@@ -171,7 +176,7 @@ typedef struct
|
|||||||
/* "cluster cleanup" options */ \
|
/* "cluster cleanup" options */ \
|
||||||
0, \
|
0, \
|
||||||
/* following options for internal use */ \
|
/* following options for internal use */ \
|
||||||
"/tmp", OM_TEXT \
|
"/tmp", OM_TEXT, false, false \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -199,6 +204,7 @@ typedef struct ColHeader
|
|||||||
char title[MAXLEN];
|
char title[MAXLEN];
|
||||||
int max_length;
|
int max_length;
|
||||||
int cur_length;
|
int cur_length;
|
||||||
|
bool display;
|
||||||
} ColHeader;
|
} ColHeader;
|
||||||
|
|
||||||
|
|
||||||
@@ -220,8 +226,6 @@ extern int check_server_version(PGconn *conn, char *server_type, bool exit_on_er
|
|||||||
extern void check_93_config(void);
|
extern void check_93_config(void);
|
||||||
extern bool create_repmgr_extension(PGconn *conn);
|
extern bool create_repmgr_extension(PGconn *conn);
|
||||||
extern int test_ssh_connection(char *host, char *remote_user);
|
extern int test_ssh_connection(char *host, char *remote_user);
|
||||||
extern bool local_command(const char *command, PQExpBufferData *outputbuf);
|
|
||||||
extern bool local_command_simple(const char *command, PQExpBufferData *outputbuf);
|
|
||||||
|
|
||||||
extern standy_clone_mode get_standby_clone_mode(void);
|
extern standy_clone_mode get_standby_clone_mode(void);
|
||||||
|
|
||||||
@@ -234,9 +238,9 @@ extern char *make_pg_path(const char *file);
|
|||||||
|
|
||||||
extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privileged_conn);
|
extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privileged_conn);
|
||||||
|
|
||||||
extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf);
|
|
||||||
|
|
||||||
extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record);
|
extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record);
|
||||||
|
extern void make_repmgrd_path(PQExpBufferData *output_buf);
|
||||||
|
|
||||||
|
|
||||||
/* display functions */
|
/* display functions */
|
||||||
extern void print_help_header(void);
|
extern void print_help_header(void);
|
||||||
@@ -251,4 +255,8 @@ extern void init_node_record(t_node_info *node_record);
|
|||||||
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
||||||
extern void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
extern void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
||||||
|
|
||||||
|
extern bool check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin);
|
||||||
|
extern void check_shared_library(PGconn *conn);
|
||||||
|
extern bool is_repmgrd_running(PGconn *conn);
|
||||||
|
|
||||||
#endif /* _REPMGR_CLIENT_GLOBAL_H_ */
|
#endif /* _REPMGR_CLIENT_GLOBAL_H_ */
|
||||||
|
|||||||
644
repmgr-client.c
644
repmgr-client.c
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-client.h
|
* repmgr-client.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -40,14 +40,17 @@
|
|||||||
#define NODE_CHECK 14
|
#define NODE_CHECK 14
|
||||||
#define NODE_SERVICE 15
|
#define NODE_SERVICE 15
|
||||||
#define NODE_REJOIN 16
|
#define NODE_REJOIN 16
|
||||||
#define CLUSTER_SHOW 17
|
#define NODE_CONTROL 17
|
||||||
#define CLUSTER_CLEANUP 18
|
#define CLUSTER_SHOW 18
|
||||||
#define CLUSTER_MATRIX 19
|
#define CLUSTER_CLEANUP 19
|
||||||
#define CLUSTER_CROSSCHECK 20
|
#define CLUSTER_MATRIX 20
|
||||||
#define CLUSTER_EVENT 21
|
#define CLUSTER_CROSSCHECK 21
|
||||||
#define DAEMON_STATUS 22
|
#define CLUSTER_EVENT 22
|
||||||
#define DAEMON_PAUSE 23
|
#define DAEMON_STATUS 23
|
||||||
#define DAEMON_UNPAUSE 24
|
#define DAEMON_PAUSE 24
|
||||||
|
#define DAEMON_UNPAUSE 25
|
||||||
|
#define DAEMON_START 26
|
||||||
|
#define DAEMON_STOP 27
|
||||||
|
|
||||||
/* command line options without short versions */
|
/* command line options without short versions */
|
||||||
#define OPT_HELP 1001
|
#define OPT_HELP 1001
|
||||||
@@ -92,6 +95,11 @@
|
|||||||
#define OPT_NO_WAIT 1040
|
#define OPT_NO_WAIT 1040
|
||||||
#define OPT_MISSING_SLOTS 1041
|
#define OPT_MISSING_SLOTS 1041
|
||||||
#define OPT_REPMGRD_NO_PAUSE 1042
|
#define OPT_REPMGRD_NO_PAUSE 1042
|
||||||
|
#define OPT_VERSION_NUMBER 1043
|
||||||
|
#define OPT_DATA_DIRECTORY_CONFIG 1044
|
||||||
|
#define OPT_COMPACT 1045
|
||||||
|
#define OPT_DISABLE_WAL_RECEIVER 1046
|
||||||
|
#define OPT_ENABLE_WAL_RECEIVER 1047
|
||||||
|
|
||||||
/* deprecated since 3.3 */
|
/* deprecated since 3.3 */
|
||||||
#define OPT_DATA_DIR 999
|
#define OPT_DATA_DIR 999
|
||||||
@@ -102,16 +110,18 @@
|
|||||||
static struct option long_options[] =
|
static struct option long_options[] =
|
||||||
{
|
{
|
||||||
/* general options */
|
/* general options */
|
||||||
{"version", no_argument, NULL, 'V'},
|
|
||||||
{"help", no_argument, NULL, OPT_HELP},
|
{"help", no_argument, NULL, OPT_HELP},
|
||||||
|
{"version", no_argument, NULL, 'V'},
|
||||||
|
{"version-number", no_argument, NULL, OPT_VERSION_NUMBER},
|
||||||
|
|
||||||
/* general configuration options */
|
/* general configuration options */
|
||||||
{"config-file", required_argument, NULL, 'f'},
|
{"config-file", required_argument, NULL, 'f'},
|
||||||
{"dry-run", no_argument, NULL, OPT_DRY_RUN},
|
{"dry-run", no_argument, NULL, OPT_DRY_RUN},
|
||||||
{"force", no_argument, NULL, 'F'},
|
{"force", no_argument, NULL, 'F'},
|
||||||
{"pg_bindir", required_argument, NULL, 'b'},
|
{"pg_bindir", required_argument, NULL, 'b'},
|
||||||
{"wait", no_argument, NULL, 'w'},
|
{"wait", optional_argument, NULL, 'w'},
|
||||||
{"no-wait", no_argument, NULL, 'W'},
|
{"no-wait", no_argument, NULL, 'W'},
|
||||||
|
{"compact", no_argument, NULL, OPT_COMPACT},
|
||||||
|
|
||||||
/* connection options */
|
/* connection options */
|
||||||
{"dbname", required_argument, NULL, 'd'},
|
{"dbname", required_argument, NULL, 'd'},
|
||||||
@@ -156,7 +166,7 @@ static struct option long_options[] =
|
|||||||
|
|
||||||
/* "standby switchover" options
|
/* "standby switchover" options
|
||||||
*
|
*
|
||||||
* Note: --force-rewind accepted to pass to "node join"
|
* Note: --force-rewind accepted to pass to "node rejoin"
|
||||||
*/
|
*/
|
||||||
{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE},
|
{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE},
|
||||||
{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW},
|
{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW},
|
||||||
@@ -174,6 +184,7 @@ static struct option long_options[] =
|
|||||||
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
|
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
|
||||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||||
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
||||||
|
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
|
||||||
|
|
||||||
/* "node rejoin" options */
|
/* "node rejoin" options */
|
||||||
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
|
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
|
||||||
@@ -193,6 +204,10 @@ static struct option long_options[] =
|
|||||||
/* "cluster cleanup" options */
|
/* "cluster cleanup" options */
|
||||||
{"keep-history", required_argument, NULL, 'k'},
|
{"keep-history", required_argument, NULL, 'k'},
|
||||||
|
|
||||||
|
/* undocumented options for testing */
|
||||||
|
{"disable-wal-receiver", no_argument, NULL, OPT_DISABLE_WAL_RECEIVER},
|
||||||
|
{"enable-wal-receiver", no_argument, NULL, OPT_ENABLE_WAL_RECEIVER},
|
||||||
|
|
||||||
/* deprecated */
|
/* deprecated */
|
||||||
{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
|
{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
|
||||||
{"no-conninfo-password", no_argument, NULL, OPT_NO_CONNINFO_PASSWORD},
|
{"no-conninfo-password", no_argument, NULL, OPT_NO_CONNINFO_PASSWORD},
|
||||||
|
|||||||
105
repmgr.c
105
repmgr.c
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.c - repmgr extension
|
* repmgr.c - repmgr extension
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||||
*
|
*
|
||||||
* This is the actual extension code; see repmgr-client.c for the code which
|
* This is the actual extension code; see repmgr-client.c for the code which
|
||||||
* generates the repmgr binary
|
* generates the repmgr binary
|
||||||
@@ -40,7 +40,6 @@
|
|||||||
|
|
||||||
#include "utils/timestamp.h"
|
#include "utils/timestamp.h"
|
||||||
|
|
||||||
#include "executor/spi.h"
|
|
||||||
#include "lib/stringinfo.h"
|
#include "lib/stringinfo.h"
|
||||||
#include "access/xact.h"
|
#include "access/xact.h"
|
||||||
#include "utils/snapmgr.h"
|
#include "utils/snapmgr.h"
|
||||||
@@ -54,11 +53,12 @@
|
|||||||
#include "voting.h"
|
#include "voting.h"
|
||||||
|
|
||||||
#define UNKNOWN_NODE_ID -1
|
#define UNKNOWN_NODE_ID -1
|
||||||
|
#define ELECTION_RERUN_NOTIFICATION -2
|
||||||
#define UNKNOWN_PID -1
|
#define UNKNOWN_PID -1
|
||||||
|
|
||||||
#define TRANCHE_NAME "repmgrd"
|
#define TRANCHE_NAME "repmgrd"
|
||||||
#define REPMGRD_STATE_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/repmgrd_state.txt"
|
#define REPMGRD_STATE_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/repmgrd_state.txt"
|
||||||
|
#define REPMGRD_STATE_FILE_BUF_SIZE 128
|
||||||
|
|
||||||
PG_MODULE_MAGIC;
|
PG_MODULE_MAGIC;
|
||||||
|
|
||||||
@@ -78,6 +78,7 @@ typedef struct repmgrdSharedState
|
|||||||
char repmgrd_pidfile[MAXPGPATH];
|
char repmgrd_pidfile[MAXPGPATH];
|
||||||
bool repmgrd_paused;
|
bool repmgrd_paused;
|
||||||
/* streaming failover */
|
/* streaming failover */
|
||||||
|
TimestampTz upstream_last_seen;
|
||||||
NodeVotingStatus voting_status;
|
NodeVotingStatus voting_status;
|
||||||
int current_electoral_term;
|
int current_electoral_term;
|
||||||
int candidate_node_id;
|
int candidate_node_id;
|
||||||
@@ -108,6 +109,12 @@ PG_FUNCTION_INFO_V1(standby_set_last_updated);
|
|||||||
Datum standby_get_last_updated(PG_FUNCTION_ARGS);
|
Datum standby_get_last_updated(PG_FUNCTION_ARGS);
|
||||||
PG_FUNCTION_INFO_V1(standby_get_last_updated);
|
PG_FUNCTION_INFO_V1(standby_get_last_updated);
|
||||||
|
|
||||||
|
Datum set_upstream_last_seen(PG_FUNCTION_ARGS);
|
||||||
|
PG_FUNCTION_INFO_V1(set_upstream_last_seen);
|
||||||
|
|
||||||
|
Datum get_upstream_last_seen(PG_FUNCTION_ARGS);
|
||||||
|
PG_FUNCTION_INFO_V1(get_upstream_last_seen);
|
||||||
|
|
||||||
Datum notify_follow_primary(PG_FUNCTION_ARGS);
|
Datum notify_follow_primary(PG_FUNCTION_ARGS);
|
||||||
PG_FUNCTION_INFO_V1(notify_follow_primary);
|
PG_FUNCTION_INFO_V1(notify_follow_primary);
|
||||||
|
|
||||||
@@ -141,6 +148,8 @@ PG_FUNCTION_INFO_V1(repmgrd_pause);
|
|||||||
Datum repmgrd_is_paused(PG_FUNCTION_ARGS);
|
Datum repmgrd_is_paused(PG_FUNCTION_ARGS);
|
||||||
PG_FUNCTION_INFO_V1(repmgrd_is_paused);
|
PG_FUNCTION_INFO_V1(repmgrd_is_paused);
|
||||||
|
|
||||||
|
Datum get_wal_receiver_pid(PG_FUNCTION_ARGS);
|
||||||
|
PG_FUNCTION_INFO_V1(get_wal_receiver_pid);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -219,6 +228,8 @@ repmgr_shmem_startup(void)
|
|||||||
memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);
|
memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);
|
||||||
shared_state->repmgrd_paused = false;
|
shared_state->repmgrd_paused = false;
|
||||||
shared_state->current_electoral_term = 0;
|
shared_state->current_electoral_term = 0;
|
||||||
|
/* arbitrary "magic" date to indicate this field hasn't been updated */
|
||||||
|
shared_state->upstream_last_seen = POSTGRES_EPOCH_JDATE;
|
||||||
shared_state->voting_status = VS_NO_VOTE;
|
shared_state->voting_status = VS_NO_VOTE;
|
||||||
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
|
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
|
||||||
shared_state->follow_new_primary = false;
|
shared_state->follow_new_primary = false;
|
||||||
@@ -256,8 +267,8 @@ set_local_node_id(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
if (file != NULL)
|
if (file != NULL)
|
||||||
{
|
{
|
||||||
int buffer_size = 128;
|
int buffer_size = REPMGRD_STATE_FILE_BUF_SIZE;
|
||||||
char buffer[buffer_size];
|
char buffer[REPMGRD_STATE_FILE_BUF_SIZE];
|
||||||
|
|
||||||
if (fgets(buffer, buffer_size, file) != NULL)
|
if (fgets(buffer, buffer_size, file) != NULL)
|
||||||
{
|
{
|
||||||
@@ -354,6 +365,54 @@ standby_get_last_updated(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Datum
|
||||||
|
set_upstream_last_seen(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
if (!shared_state)
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
|
||||||
|
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
|
shared_state->upstream_last_seen = GetCurrentTimestamp();
|
||||||
|
|
||||||
|
LWLockRelease(shared_state->lock);
|
||||||
|
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Datum
|
||||||
|
get_upstream_last_seen(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
long secs;
|
||||||
|
int microsecs;
|
||||||
|
TimestampTz last_seen;
|
||||||
|
|
||||||
|
if (!shared_state)
|
||||||
|
PG_RETURN_INT32(-1);
|
||||||
|
|
||||||
|
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||||
|
|
||||||
|
last_seen = shared_state->upstream_last_seen;
|
||||||
|
|
||||||
|
LWLockRelease(shared_state->lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* "last_seen" is initialised with the PostgreSQL epoch as a
|
||||||
|
* "magic" value to indicate the field hasn't ever been updated
|
||||||
|
* by repmgrd. We return -1 instead, rather than imply that the
|
||||||
|
* primary was last seen at the turn of the century.
|
||||||
|
*/
|
||||||
|
if (last_seen == POSTGRES_EPOCH_JDATE)
|
||||||
|
PG_RETURN_INT32(-1);
|
||||||
|
|
||||||
|
|
||||||
|
TimestampDifference(last_seen, GetCurrentTimestamp(),
|
||||||
|
&secs, &microsecs);
|
||||||
|
|
||||||
|
/* let's hope repmgrd never runs for more than a century or so without seeing a primary */
|
||||||
|
PG_RETURN_INT32((uint32)secs);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ===================*/
|
/* ===================*/
|
||||||
@@ -367,10 +426,10 @@ notify_follow_primary(PG_FUNCTION_ARGS)
|
|||||||
int primary_node_id = UNKNOWN_NODE_ID;
|
int primary_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
if (!shared_state)
|
if (!shared_state)
|
||||||
PG_RETURN_NULL();
|
PG_RETURN_VOID();
|
||||||
|
|
||||||
if (PG_ARGISNULL(0))
|
if (PG_ARGISNULL(0))
|
||||||
PG_RETURN_NULL();
|
PG_RETURN_VOID();
|
||||||
|
|
||||||
primary_node_id = PG_GETARG_INT32(0);
|
primary_node_id = PG_GETARG_INT32(0);
|
||||||
|
|
||||||
@@ -379,9 +438,17 @@ notify_follow_primary(PG_FUNCTION_ARGS)
|
|||||||
/* only do something if local_node_id is initialised */
|
/* only do something if local_node_id is initialised */
|
||||||
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
||||||
{
|
{
|
||||||
elog(INFO, "node %i received notification to follow node %i",
|
if (primary_node_id == ELECTION_RERUN_NOTIFICATION)
|
||||||
shared_state->local_node_id,
|
{
|
||||||
primary_node_id);
|
elog(INFO, "node %i received notification to rerun promotion candidate election",
|
||||||
|
shared_state->local_node_id);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
elog(INFO, "node %i received notification to follow node %i",
|
||||||
|
shared_state->local_node_id,
|
||||||
|
primary_node_id);
|
||||||
|
}
|
||||||
|
|
||||||
LWLockRelease(shared_state->lock);
|
LWLockRelease(shared_state->lock);
|
||||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||||
@@ -402,7 +469,7 @@ get_new_primary(PG_FUNCTION_ARGS)
|
|||||||
int new_primary_node_id = UNKNOWN_NODE_ID;
|
int new_primary_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
if (!shared_state)
|
if (!shared_state)
|
||||||
PG_RETURN_NULL();
|
PG_RETURN_INT32(UNKNOWN_NODE_ID);
|
||||||
|
|
||||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||||
|
|
||||||
@@ -412,7 +479,7 @@ get_new_primary(PG_FUNCTION_ARGS)
|
|||||||
LWLockRelease(shared_state->lock);
|
LWLockRelease(shared_state->lock);
|
||||||
|
|
||||||
if (new_primary_node_id == UNKNOWN_NODE_ID)
|
if (new_primary_node_id == UNKNOWN_NODE_ID)
|
||||||
PG_RETURN_NULL();
|
PG_RETURN_INT32(UNKNOWN_NODE_ID);
|
||||||
|
|
||||||
PG_RETURN_INT32(new_primary_node_id);
|
PG_RETURN_INT32(new_primary_node_id);
|
||||||
}
|
}
|
||||||
@@ -680,3 +747,17 @@ repmgrd_is_paused(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
PG_RETURN_BOOL(is_paused);
|
PG_RETURN_BOOL(is_paused);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Datum
|
||||||
|
get_wal_receiver_pid(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
int wal_receiver_pid;
|
||||||
|
|
||||||
|
if (!shared_state)
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
|
||||||
|
wal_receiver_pid = WalRcv->pid;
|
||||||
|
|
||||||
|
PG_RETURN_INT32(wal_receiver_pid);
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,7 +5,14 @@
|
|||||||
# Some configuration items will be set with a default value; this
|
# Some configuration items will be set with a default value; this
|
||||||
# is noted for each item. Where no default value is shown, the
|
# is noted for each item. Where no default value is shown, the
|
||||||
# parameter will be treated as empty or false.
|
# parameter will be treated as empty or false.
|
||||||
|
#
|
||||||
|
# IMPORTANT: string values can be provided as-is, or enclosed in single quotes
|
||||||
|
# (but not double-quotes, which will be interpreted as part of the string),
|
||||||
|
# e.g.:
|
||||||
|
#
|
||||||
|
# node_name=foo
|
||||||
|
# node_name = 'foo'
|
||||||
|
#
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Required configuration items
|
# Required configuration items
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -18,22 +25,24 @@
|
|||||||
# using the server's hostname or another identifier
|
# using the server's hostname or another identifier
|
||||||
# unambiguously associated with the server to avoid
|
# unambiguously associated with the server to avoid
|
||||||
# confusion. Avoid choosing names which reflect the
|
# confusion. Avoid choosing names which reflect the
|
||||||
# node's current role, e.g. "primary" or "standby1",
|
# node's current role, e.g. 'primary' or 'standby1',
|
||||||
# as roles can change and it will be confusing if
|
# as roles can change and it will be confusing if
|
||||||
# the current primary is called "standby1".
|
# the current primary is called 'standby1'.
|
||||||
|
# The string's maximum length is 63 characters and it should
|
||||||
|
# contain only printable ASCII characters.
|
||||||
|
|
||||||
#conninfo='' # Database connection information as a conninfo string.
|
#conninfo='' # Database connection information as a conninfo string.
|
||||||
# All servers in the cluster must be able to connect to
|
# All servers in the cluster must be able to connect to
|
||||||
# the local node using this string.
|
# the local node using this string.
|
||||||
#
|
#
|
||||||
# For details on conninfo strings, see:
|
# For details on conninfo strings, see:
|
||||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
# https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING
|
||||||
#
|
#
|
||||||
# If repmgrd is in use, consider explicitly setting
|
# If repmgrd is in use, consider explicitly setting
|
||||||
# "connect_timeout" in the conninfo string to determine
|
# "connect_timeout" in the conninfo string to determine
|
||||||
# the length of time which elapses before a network
|
# the length of time which elapses before a network
|
||||||
# connection attempt is abandoned; for details see:
|
# connection attempt is abandoned; for details see:
|
||||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT
|
# https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT
|
||||||
|
|
||||||
#data_directory='' # The node's data directory. This is needed by repmgr
|
#data_directory='' # The node's data directory. This is needed by repmgr
|
||||||
# when performing operations when the PostgreSQL instance
|
# when performing operations when the PostgreSQL instance
|
||||||
@@ -241,6 +250,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# for the demoted standby to reconnect to the promoted
|
# for the demoted standby to reconnect to the promoted
|
||||||
# primary (note: this value should be equal to or greater
|
# primary (note: this value should be equal to or greater
|
||||||
# than that set for "node_rejoin_timeout")
|
# than that set for "node_rejoin_timeout")
|
||||||
|
#wal_receive_check_timeout=30 # The max length of time (in seconds) to wait for the walreceiver
|
||||||
|
# on the standby to flush WAL to disk before comparing location
|
||||||
|
# with the shut-down primary
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# "node rejoin" settings
|
# "node rejoin" settings
|
||||||
@@ -269,11 +281,6 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# These settings are only applied when repmgrd is running. Values shown
|
# These settings are only applied when repmgrd is running. Values shown
|
||||||
# are defaults.
|
# are defaults.
|
||||||
|
|
||||||
#repmgrd_pid_file= # Path of PID file to use for repmgrd; if not set, a PID file will
|
|
||||||
# be generated in a temporary directory specified by the environment
|
|
||||||
# variable $TMPDIR, or if not set, in "/tmp". This value can be overridden
|
|
||||||
# by the command line option "-p/--pid-file"; the command line option
|
|
||||||
# "--no-pid-file" will force PID file creation to be skipped.
|
|
||||||
#failover=manual # one of 'automatic', 'manual'.
|
#failover=manual # one of 'automatic', 'manual'.
|
||||||
# determines what action to take in the event of upstream failure
|
# determines what action to take in the event of upstream failure
|
||||||
#
|
#
|
||||||
@@ -283,10 +290,13 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# manual attention to reattach it to replication
|
# manual attention to reattach it to replication
|
||||||
# (does not apply to BDR mode)
|
# (does not apply to BDR mode)
|
||||||
|
|
||||||
#priority=100 # indicate a preferred priority for promoting nodes;
|
#priority=100 # indicates a preferred priority for promoting nodes;
|
||||||
# a value of zero prevents the node being promoted to primary
|
# a value of zero prevents the node being promoted to primary
|
||||||
# (default: 100)
|
# (default: 100)
|
||||||
|
|
||||||
|
#connection_check_type=ping # How to check availability of the upstream node; valid options:
|
||||||
|
# 'ping': use PQping() to check if the node is accepting connections
|
||||||
|
# 'connection': execute a throwaway query on the current connection
|
||||||
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
||||||
# primary (or other upstream node)
|
# primary (or other upstream node)
|
||||||
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
||||||
@@ -310,10 +320,29 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
#monitoring_history=no # Whether to write monitoring data to the "monitoring_history" table
|
#monitoring_history=no # Whether to write monitoring data to the "monitoring_history" table
|
||||||
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
|
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
|
||||||
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the
|
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the
|
||||||
# server being monitored is no longer available. -1 (default)
|
# server(s) being monitored are no longer available. -1 (default)
|
||||||
# disables the timeout completely.
|
# disables the timeout completely.
|
||||||
#async_query_timeout=60 # Interval (in seconds) which repmgrd will wait before
|
#async_query_timeout=60 # Interval (in seconds) which repmgrd will wait before
|
||||||
# cancelling an asynchronous query.
|
# cancelling an asynchronous query.
|
||||||
|
#repmgrd_pid_file= # Path of PID file to use for repmgrd; if not set, a PID file will
|
||||||
|
# be generated in a temporary directory specified by the environment
|
||||||
|
# variable $TMPDIR, or if not set, in "/tmp". This value can be overridden
|
||||||
|
# by the command line option "-p/--pid-file"; the command line option
|
||||||
|
# "--no-pid-file" will force PID file creation to be skipped.
|
||||||
|
# Note: there is normally no need to set this, particularly if
|
||||||
|
# repmgr was installed from packages.
|
||||||
|
#standby_disconnect_on_failover=false # If "true", in a failover situation wait for all standbys to
|
||||||
|
# disconnect their WAL receivers before electing a new primary
|
||||||
|
# (PostgreSQL 9.5 and later only; repmgr user must be a superuser for this)
|
||||||
|
#sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover" is true, the maximum length of time
|
||||||
|
# (in seconds) to wait for other standbys to confirm they have disconnected their
|
||||||
|
# WAL receivers
|
||||||
|
#failover_validation_command= # Script to execute for an external mechanism to validate the failover
|
||||||
|
# decision made by repmgrd. One or both of the following parameter placeholders
|
||||||
|
# should be provided, which will be replaced by repmgrd with the appropriate
|
||||||
|
# value: %n (node_id), %a (node_name). *Must* be the same on all nodes.
|
||||||
|
#election_rerun_interval=15 # if "failover_validation_command" is set, and the command returns
|
||||||
|
# an error, pause the specified number of seconds before rerunning the election.
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# service control commands
|
# service control commands
|
||||||
@@ -322,6 +351,12 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# repmgr provides options to override the default pg_ctl commands
|
# repmgr provides options to override the default pg_ctl commands
|
||||||
# used to stop, start, restart, reload and promote the PostgreSQL cluster
|
# used to stop, start, restart, reload and promote the PostgreSQL cluster
|
||||||
#
|
#
|
||||||
|
# These options are useful when PostgreSQL has been installed from a package
|
||||||
|
# which provides OS-level service commands. In environments using an init system
|
||||||
|
# such as systemd, which keeps track of the state of various services, it is
|
||||||
|
# essential that the service commands are correctly configured and pg_ctl is
|
||||||
|
# not executed directly.
|
||||||
|
#
|
||||||
# NOTE: These commands must be runnable on remote nodes as well for switchover
|
# NOTE: These commands must be runnable on remote nodes as well for switchover
|
||||||
# to function correctly.
|
# to function correctly.
|
||||||
#
|
#
|
||||||
@@ -343,7 +378,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
#
|
#
|
||||||
# Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
|
# Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
|
||||||
#
|
#
|
||||||
# For more details, see: https://repmgr.org/docs/4.1/configuration-service-commands.html
|
# For more details, see: https://repmgr.org/docs/current/configuration-service-commands.html
|
||||||
|
|
||||||
#service_start_command = ''
|
#service_start_command = ''
|
||||||
#service_stop_command = ''
|
#service_stop_command = ''
|
||||||
@@ -355,6 +390,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# for "promote_command"; do not use "repmgr standby promote"
|
# for "promote_command"; do not use "repmgr standby promote"
|
||||||
# (or a script which executes "repmgr standby promote") here.
|
# (or a script which executes "repmgr standby promote") here.
|
||||||
|
|
||||||
|
# Used by "repmgr daemon (start|stop)" to control repmgrd
|
||||||
|
#
|
||||||
|
#repmgrd_service_start_command = ''
|
||||||
|
#repmgrd_service_stop_command = ''
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# Status check thresholds
|
# Status check thresholds
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# repmgr extension
|
# repmgr extension
|
||||||
comment = 'Replication manager for PostgreSQL'
|
comment = 'Replication manager for PostgreSQL'
|
||||||
default_version = '4.2'
|
default_version = '4.3'
|
||||||
module_pathname = '$libdir/repmgr'
|
module_pathname = '$libdir/repmgr'
|
||||||
relocatable = false
|
relocatable = false
|
||||||
schema = repmgr
|
schema = repmgr
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user