mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
218 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1fe01e9168 | ||
|
|
ed1136f443 | ||
|
|
a7ed60a533 | ||
|
|
fc5a18410d | ||
|
|
fd52c8ec3c | ||
|
|
47f1c6fa84 | ||
|
|
fba89ef37c | ||
|
|
4cc6cbe32f | ||
|
|
c715077c29 | ||
|
|
c178d8ed27 | ||
|
|
d4d06f43f7 | ||
|
|
0d346a9f54 | ||
|
|
abb16e4366 | ||
|
|
59b1924d5b | ||
|
|
c88ea62643 | ||
|
|
5b91a5e2e5 | ||
|
|
c2a1a35282 | ||
|
|
2b8b74ae75 | ||
|
|
08ef4d4be6 | ||
|
|
1a0049f086 | ||
|
|
af6f0fc2cf | ||
|
|
893d67473d | ||
|
|
a922cd5558 | ||
|
|
7bbc664230 | ||
|
|
a6998fe0f9 | ||
|
|
dadfdcc51f | ||
|
|
b8823d5c1f | ||
|
|
e59b57376d | ||
|
|
3db87e6a31 | ||
|
|
94d05619c3 | ||
|
|
807c7c926c | ||
|
|
df68f1f3f6 | ||
|
|
d4c75bb6c7 | ||
|
|
94d4e1128d | ||
|
|
dbd82ba687 | ||
|
|
0888fbc538 | ||
|
|
92a84bd950 | ||
|
|
a3318d65d2 | ||
|
|
374e9811c9 | ||
|
|
72f9b0145a | ||
|
|
5e03ef40cb | ||
|
|
091541619d | ||
|
|
5e9db47d12 | ||
|
|
e8a0cd33b5 | ||
|
|
8cd79fd7dd | ||
|
|
013b4b4b8a | ||
|
|
c5a721a3cf | ||
|
|
a6294b7da0 | ||
|
|
a0f02e454c | ||
|
|
69d9d137e0 | ||
|
|
60bceae905 | ||
|
|
746c9793ed | ||
|
|
c30447ac90 | ||
|
|
097024a32f | ||
|
|
66b7dbbed7 | ||
|
|
74f6f97f26 | ||
|
|
968c2f1954 | ||
|
|
bd76d0eb92 | ||
|
|
f1ee6e19b6 | ||
|
|
fbb65b4a43 | ||
|
|
3fac975de6 | ||
|
|
a2b5ba595a | ||
|
|
c16ab3c889 | ||
|
|
dd5b6f9f12 | ||
|
|
303bb22ee1 | ||
|
|
5d8b1a3a31 | ||
|
|
3d6c349d88 | ||
|
|
1ade1acb22 | ||
|
|
66fd003ab4 | ||
|
|
0d42b771f5 | ||
|
|
005640be51 | ||
|
|
b6ebd34e2f | ||
|
|
951879f80d | ||
|
|
46ff9fb587 | ||
|
|
cc610f995d | ||
|
|
384618cb33 | ||
|
|
0dd617cfca | ||
|
|
f18d629bd2 | ||
|
|
afc904f876 | ||
|
|
3bcea46c3b | ||
|
|
d7e85f7565 | ||
|
|
b14d8ddb74 | ||
|
|
9b2a907b09 | ||
|
|
f63d42fe77 | ||
|
|
560066fa9d | ||
|
|
3937670d14 | ||
|
|
0daa7381b3 | ||
|
|
e53545af4f | ||
|
|
45178c19d8 | ||
|
|
cf46834041 | ||
|
|
c30609426a | ||
|
|
1c49c4159c | ||
|
|
b6b6439819 | ||
|
|
16896510dc | ||
|
|
9a05999abb | ||
|
|
1c155a1088 | ||
|
|
4c463a66b7 | ||
|
|
31d57f4122 | ||
|
|
209de699ce | ||
|
|
7b313b9d71 | ||
|
|
cf126642bd | ||
|
|
52281fcde8 | ||
|
|
de573edaaa | ||
|
|
4cb7f301ad | ||
|
|
87d8de4441 | ||
|
|
6db742f81e | ||
|
|
c79933685c | ||
|
|
04ba672b9f | ||
|
|
4f4111063a | ||
|
|
3a3a536e6d | ||
|
|
6f7206a5a1 | ||
|
|
f9fd1dd227 | ||
|
|
8140ba9c27 | ||
|
|
32dba444e1 | ||
|
|
e814c1120e | ||
|
|
247823db4d | ||
|
|
beda22d5f9 | ||
|
|
2eb00a3e6f | ||
|
|
0a798bf6e4 | ||
|
|
21b2ff1a1f | ||
|
|
8212ff8d8a | ||
|
|
57f9432692 | ||
|
|
54d3c7a4ca | ||
|
|
7fd44a3d74 | ||
|
|
b0f6b7bad7 | ||
|
|
4dbbf40196 | ||
|
|
d5e24689a4 | ||
|
|
10e47441a2 | ||
|
|
1ccd0edad2 | ||
|
|
59b31dd1ca | ||
|
|
300b9f0cc2 | ||
|
|
274a30efa5 | ||
|
|
db63b5bb1c | ||
|
|
0efee4cf65 | ||
|
|
0cb2584886 | ||
|
|
b88d27248c | ||
|
|
683c54325e | ||
|
|
70d398cd47 | ||
|
|
7b7d80e5f2 | ||
|
|
e100728b93 | ||
|
|
d104f2a914 | ||
|
|
2946c097f0 | ||
|
|
a538ceb0ea | ||
|
|
5a2a8d1c82 | ||
|
|
b5a7efa58e | ||
|
|
96b0e26084 | ||
|
|
91c498f6f1 | ||
|
|
d48093e732 | ||
|
|
3f0d1754a4 | ||
|
|
f27979bbe1 | ||
|
|
9f6f58e4ed | ||
|
|
c22f4eaf6f | ||
|
|
925d82f7a4 | ||
|
|
1db577e294 | ||
|
|
a886fddccc | ||
|
|
83e5f98171 | ||
|
|
eb31a56186 | ||
|
|
8cd2c6fd05 | ||
|
|
e3e1c5de4e | ||
|
|
f9a150504a | ||
|
|
e9445a5d5e | ||
|
|
9a2717b5e3 | ||
|
|
5bc809466c | ||
|
|
dd6ea1cd77 | ||
|
|
5d32026b79 | ||
|
|
de5908c122 | ||
|
|
2a8d6f72c6 | ||
|
|
190cc7dcb4 | ||
|
|
4b5c84921c | ||
|
|
819937d4bd | ||
|
|
aaa8d70cef | ||
|
|
57299cb978 | ||
|
|
ca31b846e7 | ||
|
|
59f503835b | ||
|
|
33e626cd75 | ||
|
|
a27cecb559 | ||
|
|
491ec37adf | ||
|
|
cf0cdfa6a1 | ||
|
|
31489d92c0 | ||
|
|
c93790fc96 | ||
|
|
b7fd13aed2 | ||
|
|
ecabe2c294 | ||
|
|
2ba57e5938 | ||
|
|
3c4bf27aa7 | ||
|
|
0ebd9c15d9 | ||
|
|
f9dba283d4 | ||
|
|
205f1cebbb | ||
|
|
4d97c1ebf7 | ||
|
|
12c395e91f | ||
|
|
bd1e4f71d6 | ||
|
|
cb49071ea4 | ||
|
|
2eec17e25f | ||
|
|
c48c248c15 | ||
|
|
958e45f2b8 | ||
|
|
daafd70383 | ||
|
|
c828598bfb | ||
|
|
b55519c4a2 | ||
|
|
4cafd443e1 | ||
|
|
d400d7f9ac | ||
|
|
62bb3db1f8 | ||
|
|
5ad674edff | ||
|
|
ac09bad89c | ||
|
|
009d92fec8 | ||
|
|
b3d8a68a1d | ||
|
|
05b47cb2a8 | ||
|
|
dc542a1b7d | ||
|
|
6ce8058749 | ||
|
|
2edcac77f0 | ||
|
|
f740374392 | ||
|
|
d9961bbb17 | ||
|
|
e1b8982c14 | ||
|
|
2fe3b3c2a3 | ||
|
|
c6e1bc205a | ||
|
|
7241391ddc | ||
|
|
c8f449f178 | ||
|
|
49420c437f | ||
|
|
827ffef5f9 | ||
|
|
16296bb1c3 |
2
FAQ.md
2
FAQ.md
@@ -38,7 +38,7 @@ General
|
||||
|
||||
No. Hash indexes and replication do not mix well and their use is
|
||||
explicitly discouraged; see:
|
||||
http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
||||
https://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
||||
|
||||
`repmgr`
|
||||
--------
|
||||
|
||||
48
HISTORY
48
HISTORY
@@ -1,4 +1,50 @@
|
||||
3.1.0 2016-01-
|
||||
3.1.5 2016-08-15
|
||||
repmgrd: in a failover situation, prevent endless looping when
|
||||
attempting to establish the status of a node with
|
||||
`failover=manual` (Ian)
|
||||
repmgrd: improve handling of failover events on standbys with
|
||||
`failover=manual`, and create a new event notification
|
||||
for this, `standby_disconnect_manual` (Ian)
|
||||
repmgr: add further event notifications (Gianni)
|
||||
repmgr: when executing `standby switchover`, don't collect remote
|
||||
command output unless required (Gianni, Ian)
|
||||
repmgrd: improve standby monitoring query (Ian, based on suggestion
|
||||
from Álvaro)
|
||||
repmgr: various command line handling improvements (Ian)
|
||||
|
||||
3.1.4 2016-07-12
|
||||
repmgr: new configuration option for setting "restore_command"
|
||||
in the recovery.conf file generated by repmgr (Martín)
|
||||
repmgr: add --csv option to "repmgr cluster show" (Gianni)
|
||||
repmgr: enable provision of a conninfo string as the -d/--dbname
|
||||
parameter, similar to other PostgreSQL utilities (Ian)
|
||||
repmgr: during switchover operations improve detection of
|
||||
demotion candidate shutdown (Ian)
|
||||
various bugfixes and documentation updates (Ian, Martín)
|
||||
|
||||
3.1.3 2016-05-17
|
||||
repmgrd: enable monitoring when a standby is catching up by
|
||||
replaying archived WAL (Ian)
|
||||
repmgrd: when upstream_node_id is NULL, assume upstream node
|
||||
to be current master (Ian)
|
||||
repmgrd: check for reappearance of the master node if standby
|
||||
promotion fails (Ian)
|
||||
improve handling of rsync failure conditions (Martín)
|
||||
|
||||
3.1.2 2016-04-12
|
||||
Fix pg_ctl path generation in do_standby_switchover() (Ian)
|
||||
Regularly sync witness server repl_nodes table (Ian)
|
||||
Documentation improvements (Gianni, dhyannataraj)
|
||||
(Experimental) ensure repmgr handles failover slots when copying
|
||||
in rsync mode (Craig, Ian)
|
||||
rsync mode handling fixes (Martín)
|
||||
Enable repmgr to compile against 9.6devel (Ian)
|
||||
|
||||
3.1.1 2016-02-24
|
||||
Add '-P/--pwprompt' option for "repmgr create witness" (Ian)
|
||||
Prevent repmgr/repmgrd running as root (Ian)
|
||||
|
||||
3.1.0 2016-02-01
|
||||
Add "repmgr standby switchover" command (Ian)
|
||||
Revised README file (Ian)
|
||||
Remove requirement for 'archive_mode' to be enabled (Ian)
|
||||
|
||||
29
Makefile
29
Makefile
@@ -2,23 +2,32 @@
|
||||
# Makefile
|
||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||
|
||||
HEADERS = $(wildcard *.h)
|
||||
|
||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o
|
||||
|
||||
DATA = repmgr.sql uninstall_repmgr.sql
|
||||
|
||||
PG_CPPFLAGS = -I$(libpq_srcdir)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
|
||||
all: repmgrd repmgr
|
||||
|
||||
all: repmgrd repmgr
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgrd: $(repmgrd_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
|
||||
$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgr: $(repmgr_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
||||
$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
|
||||
# Make all objects depend on all include files. This is a bit of a
|
||||
# shotgun approach, but the codebase is small enough that a complete rebuild
|
||||
# is very fast anyway.
|
||||
$(repmgr_OBJS): $(HEADERS)
|
||||
$(repmgrd_OBJS): $(HEADERS)
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
@@ -31,8 +40,8 @@ include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
|
||||
# is overriding pgxs install.
|
||||
# XXX: This overrides the pgxs install target - we're building two binaries,
|
||||
# which is not supported by pgxs.mk's PROGRAM construct.
|
||||
install: install_prog install_ext
|
||||
|
||||
install_prog:
|
||||
@@ -43,6 +52,12 @@ install_prog:
|
||||
install_ext:
|
||||
$(MAKE) -C sql install
|
||||
|
||||
# Distribution-specific package building targets
|
||||
# ----------------------------------------------
|
||||
#
|
||||
# XXX we recommend using the PGDG-supplied packages where possible;
|
||||
# see README.md for details.
|
||||
|
||||
install_rhel:
|
||||
mkdir -p '$(DESTDIR)/etc/init.d/'
|
||||
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
|
||||
|
||||
353
README.md
353
README.md
@@ -33,10 +33,14 @@ provides a single read/write master server and one or more read-only standbys
|
||||
containing near-real time copies of the master server's database.
|
||||
|
||||
For a multi-master replication solution, please see 2ndQuadrant's BDR
|
||||
(bi-directional replication) extension. For selective replication, e.g.
|
||||
of individual tables or databases from one server to another, please
|
||||
see 2ndQuadrant's pglogical extension.
|
||||
(bi-directional replication) extension.
|
||||
|
||||
http://2ndquadrant.com/en-us/resources/bdr/
|
||||
|
||||
For selective replication, e.g. of individual tables or databases from one server
|
||||
to another, please see 2ndQuadrant's pglogical extension.
|
||||
|
||||
http://2ndquadrant.com/en-us/resources/pglogical/
|
||||
|
||||
### Concepts
|
||||
|
||||
@@ -44,7 +48,7 @@ This guide assumes that you are familiar with PostgreSQL administration and
|
||||
streaming replication concepts. For further details on streaming
|
||||
replication, see this link:
|
||||
|
||||
http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION
|
||||
https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION
|
||||
|
||||
The following terms are used throughout the `repmgr` documentation.
|
||||
|
||||
@@ -109,16 +113,16 @@ tables:
|
||||
- `repl_monitor`: historical standby monitoring information written by `repmgrd`
|
||||
|
||||
views:
|
||||
- `repl_show_nodes`: based on the `repl_nodes` showing name of the server's
|
||||
upstream node
|
||||
- `repl_show_nodes`: based on the table `repl_nodes`, additionally showing the
|
||||
name of the server's upstream node
|
||||
- `repl_status`: when `repmgrd`'s monitoring is enabled, shows current monitoring
|
||||
status for each node
|
||||
|
||||
The `repmgr` metadata schema can be stored in an existing database or in its own
|
||||
dedicated database.
|
||||
|
||||
A dedicated superuser is required to own the meta-database as well as carry out
|
||||
administrative actions.
|
||||
A dedicated database superuser is required to own the meta-database as well as carry
|
||||
out administrative actions.
|
||||
|
||||
Installation
|
||||
------------
|
||||
@@ -128,7 +132,9 @@ Installation
|
||||
`repmgr` is developed and tested on Linux and OS X, but should work on any
|
||||
UNIX-like system supported by PostgreSQL itself.
|
||||
|
||||
`repmgr` supports PostgreSQL from version 9.3.
|
||||
Current versions of `repmgr` support PostgreSQL from version 9.3. If you are
|
||||
interested in using `repmgr` on earlier versions of PostgreSQL you can download
|
||||
version 2.1 which supports PostgreSQL from version 9.1.
|
||||
|
||||
All servers in the replication cluster must be running the same major version of
|
||||
PostgreSQL, and we recommend that they also run the same minor version.
|
||||
@@ -137,7 +143,7 @@ The `repmgr` tools must be installed on each server in the replication cluster.
|
||||
|
||||
A dedicated system user for `repmgr` is *not* required; as many `repmgr` and
|
||||
`repmgrd` actions require direct access to the PostgreSQL data directory,
|
||||
it should executed by the `postgres` user.
|
||||
it should be executed by the `postgres` user.
|
||||
|
||||
Additionally, we recommend installing `rsync` and enabling passwordless
|
||||
`ssh` connectivity between all servers in the replication cluster.
|
||||
@@ -149,9 +155,15 @@ system.
|
||||
|
||||
- RedHat/CentOS: RPM packages for `repmgr` are available via Yum through
|
||||
the PostgreSQL Global Development Group RPM repository ( http://yum.postgresql.org/ ).
|
||||
You need to follow the instructions for your distribution (RedHat, CentOS,
|
||||
Follow the instructions for your distribution (RedHat, CentOS,
|
||||
Fedora, etc.) and architecture as detailed at yum.postgresql.org.
|
||||
|
||||
2ndQuadrant also provides its own RPM packages which are made available
|
||||
at the same time as each `repmgr` release, as it can take some days for
|
||||
them to become available via the main PGDG repository. See here for details:
|
||||
|
||||
http://repmgr.org/yum-repository.html
|
||||
|
||||
- Debian/Ubuntu: the most recent `repmgr` packages are available from the
|
||||
PostgreSQL Community APT repository ( http://apt.postgresql.org/ ).
|
||||
Instructions can be found in the APT section of the PostgreSQL Wiki
|
||||
@@ -186,7 +198,8 @@ PostgreSQL itself.
|
||||
`repmgr` and `repmgrd` use a common configuration file, by default called
|
||||
`repmgr.conf` (although any name can be used if explicitly specified).
|
||||
At the very least, `repmgr.conf` must contain the connection parameters
|
||||
for the local `repmgr` database.
|
||||
for the local `repmgr` database; see `repmgr configuration file` below
|
||||
for more details.
|
||||
|
||||
The configuration file will be searched for in the following locations:
|
||||
|
||||
@@ -208,6 +221,34 @@ command line options:
|
||||
- `-b/--pg_bindir`
|
||||
|
||||
|
||||
### Command line options and environment variables
|
||||
|
||||
For some commands, e.g. `repmgr standby clone`, database connection parameters
|
||||
need to be provided. Like other PostgreSQL utilities, the following standard
|
||||
parameters can be used:
|
||||
|
||||
- `-d/--dbname=DBNAME`
|
||||
- `-h/--host=HOSTNAME`
|
||||
- `-p/--port=PORT`
|
||||
- `-U/--username=USERNAME`
|
||||
|
||||
If `-d/--dbname` contains an `=` sign or starts with a valid URI prefix (`postgresql://`
|
||||
or `postgres://`), it is treated as a conninfo string. See the PostgreSQL
|
||||
documentation for further details:
|
||||
|
||||
https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
|
||||
Note that if a `conninfo` string is provided, values set in this will override any
|
||||
provided as individual parameters. For example, with `-d 'host=foo' --host bar`, `foo`
|
||||
will be chosen over `bar`.
|
||||
|
||||
Like other PostgreSQL utilities, `repmgr` will default to any values set in environment
|
||||
variables if explicit command line parameters are not provided. See the PostgreSQL
|
||||
documentation for further details:
|
||||
|
||||
https://www.postgresql.org/docs/current/static/libpq-envars.html
|
||||
|
||||
|
||||
Setting up a simple replication cluster with repmgr
|
||||
---------------------------------------------------
|
||||
|
||||
@@ -230,15 +271,19 @@ both servers.
|
||||
On the master server, a PostgreSQL instance must be initialised and running.
|
||||
The following replication settings must be included in `postgresql.conf`:
|
||||
|
||||
|
||||
# Enable replication connections; set this figure to at least one more
|
||||
# than the number of standbys which will connect to this server
|
||||
# (note that repmgr will execute `pg_basebackup` in WAL streaming mode,
|
||||
# which requires two free WAL senders)
|
||||
|
||||
max_wal_senders = 10
|
||||
|
||||
# Ensure WAL files contain enough information to enable read-only queries
|
||||
# on the standby
|
||||
|
||||
wal_level = 'hot_standby'
|
||||
|
||||
# Enable up to 10 replication connections
|
||||
|
||||
max_wal_senders = 10
|
||||
|
||||
# How much WAL to retain on the master to allow a temporarily
|
||||
# disconnected standby to catch up again. The larger this is, the
|
||||
# longer the standby can be disconnected. This is needed only in
|
||||
@@ -252,6 +297,14 @@ The following replication settings must be included in `postgresql.conf`:
|
||||
|
||||
hot_standby = on
|
||||
|
||||
# Enable WAL file archiving
|
||||
archive_mode = on
|
||||
|
||||
# Set archive command to a script or application that will safely store
|
||||
# your WALs in a secure place. /bin/true is an example of a command that
|
||||
# ignores archiving. Use something more sensible.
|
||||
archive_command = '/bin/true'
|
||||
|
||||
|
||||
* * *
|
||||
|
||||
@@ -277,11 +330,11 @@ similar to the following:
|
||||
|
||||
local replication repmgr trust
|
||||
host replication repmgr 127.0.0.1/32 trust
|
||||
host replication repmgr 192.168.1.0/32 trust
|
||||
host replication repmgr 192.168.1.0/24 trust
|
||||
|
||||
local repmgr repmgr trust
|
||||
host repmgr repmgr 127.0.0.1/32 trust
|
||||
host repmgr repmgr 192.168.1.0/32 trust
|
||||
host repmgr repmgr 192.168.1.0/24 trust
|
||||
|
||||
Adjust according to your network environment and authentication requirements.
|
||||
|
||||
@@ -364,25 +417,43 @@ Clone the standby with:
|
||||
[2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server
|
||||
[2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start
|
||||
|
||||
This will clone the PostgreSQL data directory files from the master using
|
||||
PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the
|
||||
correct parameters to start streaming from the master server will be created
|
||||
automatically, and unless otherwise the `postgresql.conf` and `pg_hba.conf`
|
||||
files will be copied.
|
||||
This will clone the PostgreSQL data directory files from the master at `repmgr_node1`
|
||||
using PostgreSQL's `pg_basebackup` utility. A `recovery.conf` file containing the
|
||||
correct parameters to start streaming from this master server will be created
|
||||
automatically, and unless otherwise specified, the `postgresql.conf` and `pg_hba.conf`
|
||||
files will be copied from the master.
|
||||
|
||||
Make any adjustments to the PostgreSQL configuration files now, then start the
|
||||
standby server.
|
||||
Be aware that when initially cloning a standby, you will need to ensure
|
||||
that all required WAL files remain available while the cloning is taking
|
||||
place. To ensure this happens when using the default `pg_basebackup` method,
|
||||
`repmgr` will set `pg_basebackup`'s `--xlog-method` parameter to `stream`,
|
||||
which will ensure all WAL files generated during the cloning process are
|
||||
streamed in parallel with the main backup. Note that this requires two
|
||||
replication connections to be available.
|
||||
|
||||
To override this behaviour, in `repmgr.conf` set `pg_basebackup`'s
|
||||
`--xlog-method` parameter to `fetch`:
|
||||
|
||||
pg_basebackup_options='--xlog-method=fetch'
|
||||
|
||||
and ensure that `wal_keep_segments` is set to an appropriately high value.
|
||||
See the `pg_basebackup` documentation for details:
|
||||
|
||||
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||
|
||||
Make any adjustments to the standby's PostgreSQL configuration files now,
|
||||
then start the server.
|
||||
|
||||
* * *
|
||||
|
||||
> *NOTE*: `repmgr standby clone` does not require `repmgr.conf`, however we
|
||||
> recommend providing this as `repmgr` will set the `application_name` parameter
|
||||
> in `recovery.conf` as value provided in `node_name`, making it easier to identify
|
||||
> the node in `pg_stat_replication`. It's also possible to provide some advanced
|
||||
> options for controlling the standby cloning process; see next section for
|
||||
> details.
|
||||
> in `recovery.conf` as the value provided in `node_name`, making it easier to
|
||||
> identify the node in `pg_stat_replication`. It's also possible to provide some
|
||||
> advanced options for controlling the standby cloning process; see the next section
|
||||
> for details.
|
||||
|
||||
***
|
||||
* * *
|
||||
|
||||
### Verify replication is functioning
|
||||
|
||||
@@ -425,20 +496,20 @@ table:
|
||||
2 | standby | 1 | test | node2 | host=repmgr_node2 dbname=repmgr user=repmgr | | 100 | t
|
||||
(2 rows)
|
||||
|
||||
The standby server now has a copy of records for all servers in the replication
|
||||
cluster. Note that the relationship between master and standby is explicitly
|
||||
defined via the `upstream_node_id` value, which shows here that the standby's
|
||||
upstream server is the replication cluster master. While of limited use
|
||||
in a simple master/standby replication cluster, this information is required
|
||||
The standby server now has a copy of the records for all servers in the
|
||||
replication cluster. Note that the relationship between master and standby is
|
||||
explicitly defined via the `upstream_node_id` value, which shows here that the
|
||||
standby's upstream server is the replication cluster master. While of limited
|
||||
use in a simple master/standby replication cluster, this information is required
|
||||
to effectively manage cascading replication (see below).
|
||||
|
||||
|
||||
Advanced options for cloning a standby
|
||||
--------------------------------------
|
||||
|
||||
The above section demonstrates the simplest possible way to clone
|
||||
a standby server. Depending on your situation, finer-grained control
|
||||
over the cloning process may be necessary.
|
||||
The above section demonstrates the simplest possible way to clone a standby
|
||||
server. Depending on your circumstances, finer-grained control over the cloning
|
||||
process may be necessary.
|
||||
|
||||
### pg_basebackup options when cloning a standby
|
||||
|
||||
@@ -449,9 +520,13 @@ However this may impact performance of the server being cloned from
|
||||
so should be used with care.
|
||||
|
||||
Further options can be passed to the `pg_basebackup` utility via
|
||||
the `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
||||
documentation for more details:
|
||||
the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
||||
documentation for more details of available options:
|
||||
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||
|
||||
### Using rsync to clone a standby
|
||||
|
||||
@@ -469,12 +544,11 @@ and destination server as the contents of files existing on both servers need
|
||||
to be compared, meaning this method is not necessarily faster than making a
|
||||
fresh clone with `pg_basebackup`.
|
||||
|
||||
|
||||
### Dealing with PostgreSQL configuration files
|
||||
|
||||
By default, `repmgr` will attempt to copy the standard configuration files
|
||||
(`postgresql.conf`, `pg_hba.conf` and `pg_ident.conf`) even if they are located
|
||||
outside of the data directory (though note currently they will be copied
|
||||
outside of the data directory (though currently they will be copied
|
||||
into the standby's data directory). To prevent this happening, when executing
|
||||
`repmgr standby clone` provide the `--ignore-external-config-files` option.
|
||||
|
||||
@@ -484,6 +558,21 @@ which enables any valid `rsync` options to be passed to that command, e.g.:
|
||||
|
||||
rsync_options='--exclude=postgresql.local.conf'
|
||||
|
||||
### Controlling `primary_conninfo` in `recovery.conf`
|
||||
|
||||
`repmgr` will create the `primary_conninfo` setting in `recovery.conf` based
|
||||
on the connection parameters provided to `repmgr standby clone` and PostgreSQL's
|
||||
standard connection defaults, including any environment variables set on the
|
||||
local node.
|
||||
|
||||
To include specific connection parameters other than the standard host, port,
|
||||
username and database values (e.g. `sslmode`), include these in a `conninfo`-style
|
||||
string passed to `repmgr` with `-d/--dbname` (see above for details), and/or set
|
||||
appropriate environment variables.
|
||||
|
||||
Note that PostgreSQL will always set explicit defaults for `sslmode` and
|
||||
`sslcompression`.
|
||||
|
||||
|
||||
Setting up cascading replication with repmgr
|
||||
--------------------------------------------
|
||||
@@ -557,6 +646,10 @@ To enable `repmgr` to use replication slots, set the boolean parameter
|
||||
Note that `repmgr` will fail with an error if this option is specified when
|
||||
working with PostgreSQL 9.3.
|
||||
|
||||
Replication slots must be enabled in `postgresql.conf` by setting the parameter
|
||||
`max_replication_slots` to at least the number of expected standbys (changes
|
||||
to this parameter require a server restart).
|
||||
|
||||
When cloning a standby, `repmgr` will automatically generate an appropriate
|
||||
slot name, which is stored in the `repl_nodes` table, and create the slot
|
||||
on the master:
|
||||
@@ -579,21 +672,9 @@ Note that a slot name will be created by default for the master but not
|
||||
actually used unless the master is converted to a standby using e.g.
|
||||
`repmgr standby switchover`.
|
||||
|
||||
Be aware that when initially cloning a standby, you will need to ensure
|
||||
that all required WAL files remain available while the cloning is taking
|
||||
place. If using the default `pg_basebackup` method, we recommend setting
|
||||
`pg_basebackup`'s `--xlog-method` parameter to `stream` like this:
|
||||
|
||||
pg_basebackup_options='--xlog-method=stream'
|
||||
|
||||
See the `pg_basebackup` documentation for details:
|
||||
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||
|
||||
Otherwise it's necessary to set `wal_keep_segments` to an appropriately high
|
||||
value.
|
||||
|
||||
Further information on replication slots in the PostgreSQL documentation:
|
||||
http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
||||
https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
||||
|
||||
|
||||
Promoting a standby server with repmgr
|
||||
@@ -692,8 +773,9 @@ updated to reflect this:
|
||||
|
||||
|
||||
Note that with cascading replication, `repmgr standby follow` can also be
|
||||
used to detach a standby from its current upstream server and follow another
|
||||
upstream server, including the master.
|
||||
used to detach a standby from its current upstream server and follow the
|
||||
master. However it's currently not possible to have it follow another standby;
|
||||
we hope to improve this in a future release.
|
||||
|
||||
|
||||
Performing a switchover with repmgr
|
||||
@@ -702,16 +784,16 @@ Performing a switchover with repmgr
|
||||
A typical use-case for replication is a combination of master and standby
|
||||
server, with the standby serving as a backup which can easily be activated
|
||||
in case of a problem with the master. Such an unplanned failover would
|
||||
normally be handled by promoting the standby, after which appropriate action
|
||||
taken to restore the old master.
|
||||
normally be handled by promoting the standby, after which an appropriate
|
||||
action must be taken to restore the old master.
|
||||
|
||||
In some cases however it's desirable to promote the standby in a planned
|
||||
way, e.g. so maintenance can be performed on the master; this kind of switchover
|
||||
is supported by the `repmgr standby switchover` command.
|
||||
|
||||
`repmgr standby switchover` differs from other `repmgr` actions in that it
|
||||
also performs actions on another server, for which reason both passwordless
|
||||
SSH access and the path of `repmgr.conf` on that server.
|
||||
also performs actions on another server, for which reason you must provide
|
||||
both passwordless SSH access and the path of `repmgr.conf` on that server.
|
||||
|
||||
* * *
|
||||
|
||||
@@ -720,7 +802,7 @@ SSH access and the path of `repmgr.conf` on that server.
|
||||
> careful preparation and with adequate attention. In particular you should
|
||||
> be confident that your network environment is stable and reliable.
|
||||
>
|
||||
> We recommend running `repmgr standby switchover` at the most verbose
|
||||
> We recommend running `repmgr standby switchover` at the most verbose
|
||||
> logging level (`--log-level DEBUG --verbose`) and capturing all output
|
||||
> to assist troubleshooting any problems.
|
||||
>
|
||||
@@ -786,7 +868,7 @@ should have been updated to reflect this:
|
||||
|
||||
### Caveats
|
||||
|
||||
- the functionality provided `repmgr standby switchover` is primarily aimed
|
||||
- The functionality provided by `repmgr standby switchover` is primarily aimed
|
||||
at a two-server master/standby replication cluster and currently does
|
||||
not support additional standbys.
|
||||
- `repmgr standby switchover` is designed to use the `pg_rewind` utility,
|
||||
@@ -795,11 +877,16 @@ should have been updated to reflect this:
|
||||
- `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
`pg_rewind` documentation for details:
|
||||
http://www.postgresql.org/docs/current/static/app-pgrewind.html
|
||||
https://www.postgresql.org/docs/current/static/app-pgrewind.html
|
||||
- `repmgrd` should not be running when a switchover is carried out, otherwise
|
||||
`repmgrd` may try to promote a standby by itself.
|
||||
- Any other standbys attached to the old master will need to be manually
|
||||
instructed to point to the new master (e.g. with `repmgr standby follow`).
|
||||
- You must ensure that following a server start using `pg_ctl`, log output
|
||||
is not sent to STDERR (the default behaviour). If logging is not configured,
|
||||
we recommend setting `logging_collector=on` in `postgresql.conf` and
|
||||
providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options`
|
||||
parameter.
|
||||
|
||||
We hope to remove some of these restrictions in future versions of `repmgr`.
|
||||
|
||||
@@ -853,8 +940,8 @@ Adjust schema and node ID accordingly. A future `repmgr` release
|
||||
will make it possible to unregister failed standbys.
|
||||
|
||||
|
||||
Automatic failover with repmgrd
|
||||
-------------------------------
|
||||
Automatic failover with `repmgrd`
|
||||
---------------------------------
|
||||
|
||||
`repmgrd` is a management and monitoring daemon which runs on standby nodes
|
||||
and which can automate actions such as failover and updating standbys to
|
||||
@@ -869,6 +956,10 @@ be set in `repmgr.conf`:
|
||||
|
||||
(See `repmgr.conf.sample` for further `repmgrd`-specific settings).
|
||||
|
||||
Additionally, `postgresql.conf` must contain the following line:
|
||||
|
||||
shared_preload_libraries = 'repmgr_funcs'
|
||||
|
||||
When `failover` is set to `automatic`, upon detecting failure of the current
|
||||
master, `repmgrd` will execute one of `promote_command` or `follow_command`,
|
||||
depending on whether the current server is becoming the new master or
|
||||
@@ -878,7 +969,7 @@ actions happening, but we strongly recommend executing `repmgr` directly.
|
||||
|
||||
`repmgrd` can be started simply with e.g.:
|
||||
|
||||
repmgrd -f /etc/repmgr.conf --verbose > $HOME/repmgr/repmgr.log 2>&1
|
||||
repmgrd -f /etc/repmgr.conf --verbose >> $HOME/repmgr/repmgr.log 2>&1
|
||||
|
||||
For permanent operation, we recommend using the options `-d/--daemonize` to
|
||||
detach the `repmgrd` process, and `-p/--pid-file` to write the process PID
|
||||
@@ -900,7 +991,7 @@ table looks like this:
|
||||
|
||||
|
||||
Start `repmgrd` on each standby and verify that it's running by examining
|
||||
the log output, which at default log level will look like this:
|
||||
the log output, which at log level INFO will look like this:
|
||||
|
||||
[2016-01-05 13:15:40] [INFO] checking cluster configuration with schema 'repmgr_test'
|
||||
[2016-01-05 13:15:40] [INFO] checking node 2 in cluster 'test'
|
||||
@@ -970,8 +1061,8 @@ during the failover:
|
||||
(3 rows)
|
||||
|
||||
|
||||
repmgrd log rotation
|
||||
--------------------
|
||||
`repmgrd` log rotation
|
||||
----------------------
|
||||
|
||||
Note that currently `repmgrd` does not provide logfile rotation. To ensure
|
||||
the current logfile does not grow indefinitely, configure your system's `logrotate`
|
||||
@@ -987,12 +1078,36 @@ for up to 52 weeks and rotation forced if a file grows beyond 100Mb:
|
||||
create 0600 postgres postgres
|
||||
}
|
||||
|
||||
Monitoring
|
||||
----------
|
||||
|
||||
`repmgrd` and PostgreSQL connection settings
|
||||
--------------------------------------------
|
||||
|
||||
In addition to the `repmgr` configuration settings, parameters in the
|
||||
`conninfo` string influence how `repmgr` makes a network connection to
|
||||
PostgreSQL. In particular, if another server in the replication cluster
|
||||
is unreachable at network level, system network settings will influence
|
||||
the length of time it takes to determine that the connection is not possible.
|
||||
|
||||
In particular, explicitly setting a parameter for `connect_timeout` should
|
||||
be considered; the effective minimum value of `2` (seconds) will ensure
|
||||
that a connection failure at network level is reported as soon as possible,
|
||||
otherwise depending on the system settings (e.g. `tcp_syn_retries` in Linux)
|
||||
a delay of a minute or more is possible.
|
||||
|
||||
For further details on `conninfo` network connection parameters, see:
|
||||
|
||||
https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
|
||||
|
||||
|
||||
Monitoring with `repmgrd`
|
||||
-------------------------
|
||||
|
||||
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
||||
constantly write node status information to the `repl_monitor` table, which can
|
||||
be queried easily using the view `repl_status`:
|
||||
constantly write standby node status information to the `repl_monitor` table,
|
||||
providing a near-real time overview of replication status on all nodes
|
||||
in the cluster.
|
||||
|
||||
The view `repl_status` shows the most recent state for each node, e.g.:
|
||||
|
||||
repmgr=# SELECT * FROM repmgr_test.repl_status;
|
||||
-[ RECORD 1 ]-------------+-----------------------------
|
||||
@@ -1017,6 +1132,17 @@ table , it's advisable to regularly purge historical data with
|
||||
`repmgr cluster cleanup`; use the `-k/--keep-history` option to specify how
|
||||
many days' worth of data should be retained.
|
||||
|
||||
It's possible to use `repmgrd` to provide monitoring only for some or all
|
||||
nodes by setting `failover = manual` in the node's `repmgr.conf`. In the
|
||||
event of the node's upstream failing, no failover action will be taken
|
||||
and the node will require manual intervention to be reattached to replication.
|
||||
If this occurs, event notification `standby_disconnect_manual` will be
|
||||
created.
|
||||
|
||||
Note that when a standby node is not streaming directly from its upstream
|
||||
node, e.g. recovering WAL from an archive, `apply_lag` will always appear as
|
||||
`0 bytes`.
|
||||
|
||||
|
||||
Using a witness server with repmgrd
|
||||
------------------------------------
|
||||
@@ -1042,7 +1168,6 @@ makes sense to create a witness server in conjunction with running
|
||||
`repmgrd`; the witness server will require its own `repmgrd` instance.
|
||||
|
||||
|
||||
|
||||
repmgrd and cascading replication
|
||||
---------------------------------
|
||||
|
||||
@@ -1113,6 +1238,7 @@ The following event types are available:
|
||||
* `standby_promote`
|
||||
* `standby_follow`
|
||||
* `standby_switchover`
|
||||
* `standby_disconnect_manual`
|
||||
* `witness_create`
|
||||
* `witness_create`
|
||||
* `repmgrd_start`
|
||||
@@ -1159,7 +1285,7 @@ configuration file is located if `-f/--config-file` is not supplied.
|
||||
### repmgr commands
|
||||
|
||||
The `repmgr` command line tool accepts commands for specific servers in the
|
||||
replication in the format "`server type` `action`", or for the entire
|
||||
replication cluster in the format "`server_type` `action`", or for the entire
|
||||
replication cluster in the format "`cluster` `action`". Each command is
|
||||
described below.
|
||||
|
||||
@@ -1247,31 +1373,59 @@ which contains connection details for the local database.
|
||||
time a failover occurs.
|
||||
|
||||
Note that it only makes sense to create a witness server if `repmgrd`
|
||||
is in use; see section "witness server" above.
|
||||
is in use; see section "Using a witness server" above.
|
||||
|
||||
This command requires a `repmgr.conf` file containing a valid conninfo
|
||||
string for the server to be created, as well as the other minimum required
|
||||
parameters detailed in the section `repmgr configuration file` above.
|
||||
|
||||
By default the witness server will use port 5499 to facilitate easier setup
|
||||
on a server running an existing node.
|
||||
on a server running an existing node. To use a different port, supply
|
||||
this explicitly in the `repmgr.conf` conninfo string.
|
||||
|
||||
This command also requires the location of the witness server's data
|
||||
directory to be provided (`-D/--datadir`) as well as valid connection
|
||||
parameters for the master server.
|
||||
|
||||
By default this command will create a superuser and a repmgr user.
|
||||
The `repmgr` user name will be extracted from the `conninfo` string
|
||||
in `repmgr.conf`.
|
||||
|
||||
* `cluster show`
|
||||
|
||||
Displays information about each active node in the replication cluster. This
|
||||
command polls each registered server and shows its role (master / standby /
|
||||
witness) or "FAILED" if the node doesn't respond. It polls each server
|
||||
witness) or `FAILED` if the node doesn't respond. It polls each server
|
||||
directly and can be run on any node in the cluster; this is also useful
|
||||
when analyzing connectivity from a particular node.
|
||||
|
||||
This command requires a valid `repmgr.conf` file to be provided; no
|
||||
additional arguments are required.
|
||||
additional arguments are needed.
|
||||
|
||||
Example:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
|
||||
Role | Name | Upstream | Connection String
|
||||
----------+-------|----------|--------------------------------------------
|
||||
* master | node1 | | host=repmgr_node1 dbname=repmgr user=repmgr
|
||||
standby | node2 | node1 | host=repmgr_node1 dbname=repmgr user=repmgr
|
||||
standby | node3 | node2 | host=repmgr_node1 dbname=repmgr user=repmgr
|
||||
----------+-------|----------|----------------------------------------
|
||||
* master | node1 | | host=db_node1 dbname=repmgr user=repmgr
|
||||
standby | node2 | node1 | host=db_node2 dbname=repmgr user=repmgr
|
||||
standby | node3 | node2 | host=db_node3 dbname=repmgr user=repmgr
|
||||
|
||||
To show database connection errors when polling nodes, run the command in
|
||||
`--verbose` mode.
|
||||
|
||||
The `cluster show` command now accepts the optional parameter `--csv`, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||
1,-1
|
||||
2,0
|
||||
3,1
|
||||
|
||||
The first column is the node's ID, and the second column represents the
|
||||
node's status (0 = master, 1 = standby, -1 = failed).
|
||||
|
||||
* `cluster cleanup`
|
||||
|
||||
@@ -1290,20 +1444,22 @@ which contains connection details for the local database.
|
||||
`repmgr` or `repmgrd` will return one of the following error codes on program
|
||||
exit:
|
||||
|
||||
* SUCCESS (0) Program ran successfully.
|
||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||
* ERR_DB_QUERY (7) Error while executing a database query
|
||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||
* ERR_BAD_PASSWORD (9) Password used to connect to a database was rejected
|
||||
* ERR_STR_OVERFLOW (10) String overflow error
|
||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
||||
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
||||
* SUCCESS (0) Program ran successfully.
|
||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error (repmgr only)
|
||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||
* ERR_DB_QUERY (7) Error while executing a database query
|
||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||
* ERR_STR_OVERFLOW (10) String overflow error
|
||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH (repmgr only)
|
||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup (repmgr only)
|
||||
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
||||
* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only)
|
||||
* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only)
|
||||
|
||||
|
||||
Support and Assistance
|
||||
----------------------
|
||||
@@ -1349,5 +1505,6 @@ Thanks from the repmgr core team.
|
||||
Further reading
|
||||
---------------
|
||||
|
||||
* http://blog.2ndquadrant.com/improvements-in-repmgr-3-1-4/
|
||||
* http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
||||
* http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
Summary: repmgr
|
||||
Name: repmgr
|
||||
Version: 3.0
|
||||
Release: 1
|
||||
License: GPLv3
|
||||
Group: System Environment/Daemons
|
||||
URL: http://repmgr.org
|
||||
Packager: Ian Barwick <ian@2ndquadrant.com>
|
||||
Vendor: 2ndQuadrant Limited
|
||||
Distribution: centos
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
||||
|
||||
%description
|
||||
repmgr is a utility suite which greatly simplifies
|
||||
the process of setting up and managing replication
|
||||
using streaming replication within a cluster of
|
||||
PostgreSQL servers.
|
||||
|
||||
%prep
|
||||
%setup
|
||||
|
||||
%build
|
||||
export PATH=$PATH:/usr/pgsql-9.3/bin/
|
||||
%{__make} USE_PGXS=1
|
||||
|
||||
%install
|
||||
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
|
||||
|
||||
export PATH=$PATH:/usr/pgsql-9.3/bin/
|
||||
%{__make} USE_PGXS=1 install DESTDIR=%{buildroot} INSTALL="install -p"
|
||||
%{__make} USE_PGXS=1 install_prog DESTDIR=%{buildroot} INSTALL="install -p"
|
||||
%{__make} USE_PGXS=1 install_rhel DESTDIR=%{buildroot} INSTALL="install -p"
|
||||
|
||||
|
||||
%clean
|
||||
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
|
||||
|
||||
|
||||
%files
|
||||
%defattr(-,root,root)
|
||||
/usr/bin/repmgr
|
||||
/usr/bin/repmgrd
|
||||
/usr/pgsql-9.3/bin/repmgr
|
||||
/usr/pgsql-9.3/bin/repmgrd
|
||||
/usr/pgsql-9.3/lib/repmgr_funcs.so
|
||||
/usr/pgsql-9.3/share/contrib/repmgr.sql
|
||||
/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql
|
||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql
|
||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql
|
||||
%attr(0755,root,root)/etc/init.d/repmgrd
|
||||
%attr(0644,root,root)/etc/sysconfig/repmgrd
|
||||
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
|
||||
|
||||
%changelog
|
||||
* Tue Mar 10 2015 Ian Barwick <ian@2ndquadrant.com>
|
||||
- build for repmgr 3.0
|
||||
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
|
||||
- fix witness creation to create db and user if needed
|
||||
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
|
||||
- initial build for RHEL6
|
||||
@@ -1,133 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# chkconfig: - 75 16
|
||||
# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
||||
# processname: repmgrd
|
||||
# pidfile="/var/run/${NAME}.pid"
|
||||
|
||||
# Source function library.
|
||||
INITD=/etc/rc.d/init.d
|
||||
. $INITD/functions
|
||||
|
||||
# Get function listing for cross-distribution logic.
|
||||
TYPESET=`typeset -f|grep "declare"`
|
||||
|
||||
# Get network config.
|
||||
. /etc/sysconfig/network
|
||||
|
||||
DESC="PostgreSQL replication management and monitoring daemon"
|
||||
NAME=repmgrd
|
||||
|
||||
REPMGRD_ENABLED=no
|
||||
REPMGRD_OPTS=
|
||||
REPMGRD_USER=postgres
|
||||
REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
|
||||
REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
||||
REPMGRD_LOCK=/var/lock/subsys/${NAME}
|
||||
REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
|
||||
|
||||
# Read configuration variable file if it is present
|
||||
[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
|
||||
|
||||
# For SELinux we need to use 'runuser' not 'su'
|
||||
if [ -x /sbin/runuser ]
|
||||
then
|
||||
SU=runuser
|
||||
else
|
||||
SU=su
|
||||
fi
|
||||
|
||||
test -x $REPMGRD_BIN || exit 0
|
||||
|
||||
case "$REPMGRD_ENABLED" in
|
||||
[Yy]*)
|
||||
break
|
||||
;;
|
||||
*)
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
if [ -z "${REPMGRD_OPTS}" ]
|
||||
then
|
||||
echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
start()
|
||||
{
|
||||
REPMGRD_START=$"Starting ${NAME} service: "
|
||||
|
||||
# Make sure startup-time log file is valid
|
||||
if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
|
||||
then
|
||||
touch "${REPMGRD_LOG}" || exit 1
|
||||
chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
|
||||
chmod go-rwx "${REPMGRD_LOG}"
|
||||
[ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
|
||||
fi
|
||||
|
||||
echo -n "${REPMGRD_START}"
|
||||
$SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
|
||||
sleep 2
|
||||
pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
|
||||
if [ "x${pid}" != "x" ]
|
||||
then
|
||||
success "${REPMGRD_START}"
|
||||
touch "${REPMGRD_LOCK}"
|
||||
echo $pid > "${REPMGRD_PIDFILE}"
|
||||
echo
|
||||
else
|
||||
failure "${REPMGRD_START}"
|
||||
echo
|
||||
script_result=1
|
||||
fi
|
||||
}
|
||||
|
||||
stop()
|
||||
{
|
||||
echo -n $"Stopping ${NAME} service: "
|
||||
if [ -e "${REPMGRD_LOCK}" ]
|
||||
then
|
||||
killproc ${NAME}
|
||||
ret=$?
|
||||
if [ $ret -eq 0 ]
|
||||
then
|
||||
echo_success
|
||||
rm -f "${REPMGRD_PIDFILE}"
|
||||
rm -f "${REPMGRD_LOCK}"
|
||||
else
|
||||
echo_failure
|
||||
script_result=1
|
||||
fi
|
||||
else
|
||||
# not running; per LSB standards this is "ok"
|
||||
echo_success
|
||||
fi
|
||||
echo
|
||||
}
|
||||
|
||||
|
||||
# See how we were called.
|
||||
case "$1" in
|
||||
start)
|
||||
start
|
||||
;;
|
||||
stop)
|
||||
stop
|
||||
;;
|
||||
status)
|
||||
status -p $REPMGRD_PIDFILE $NAME
|
||||
script_result=$?
|
||||
;;
|
||||
restart)
|
||||
stop
|
||||
start
|
||||
;;
|
||||
*)
|
||||
echo $"Usage: $0 {start|stop|status|restart}"
|
||||
exit 2
|
||||
esac
|
||||
|
||||
exit $script_result
|
||||
@@ -1,21 +0,0 @@
|
||||
# default settings for repmgrd. This file is sourced by /bin/sh from
|
||||
# /etc/init.d/repmgrd
|
||||
|
||||
# disable repmgrd by default so it won't get started upon installation
|
||||
# valid values: yes/no
|
||||
REPMGRD_ENABLED=no
|
||||
|
||||
# Options for repmgrd (required)
|
||||
#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
|
||||
|
||||
# User to run repmgrd as
|
||||
#REPMGRD_USER=postgres
|
||||
|
||||
# repmgrd binary
|
||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
||||
|
||||
# pid file
|
||||
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
||||
|
||||
# log file
|
||||
#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log
|
||||
5
TODO
5
TODO
@@ -53,8 +53,9 @@ Planned feature improvements
|
||||
requested, activate the replication slot using pg_receivexlog to negate the
|
||||
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
|
||||
|
||||
* Take into account the fact that a standby can obtain WAL from an archive,
|
||||
so even if direct streaming replication is interrupted, it may be up-to-date
|
||||
* repmgr: enable "standby follow" to point a standby at another standby, not
|
||||
just the replication cluster master (see GitHub #130)
|
||||
|
||||
|
||||
Usability improvements
|
||||
======================
|
||||
|
||||
15
config.c
15
config.c
@@ -28,7 +28,7 @@ static void parse_event_notifications_list(t_configuration_options *options, con
|
||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||
static void exit_with_errors(ErrorList *config_errors);
|
||||
|
||||
const static char *_progname = '\0';
|
||||
const static char *_progname = NULL;
|
||||
static char config_file_path[MAXPGPATH];
|
||||
static bool config_file_provided = false;
|
||||
bool config_file_found = false;
|
||||
@@ -224,6 +224,7 @@ parse_config(t_configuration_options *options)
|
||||
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
||||
memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options));
|
||||
memset(options->restore_command, 0, sizeof(options->restore_command));
|
||||
|
||||
/* default master_response_timeout is 60 seconds */
|
||||
options->master_response_timeout = 60;
|
||||
@@ -235,7 +236,12 @@ parse_config(t_configuration_options *options)
|
||||
options->monitor_interval_secs = 2;
|
||||
options->retry_promote_interval_secs = 300;
|
||||
|
||||
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
|
||||
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||
|
||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||
options->event_notifications.head = NULL;
|
||||
options->event_notifications.tail = NULL;
|
||||
|
||||
options->tablespace_mapping.head = NULL;
|
||||
options->tablespace_mapping.tail = NULL;
|
||||
@@ -337,7 +343,8 @@ parse_config(t_configuration_options *options)
|
||||
strncpy(options->follow_command, value, MAXLEN);
|
||||
else if (strcmp(name, "master_response_timeout") == 0)
|
||||
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
||||
/* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
||||
/*
|
||||
* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
||||
* we'll switch terminology in a future release (3.1?)
|
||||
*/
|
||||
else if (strcmp(name, "primary_response_timeout") == 0)
|
||||
@@ -358,6 +365,8 @@ parse_config(t_configuration_options *options)
|
||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
|
||||
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "use_replication_slots") == 0)
|
||||
/* XXX we should have a dedicated boolean argument format */
|
||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
||||
@@ -367,6 +376,8 @@ parse_config(t_configuration_options *options)
|
||||
parse_event_notifications_list(options, value);
|
||||
else if (strcmp(name, "tablespace_mapping") == 0)
|
||||
tablespace_list_append(options, value);
|
||||
else if (strcmp(name, "restore_command") == 0)
|
||||
strncpy(options->restore_command, value, MAXLEN);
|
||||
else
|
||||
{
|
||||
known_parameter = false;
|
||||
|
||||
8
config.h
8
config.h
@@ -72,16 +72,22 @@ typedef struct
|
||||
char pg_bindir[MAXLEN];
|
||||
char pg_ctl_options[MAXLEN];
|
||||
char pg_basebackup_options[MAXLEN];
|
||||
char restore_command[MAXLEN];
|
||||
char logfile[MAXLEN];
|
||||
int monitor_interval_secs;
|
||||
int retry_promote_interval_secs;
|
||||
int witness_repl_nodes_sync_interval_secs;
|
||||
int use_replication_slots;
|
||||
char event_notification_command[MAXLEN];
|
||||
EventNotificationList event_notifications;
|
||||
TablespaceList tablespace_mapping;
|
||||
} t_configuration_options;
|
||||
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
/*
|
||||
* The following will initialize the structure with a minimal set of options;
|
||||
* actual defaults are set in parse_config() before parsing the configuration file
|
||||
*/
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }
|
||||
|
||||
typedef struct ErrorListCell
|
||||
{
|
||||
|
||||
221
dbutils.c
221
dbutils.c
@@ -31,9 +31,10 @@
|
||||
char repmgr_schema[MAXLEN] = "";
|
||||
char repmgr_schema_quoted[MAXLEN] = "";
|
||||
|
||||
static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);
|
||||
|
||||
PGconn *
|
||||
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice)
|
||||
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice, const bool verbose_only)
|
||||
{
|
||||
/* Make a connection to the database */
|
||||
PGconn *conn = NULL;
|
||||
@@ -49,15 +50,23 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
||||
/* Check to see that the backend connection was successfully made */
|
||||
if ((PQstatus(conn) != CONNECTION_OK))
|
||||
{
|
||||
if (log_notice)
|
||||
bool emit_log = true;
|
||||
|
||||
if (verbose_only == true && verbose_logging == false)
|
||||
emit_log = false;
|
||||
|
||||
if (emit_log)
|
||||
{
|
||||
log_notice(_("connection to database failed: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_err(_("connection to database failed: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
if (log_notice)
|
||||
{
|
||||
log_notice(_("connection to database failed: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_err(_("connection to database failed: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
}
|
||||
}
|
||||
|
||||
if (exit_on_error)
|
||||
@@ -70,16 +79,35 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
||||
return conn;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Establish a database connection, optionally exit on error
|
||||
*/
|
||||
PGconn *
|
||||
establish_db_connection(const char *conninfo, const bool exit_on_error)
|
||||
{
|
||||
return _establish_db_connection(conninfo, exit_on_error, false);
|
||||
return _establish_db_connection(conninfo, exit_on_error, false, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to establish a database connection, never exit on error, only
|
||||
* output error messages if --verbose option used
|
||||
*/
|
||||
PGconn *
|
||||
test_db_connection(const char *conninfo, const bool exit_on_error)
|
||||
establish_db_connection_quiet(const char *conninfo)
|
||||
{
|
||||
return _establish_db_connection(conninfo, exit_on_error, true);
|
||||
return _establish_db_connection(conninfo, false, false, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to establish a database connection, never exit on error,
|
||||
* output connection error messages as NOTICE (useful when connection
|
||||
* failure is expected)
|
||||
*/
|
||||
PGconn *
|
||||
test_db_connection(const char *conninfo)
|
||||
{
|
||||
return _establish_db_connection(conninfo, false, true, false);
|
||||
}
|
||||
|
||||
|
||||
@@ -420,7 +448,7 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
||||
parameter, datatype, op, value, datatype);
|
||||
|
||||
log_verbose(LOG_DEBUG, "guc_set_typed():n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "guc_set_typed():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -538,7 +566,7 @@ get_conninfo_value(const char *conninfo, const char *keyword, char *output)
|
||||
|
||||
conninfo_options = PQconninfoParse(conninfo, NULL);
|
||||
|
||||
if (conninfo_options == false)
|
||||
if (conninfo_options == NULL)
|
||||
{
|
||||
log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo);
|
||||
return false;
|
||||
@@ -587,7 +615,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
upstream_conninfo = upstream_conninfo_out;
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" SELECT un.conninfo, un.name, un.id "
|
||||
" SELECT un.conninfo, un.id "
|
||||
" FROM %s.repl_nodes un "
|
||||
"INNER JOIN %s.repl_nodes n "
|
||||
" ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
|
||||
@@ -604,7 +632,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("unable to get conninfo for upstream server\n%s\n"),
|
||||
log_err(_("error when attempting to find upstream server\n%s\n"),
|
||||
PQerrorMessage(standby_conn));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
@@ -612,9 +640,36 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
|
||||
if (!PQntuples(res))
|
||||
{
|
||||
log_notice(_("no record found for upstream server"));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
log_debug("no record found for upstream server\n");
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" SELECT un.conninfo, un.id "
|
||||
" FROM %s.repl_nodes un "
|
||||
" WHERE un.cluster = '%s' "
|
||||
" AND un.type='master' "
|
||||
" AND un.active IS TRUE",
|
||||
get_repmgr_schema_quoted(standby_conn),
|
||||
cluster);
|
||||
res = PQexec(standby_conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("error when attempting to find active master server\n%s\n"),
|
||||
PQerrorMessage(standby_conn));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!PQntuples(res))
|
||||
{
|
||||
PQclear(res);
|
||||
log_notice(_("no record found for active master server\n"));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
log_debug("record found for active master server\n");
|
||||
}
|
||||
|
||||
strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
|
||||
@@ -889,7 +944,7 @@ get_repmgr_schema_quoted(PGconn *conn)
|
||||
|
||||
|
||||
bool
|
||||
create_replication_slot(PGconn *conn, char *slot_name)
|
||||
create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int query_res;
|
||||
@@ -926,9 +981,19 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
||||
return false;
|
||||
}
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
/* In 9.6 and later, reserve the LSN straight away */
|
||||
if (server_version_num >= 90600)
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)",
|
||||
slot_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
}
|
||||
|
||||
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
||||
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||
@@ -1111,7 +1176,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
||||
|
||||
|
||||
/*
|
||||
* copy_configuration()
|
||||
* witness_copy_node_records()
|
||||
*
|
||||
* Copy records in master's `repl_nodes` table to witness database
|
||||
*
|
||||
@@ -1119,29 +1184,49 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
||||
* `repmgrd` after a failover event occurs
|
||||
*/
|
||||
bool
|
||||
copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
{
|
||||
char sqlquery[MAXLEN];
|
||||
PGresult *res;
|
||||
int i;
|
||||
|
||||
begin_transaction(witnessconn);
|
||||
|
||||
/* Defer constraints */
|
||||
sqlquery_snprintf(sqlquery, "SET CONSTRAINTS ALL DEFERRED;");
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(witnessconn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to defer constraints:\n%s\n"),
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Truncate existing records */
|
||||
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(witnessconn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Get current records from primary */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
|
||||
get_repmgr_schema_quoted(masterconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(masterconn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -1149,20 +1234,23 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
log_err("Unable to retrieve node records from master:\n%s\n",
|
||||
PQerrorMessage(masterconn));
|
||||
PQclear(res);
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Insert primary records into witness table */
|
||||
for (i = 0; i < PQntuples(res); i++)
|
||||
{
|
||||
bool node_record_created;
|
||||
|
||||
log_verbose(LOG_DEBUG,
|
||||
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
||||
PQgetvalue(res, i, 4),
|
||||
"witness_copy_node_records(): writing node record for node %s (id: %s)\n",
|
||||
PQgetvalue(res, i, 3),
|
||||
PQgetvalue(res, i, 0));
|
||||
|
||||
node_record_created = create_node_record(witnessconn,
|
||||
"copy_configuration",
|
||||
"witness_copy_node_records",
|
||||
atoi(PQgetvalue(res, i, 0)),
|
||||
PQgetvalue(res, i, 1),
|
||||
strlen(PQgetvalue(res, i, 2))
|
||||
@@ -1174,7 +1262,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
atoi(PQgetvalue(res, i, 5)),
|
||||
strlen(PQgetvalue(res, i, 6))
|
||||
? PQgetvalue(res, i, 6)
|
||||
: NULL
|
||||
: NULL,
|
||||
(strcmp(PQgetvalue(res, i, 7), "t") == 0)
|
||||
? true
|
||||
: false
|
||||
);
|
||||
|
||||
if (node_record_created == false)
|
||||
@@ -1183,11 +1274,16 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
|
||||
log_err("Unable to copy node record to witness database\n%s\n",
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
/* And finished */
|
||||
commit_transaction(witnessconn);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1200,7 +1296,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
* XXX we should pass the record parameters as a struct.
|
||||
*/
|
||||
bool
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
char upstream_node_id[MAXLEN];
|
||||
@@ -1241,8 +1337,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes "
|
||||
" (id, type, upstream_node_id, cluster, "
|
||||
" name, conninfo, slot_name, priority) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
|
||||
" name, conninfo, slot_name, priority, active) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
node,
|
||||
type,
|
||||
@@ -1251,7 +1347,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
node_name,
|
||||
conninfo,
|
||||
slot_name_buf,
|
||||
priority);
|
||||
priority,
|
||||
active == true ? "TRUE" : "FALSE");
|
||||
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
||||
|
||||
@@ -1291,7 +1388,7 @@ delete_node_record(PGconn *conn, int node, char *action)
|
||||
|
||||
if (action != NULL)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
||||
log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
|
||||
}
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
@@ -1612,8 +1709,7 @@ int
|
||||
get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
int ntuples;
|
||||
int result;
|
||||
|
||||
sqlquery_snprintf(
|
||||
sqlquery,
|
||||
@@ -1627,6 +1723,51 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery);
|
||||
|
||||
result = _get_node_record(conn, cluster, sqlquery, node_info);
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int result;
|
||||
|
||||
sqlquery_snprintf(
|
||||
sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active"
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND name = '%s'",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
cluster,
|
||||
node_name);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_node_record_by_name():\n%s\n", sqlquery);
|
||||
|
||||
result = _get_node_record(conn, cluster, sqlquery, node_info);
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %s\n", node_name);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
_get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info)
|
||||
{
|
||||
int ntuples;
|
||||
PGresult *res;
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
@@ -1637,7 +1778,6 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
|
||||
|
||||
if (ntuples == 0)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1658,6 +1798,9 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int
|
||||
get_node_replication_state(PGconn *conn, char *node_name, char *output)
|
||||
{
|
||||
|
||||
40
dbutils.h
40
dbutils.h
@@ -52,18 +52,6 @@ typedef struct s_node_info
|
||||
} t_node_info;
|
||||
|
||||
|
||||
/*
|
||||
* Struct to store replication slot information
|
||||
*/
|
||||
|
||||
typedef struct s_replication_slot
|
||||
{
|
||||
char slot_name[MAXLEN];
|
||||
char slot_type[MAXLEN];
|
||||
bool active;
|
||||
} t_replication_slot;
|
||||
|
||||
|
||||
#define T_NODE_INFO_INITIALIZER { \
|
||||
NODE_NOT_FOUND, \
|
||||
NO_UPSTREAM_NODE, \
|
||||
@@ -78,13 +66,27 @@ typedef struct s_replication_slot
|
||||
InvalidXLogRecPtr \
|
||||
}
|
||||
|
||||
/*
|
||||
* Struct to store replication slot information
|
||||
*/
|
||||
|
||||
typedef struct s_replication_slot
|
||||
{
|
||||
char slot_name[MAXLEN];
|
||||
char slot_type[MAXLEN];
|
||||
bool active;
|
||||
} t_replication_slot;
|
||||
|
||||
|
||||
|
||||
PGconn *_establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error,
|
||||
const bool log_notice);
|
||||
const bool log_notice,
|
||||
const bool verbose_only);
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
PGconn *test_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||
PGconn *test_db_connection(const char *conninfo);
|
||||
PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||
const char *values[],
|
||||
const bool exit_on_error);
|
||||
@@ -115,16 +117,17 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
bool cancel_query(PGconn *conn, int timeout);
|
||||
char *get_repmgr_schema(void);
|
||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
|
||||
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||
bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||
int get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info);
|
||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
@@ -133,3 +136,4 @@ int get_node_replication_state(PGconn *conn, char *node_name, char *output)
|
||||
t_server_type parse_node_type(const char *type);
|
||||
int get_data_checksum_version(const char *data_directory);
|
||||
#endif
|
||||
|
||||
|
||||
4
debian/DEBIAN/control
vendored
4
debian/DEBIAN/control
vendored
@@ -1,9 +1,9 @@
|
||||
Package: repmgr-auto
|
||||
Version: 3.0.1
|
||||
Version: 3.1.3
|
||||
Section: database
|
||||
Priority: optional
|
||||
Architecture: all
|
||||
Depends: rsync, postgresql-9.3 | postgresql-9.4
|
||||
Depends: rsync, postgresql-9.3 | postgresql-9.4 | postgresql-9.5
|
||||
Maintainer: Self built package <user@localhost>
|
||||
Description: PostgreSQL replication setup, management and monitoring
|
||||
has two main executables
|
||||
|
||||
194
dirmod.c
Normal file
194
dirmod.c
Normal file
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
*
|
||||
* dirmod.c
|
||||
* directory handling functions
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "postgres_fe.h"
|
||||
|
||||
/* Don't modify declarations in system headers */
|
||||
|
||||
#include <unistd.h>
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
/*
 * pgfnames
 *
 * Build a NULL-terminated array holding a copy of the name of every
 * entry in directory 'path', excluding "." and "..".
 *
 * Returns NULL if the directory cannot be opened. If reading the
 * directory fails part-way, a message is printed to stderr and the
 * names collected so far are returned. The caller must release the
 * result with pgfnames_cleanup().
 */
char **
pgfnames(const char *path)
{
    DIR        *dir;
    struct dirent *entry;
    char      **names;
    int         count = 0;
    int         capacity = 200; /* enough for many small dbs */

    dir = opendir(path);
    if (dir == NULL)
        return NULL;

    names = (char **) palloc(capacity * sizeof(char *));

    for (;;)
    {
        /* reset errno so a NULL from readdir() can be told apart from EOF */
        errno = 0;
        entry = readdir(dir);
        if (entry == NULL)
            break;

        /* skip the self and parent entries */
        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
            continue;

        /* always keep one slot spare for the terminating NULL */
        if (count + 1 >= capacity)
        {
            capacity *= 2;
            names = (char **) repalloc(names, capacity * sizeof(char *));
        }

        names[count] = pstrdup(entry->d_name);
        count++;
    }

    if (errno != 0)
    {
        fprintf(stderr, _("could not read directory \"%s\": %s\n"),
                path, strerror(errno));
    }

    names[count] = NULL;

    if (closedir(dir) != 0)
    {
        fprintf(stderr, _("could not close directory \"%s\": %s\n"),
                path, strerror(errno));
    }

    return names;
}
|
||||
|
||||
|
||||
/*
 * pgfnames_cleanup
 *
 * Free a NULL-terminated name array returned by pgfnames(): first each
 * individual name, then the array itself.
 */
void
pgfnames_cleanup(char **filenames)
{
    int         i = 0;

    while (filenames[i] != NULL)
    {
        pfree(filenames[i]);
        i++;
    }

    pfree(filenames);
}
|
||||
|
||||
|
||||
/*
|
||||
* rmtree
|
||||
*
|
||||
* Delete a directory tree recursively.
|
||||
* Assumes path points to a valid directory.
|
||||
* Deletes everything under path.
|
||||
* If rmtopdir is true deletes the directory too.
|
||||
* Returns true if successful, false if there was any problem.
|
||||
* (The details of the problem are reported already, so caller
|
||||
* doesn't really have to say anything more, but most do.)
|
||||
*/
|
||||
bool
|
||||
rmtree(const char *path, bool rmtopdir)
|
||||
{
|
||||
bool result = true;
|
||||
char pathbuf[MAXPGPATH];
|
||||
char **filenames;
|
||||
char **filename;
|
||||
struct stat statbuf;
|
||||
|
||||
/*
|
||||
* we copy all the names out of the directory before we start modifying
|
||||
* it.
|
||||
*/
|
||||
filenames = pgfnames(path);
|
||||
|
||||
if (filenames == NULL)
|
||||
return false;
|
||||
|
||||
/* now we have the names we can start removing things */
|
||||
for (filename = filenames; *filename; filename++)
|
||||
{
|
||||
snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
|
||||
|
||||
/*
|
||||
* It's ok if the file is not there anymore; we were just about to
|
||||
* delete it anyway.
|
||||
*
|
||||
* This is not an academic possibility. One scenario where this
|
||||
* happens is when bgwriter has a pending unlink request for a file in
|
||||
* a database that's being dropped. In dropdb(), we call
|
||||
* ForgetDatabaseFsyncRequests() to flush out any such pending unlink
|
||||
* requests, but because that's asynchronous, it's not guaranteed that
|
||||
* the bgwriter receives the message in time.
|
||||
*/
|
||||
if (lstat(pathbuf, &statbuf) != 0)
|
||||
{
|
||||
if (errno != ENOENT)
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
/* call ourselves recursively for a directory */
|
||||
if (!rmtree(pathbuf, true))
|
||||
{
|
||||
/* we already reported the error */
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (unlink(pathbuf) != 0)
|
||||
{
|
||||
if (errno != ENOENT)
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rmtopdir)
|
||||
{
|
||||
if (rmdir(path) != 0)
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
|
||||
pgfnames_cleanup(filenames);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
23
dirmod.h
Normal file
23
dirmod.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
* dirmod.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _DIRMOD_H_
|
||||
#define _DIRMOD_H_
|
||||
|
||||
#endif
|
||||
@@ -29,7 +29,6 @@
|
||||
#define ERR_DB_CON 6
|
||||
#define ERR_DB_QUERY 7
|
||||
#define ERR_PROMOTED 8
|
||||
#define ERR_BAD_PASSWORD 9
|
||||
#define ERR_STR_OVERFLOW 10
|
||||
#define ERR_FAILOVER_FAIL 11
|
||||
#define ERR_BAD_SSH 12
|
||||
@@ -37,5 +36,7 @@
|
||||
#define ERR_BAD_BASEBACKUP 14
|
||||
#define ERR_INTERNAL 15
|
||||
#define ERR_MONITORING_FAIL 16
|
||||
#define ERR_BAD_BACKUP_LABEL 17
|
||||
#define ERR_SWITCHOVER_FAIL 18
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
7
log.c
7
log.c
@@ -40,7 +40,8 @@
|
||||
/* #define REPMGR_DEBUG */
|
||||
|
||||
static int detect_log_facility(const char *facility);
|
||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
|
||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||
|
||||
int log_type = REPMGR_STDERR;
|
||||
int log_level = LOG_NOTICE;
|
||||
@@ -48,7 +49,7 @@ int last_log_level = LOG_NOTICE;
|
||||
int verbose_logging = false;
|
||||
int terse_logging = false;
|
||||
|
||||
void
|
||||
extern void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||
{
|
||||
va_list arglist;
|
||||
@@ -141,7 +142,7 @@ log_verbose(int level, const char *fmt, ...)
|
||||
|
||||
|
||||
bool
|
||||
logger_init(t_configuration_options * opts, const char *ident)
|
||||
logger_init(t_configuration_options *opts, const char *ident)
|
||||
{
|
||||
char *level = opts->loglevel;
|
||||
char *facility = opts->logfacility;
|
||||
|
||||
12
log.h
12
log.h
@@ -25,7 +25,7 @@
|
||||
#define REPMGR_SYSLOG 1
|
||||
#define REPMGR_STDERR 2
|
||||
|
||||
void
|
||||
extern void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
|
||||
@@ -123,10 +123,14 @@ bool logger_shutdown(void);
|
||||
void logger_set_verbose(void);
|
||||
void logger_set_terse(void);
|
||||
|
||||
void log_hint(const char *fmt, ...);
|
||||
void log_verbose(int level, const char *fmt, ...);
|
||||
void log_hint(const char *fmt, ...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2)));
|
||||
void log_verbose(int level, const char *fmt, ...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
|
||||
extern int log_type;
|
||||
extern int log_level;
|
||||
extern int verbose_logging;
|
||||
extern int terse_logging;
|
||||
|
||||
#endif
|
||||
#endif /* _REPMGR_LOG_H_ */
|
||||
|
||||
@@ -15,21 +15,29 @@
|
||||
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
||||
# to preserve case, we recommend using lower case and avoiding whitespace
|
||||
# to facilitate easier querying of the repmgr views and tables.
|
||||
cluster=example_cluster
|
||||
#cluster=example_cluster
|
||||
|
||||
# Node ID and name
|
||||
# (Note: we recommend to avoid naming nodes after their initial
|
||||
# replication funcion, as this will cause confusion when e.g.
|
||||
# replication function, as this will cause confusion when e.g.
|
||||
# "standby2" is promoted to primary)
|
||||
node=2 # a unique integer
|
||||
node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||
#node=2 # a unique integer
|
||||
#node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||
# the server's hostname or another identifier unambiguously
|
||||
# associated with the server to avoid confusion
|
||||
|
||||
# Database connection information as a conninfo string
|
||||
# This must be accessible to all servers in the cluster; for details see:
|
||||
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
#
|
||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
#
|
||||
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
#
|
||||
# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
|
||||
# conninfo string to determine the length of time which elapses before
|
||||
# a network connection attempt is abandoned; for details see:
|
||||
#
|
||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT
|
||||
|
||||
# Optional configuration items
|
||||
# ============================
|
||||
@@ -37,15 +45,16 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
# Replication settings
|
||||
# ---------------------
|
||||
|
||||
# when using cascading replication and a standby is to be connected to an
|
||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
||||
# must exist before the new standby can be registered. If a standby is
|
||||
# to connect directly to a primary node, this parameter is not required.
|
||||
upstream_node=1
|
||||
# When using cascading replication, a standby can connect to another
|
||||
# upstream standby node which is specified by setting 'upstream_node'.
|
||||
# In that case, the upstream node must exist before the new standby
|
||||
# can be registered. If 'upstream_node' is not set, then the standby
|
||||
# will connect directly to the primary node.
|
||||
#upstream_node=1
|
||||
|
||||
# use physical replication slots - PostgreSQL 9.4 and later only
|
||||
# (default: 0)
|
||||
use_replication_slots=0
|
||||
#use_replication_slots=0
|
||||
|
||||
# NOTE: 'max_replication_slots' should be configured for at least the
|
||||
# number of standbys which will connect to the primary.
|
||||
@@ -55,15 +64,15 @@ use_replication_slots=0
|
||||
|
||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||
# (default: NOTICE)
|
||||
loglevel=NOTICE
|
||||
#loglevel=NOTICE
|
||||
|
||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
# (default: STDERR)
|
||||
logfacility=STDERR
|
||||
#logfacility=STDERR
|
||||
|
||||
# stderr can be redirected to an arbitrary file:
|
||||
#
|
||||
logfile='/var/log/repmgr/repmgr.log'
|
||||
#logfile='/var/log/repmgr/repmgr.log'
|
||||
|
||||
# event notifications can be passed to an arbitrary external program
|
||||
# together with the following parameters:
|
||||
@@ -77,12 +86,12 @@ logfile='/var/log/repmgr/repmgr.log'
|
||||
# the values provided for "%t" and "%d" will probably contain spaces,
|
||||
# so should be quoted in the provided command configuration, e.g.:
|
||||
#
|
||||
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
|
||||
# By default, all notifications will be passed; the notification types
|
||||
# can be filtered to explicitly named ones:
|
||||
#
|
||||
event_notifications=master_register,standby_register,witness_create
|
||||
#event_notifications=master_register,standby_register,witness_create
|
||||
|
||||
|
||||
# Environment/command settings
|
||||
@@ -90,17 +99,17 @@ event_notifications=master_register,standby_register,witness_create
|
||||
|
||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
||||
# (if not provided, defaults to system $PATH)
|
||||
pg_bindir=/usr/bin/
|
||||
#pg_bindir=/usr/bin/
|
||||
|
||||
# external command options
|
||||
|
||||
rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
ssh_options=-o "StrictHostKeyChecking no"
|
||||
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
#ssh_options=-o "StrictHostKeyChecking no"
|
||||
|
||||
# external command arguments. Values shown are examples.
|
||||
|
||||
pg_ctl_options='-s'
|
||||
pg_basebackup_options='--xlog-method=s'
|
||||
#pg_ctl_options='-s'
|
||||
#pg_basebackup_options='--xlog-method=s'
|
||||
|
||||
|
||||
# Standby clone settings
|
||||
@@ -112,6 +121,10 @@ pg_basebackup_options='--xlog-method=s'
|
||||
#
|
||||
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
|
||||
|
||||
# You can specify a restore_command to be used in the recovery.conf that
|
||||
# will be placed in the cloned standby
|
||||
#
|
||||
# restore_command = cp /path/to/archived/wals/%f %p
|
||||
|
||||
# Failover settings (repmgrd)
|
||||
# ---------------------------
|
||||
@@ -122,27 +135,38 @@ pg_basebackup_options='--xlog-method=s'
|
||||
# Number of seconds to wait for a response from the primary server before
|
||||
# deciding it has failed.
|
||||
|
||||
master_response_timeout=60
|
||||
#master_response_timeout=60
|
||||
|
||||
# Number of attempts at what interval (in seconds) to try and
|
||||
# connect to a server to establish its status (e.g. master
|
||||
# during failover)
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=10
|
||||
#reconnect_attempts=6
|
||||
#reconnect_interval=10
|
||||
|
||||
# Autofailover options
|
||||
failover=manual # one of 'automatic', 'manual'
|
||||
# (default: manual)
|
||||
priority=100 # a value of zero or less prevents the node being promoted to primary
|
||||
#failover=manual # one of 'automatic', 'manual' (default: manual)
|
||||
# defines the action to take in the event of upstream failure
|
||||
#
|
||||
# 'automatic': repmgrd will automatically attempt to promote the
|
||||
# node or follow the new upstream node
|
||||
# 'manual': repmgrd will take no action and the node will require
|
||||
# manual attention to reattach it to replication
|
||||
|
||||
#priority=100 # indicate a preferred priority for promoting nodes
|
||||
# a value of zero or less prevents the node being promoted to primary
|
||||
# (default: 100)
|
||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
|
||||
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
|
||||
# monitoring interval in seconds; default is 2
|
||||
monitor_interval_secs=2
|
||||
#monitor_interval_secs=2
|
||||
|
||||
# change wait time for primary; before we bail out and exit when the primary
|
||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||
# default value is 300
|
||||
retry_promote_interval_secs=300
|
||||
#retry_promote_interval_secs=300
|
||||
|
||||
# Number of seconds after which the witness server resyncs the repl_nodes table
|
||||
#witness_repl_nodes_sync_interval_secs=15
|
||||
|
||||
41
repmgr.h
41
repmgr.h
@@ -28,12 +28,11 @@
|
||||
#include "dbutils.h"
|
||||
#include "errcode.h"
|
||||
#include "config.h"
|
||||
#include "dirmod.h"
|
||||
|
||||
#define MIN_SUPPORTED_VERSION "9.3"
|
||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||
|
||||
#include "config.h"
|
||||
#define MAXFILENAME 1024
|
||||
#define ERRBUFF_SIZE 512
|
||||
|
||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||
@@ -48,6 +47,15 @@
|
||||
#define NO_UPSTREAM_NODE -1
|
||||
#define UNKNOWN_NODE_ID -1
|
||||
|
||||
#define OPT_HELP 1
|
||||
#define OPT_CHECK_UPSTREAM_CONFIG 2
|
||||
#define OPT_RECOVERY_MIN_APPLY_DELAY 3
|
||||
#define OPT_IGNORE_EXTERNAL_CONFIG_FILES 4
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 5
|
||||
#define OPT_PG_REWIND 6
|
||||
#define OPT_PWPROMPT 7
|
||||
#define OPT_CSV 8
|
||||
#define OPT_INITDB_NO_PWPROMPT 9
|
||||
|
||||
|
||||
/* Run time options type */
|
||||
@@ -57,8 +65,8 @@ typedef struct
|
||||
char dbname[MAXLEN];
|
||||
char host[MAXLEN];
|
||||
char username[MAXLEN];
|
||||
char dest_dir[MAXFILENAME];
|
||||
char config_file[MAXFILENAME];
|
||||
char dest_dir[MAXPGPATH];
|
||||
char config_file[MAXPGPATH];
|
||||
char remote_user[MAXLEN];
|
||||
char superuser[MAXLEN];
|
||||
char wal_keep_segments[MAXLEN];
|
||||
@@ -67,11 +75,11 @@ typedef struct
|
||||
bool force;
|
||||
bool wait_for_master;
|
||||
bool ignore_rsync_warn;
|
||||
bool initdb_no_pwprompt;
|
||||
bool witness_pwprompt;
|
||||
bool rsync_only;
|
||||
bool fast_checkpoint;
|
||||
bool ignore_external_config_files;
|
||||
char pg_ctl_mode[MAXLEN];
|
||||
bool csv_mode;
|
||||
char masterport[MAXLEN];
|
||||
/*
|
||||
* configuration file parameters which can be overridden on the
|
||||
@@ -81,7 +89,8 @@ typedef struct
|
||||
|
||||
/* parameter used by STANDBY SWITCHOVER */
|
||||
char remote_config_file[MAXLEN];
|
||||
char pg_rewind[MAXFILENAME];
|
||||
char pg_rewind[MAXPGPATH];
|
||||
char pg_ctl_mode[MAXLEN];
|
||||
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
||||
char config_archive_dir[MAXLEN];
|
||||
/* parameter used by CLUSTER CLEANUP */
|
||||
@@ -91,11 +100,23 @@ typedef struct
|
||||
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
|
||||
/* deprecated command line option */
|
||||
char localport[MAXLEN];
|
||||
/* deprecated command line options */
|
||||
char localport[MAXLEN];
|
||||
} t_runtime_options;
|
||||
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "" }
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", ""}
|
||||
|
||||
struct BackupLabel
|
||||
{
|
||||
XLogRecPtr start_wal_location;
|
||||
char start_wal_file[MAXLEN];
|
||||
XLogRecPtr checkpoint_location;
|
||||
char backup_from[MAXLEN];
|
||||
char backup_method[MAXLEN];
|
||||
char start_time[MAXLEN];
|
||||
char label[MAXLEN];
|
||||
XLogRecPtr min_failover_slot_lsn;
|
||||
};
|
||||
|
||||
extern char repmgr_schema[MAXLEN];
|
||||
extern bool config_file_found;
|
||||
|
||||
@@ -63,6 +63,15 @@ UPDATE repl_nodes SET type = 'master' WHERE id = $master_id;
|
||||
|
||||
-- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);
|
||||
|
||||
/* There's also an event table which we need to create */
|
||||
CREATE TABLE repl_events (
|
||||
node_id INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
details TEXT NULL
|
||||
);
|
||||
|
||||
/* When you're sure of your changes, commit them */
|
||||
|
||||
-- COMMIT;
|
||||
|
||||
32
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
32
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Update a repmgr 3.1.1 installation to repmgr 3.1.2
|
||||
* --------------------------------------------------
|
||||
*
|
||||
* This update is only required if repmgrd is being used in conjunction
|
||||
* with a witness server.
|
||||
*
|
||||
* The new repmgr package should be installed first. Then
|
||||
* carry out these steps:
|
||||
*
|
||||
* 1. (If repmgrd is used) stop any running repmgrd instances
|
||||
* 2. On the master node, execute the SQL statement listed below
|
||||
* 3. (If repmgrd is used) restart repmgrd
|
||||
*/
|
||||
|
||||
/*
|
||||
* If your repmgr installation is not included in your repmgr
|
||||
* user's search path, please set the search path to the name
|
||||
* of the repmgr schema to ensure objects are installed in
|
||||
* the correct location.
|
||||
*
|
||||
* The repmgr schema is "repmgr_" + the cluster name defined in
|
||||
* 'repmgr.conf'.
|
||||
*/
|
||||
|
||||
-- SET search_path TO 'name_of_repmgr_schema';
|
||||
|
||||
BEGIN;
|
||||
|
||||
ALTER TABLE repl_nodes DROP CONSTRAINT repl_nodes_upstream_node_id_fkey,
|
||||
ADD CONSTRAINT repl_nodes_upstream_node_id_fkey FOREIGN KEY (upstream_node_id) REFERENCES repl_nodes(id) DEFERRABLE;
|
||||
COMMIT;
|
||||
@@ -83,7 +83,12 @@ _PG_init(void)
|
||||
* resources in repmgr_shmem_startup().
|
||||
*/
|
||||
RequestAddinShmemSpace(repmgr_memsize());
|
||||
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
RequestNamedLWLockTranche("repmgr", 1);
|
||||
#else
|
||||
RequestAddinLWLocks(1);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Install hooks.
|
||||
@@ -128,7 +133,11 @@ repmgr_shmem_startup(void)
|
||||
if (!found)
|
||||
{
|
||||
/* First time through ... */
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock;
|
||||
#else
|
||||
shared_state->lock = LWLockAssign();
|
||||
#endif
|
||||
snprintf(shared_state->location,
|
||||
sizeof(shared_state->location), "%X/%X", 0, 0);
|
||||
}
|
||||
|
||||
@@ -24,12 +24,17 @@
|
||||
#include <stdlib.h>
|
||||
#include "errcode.h"
|
||||
|
||||
|
||||
#define QUERY_STR_LEN 8192
|
||||
#define MAXLEN 1024
|
||||
#define MAXLINELENGTH 4096
|
||||
#define MAXVERSIONSTR 16
|
||||
#define MAXCONNINFO 1024
|
||||
|
||||
/* Why? http://stackoverflow.com/a/5459929/398670 */
|
||||
#define STR(x) CppAsString(x)
|
||||
|
||||
#define MAXLEN_STR STR(MAXLEN)
|
||||
|
||||
extern int
|
||||
xsnprintf(char *str, size_t size, const char *format,...)
|
||||
|
||||
Reference in New Issue
Block a user