mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
61 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
16896510dc | ||
|
|
1c155a1088 | ||
|
|
31d57f4122 | ||
|
|
7b313b9d71 | ||
|
|
cf126642bd | ||
|
|
52281fcde8 | ||
|
|
de573edaaa | ||
|
|
4cb7f301ad | ||
|
|
87d8de4441 | ||
|
|
6db742f81e | ||
|
|
c79933685c | ||
|
|
04ba672b9f | ||
|
|
4f4111063a | ||
|
|
3a3a536e6d | ||
|
|
6f7206a5a1 | ||
|
|
f9fd1dd227 | ||
|
|
8140ba9c27 | ||
|
|
32dba444e1 | ||
|
|
8212ff8d8a | ||
|
|
1ccd0edad2 | ||
|
|
59b31dd1ca | ||
|
|
300b9f0cc2 | ||
|
|
0efee4cf65 | ||
|
|
0cb2584886 | ||
|
|
b88d27248c | ||
|
|
683c54325e | ||
|
|
70d398cd47 | ||
|
|
7b7d80e5f2 | ||
|
|
96b0e26084 | ||
|
|
91c498f6f1 | ||
|
|
d48093e732 | ||
|
|
3f0d1754a4 | ||
|
|
f27979bbe1 | ||
|
|
e9445a5d5e | ||
|
|
9a2717b5e3 | ||
|
|
dd6ea1cd77 | ||
|
|
de5908c122 | ||
|
|
4b5c84921c | ||
|
|
aaa8d70cef | ||
|
|
ca31b846e7 | ||
|
|
a27cecb559 | ||
|
|
cf0cdfa6a1 | ||
|
|
31489d92c0 | ||
|
|
b7fd13aed2 | ||
|
|
3c4bf27aa7 | ||
|
|
0ebd9c15d9 | ||
|
|
f9dba283d4 | ||
|
|
205f1cebbb | ||
|
|
4d97c1ebf7 | ||
|
|
12c395e91f | ||
|
|
bd1e4f71d6 | ||
|
|
cb49071ea4 | ||
|
|
5ad674edff | ||
|
|
ac09bad89c | ||
|
|
009d92fec8 | ||
|
|
b3d8a68a1d | ||
|
|
05b47cb2a8 | ||
|
|
dc542a1b7d | ||
|
|
6ce8058749 | ||
|
|
2edcac77f0 | ||
|
|
f740374392 |
24
HISTORY
24
HISTORY
@@ -1,4 +1,26 @@
|
|||||||
3.1.0 2016-01-
|
3.1.3 2016-05-17
|
||||||
|
repmgrd: enable monitoring when a standby is catching up by
|
||||||
|
replaying archived WAL (Ian)
|
||||||
|
repmgrd: when upstream_node_id is NULL, assume upstream node
|
||||||
|
to be current master (Ian)
|
||||||
|
repmgrd: check for reappearance of the master node if standby
|
||||||
|
promotion fails (Ian)
|
||||||
|
improve handling of rsync failure conditions (Martín)
|
||||||
|
|
||||||
|
3.1.2 2016-04-12
|
||||||
|
Fix pg_ctl path generation in do_standby_switchover() (Ian)
|
||||||
|
Regularly sync witness server repl_nodes table (Ian)
|
||||||
|
Documentation improvements (Gianni, dhyannataraj)
|
||||||
|
(Experimental) ensure repmgr handles failover slots when copying
|
||||||
|
in rsync mode (Craig, Ian)
|
||||||
|
rsync mode handling fixes (Martín)
|
||||||
|
Enable repmgr to compile against 9.6devel (Ian)
|
||||||
|
|
||||||
|
3.1.1 2016-02-24
|
||||||
|
Add '-P/--pwprompt' option for "repmgr create witness" (Ian)
|
||||||
|
Prevent repmgr/repmgrd running as root (Ian)
|
||||||
|
|
||||||
|
3.1.0 2016-02-01
|
||||||
Add "repmgr standby switchover" command (Ian)
|
Add "repmgr standby switchover" command (Ian)
|
||||||
Revised README file (Ian)
|
Revised README file (Ian)
|
||||||
Remove requirement for 'archive_mode' to be enabled (Ian)
|
Remove requirement for 'archive_mode' to be enabled (Ian)
|
||||||
|
|||||||
23
Makefile
23
Makefile
@@ -2,6 +2,8 @@
|
|||||||
# Makefile
|
# Makefile
|
||||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
|
|
||||||
|
HEADERS = $(wildcard *.h)
|
||||||
|
|
||||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||||
|
|
||||||
@@ -10,15 +12,22 @@ DATA = repmgr.sql uninstall_repmgr.sql
|
|||||||
PG_CPPFLAGS = -I$(libpq_srcdir)
|
PG_CPPFLAGS = -I$(libpq_srcdir)
|
||||||
PG_LIBS = $(libpq_pgport)
|
PG_LIBS = $(libpq_pgport)
|
||||||
|
|
||||||
|
|
||||||
all: repmgrd repmgr
|
all: repmgrd repmgr
|
||||||
$(MAKE) -C sql
|
$(MAKE) -C sql
|
||||||
|
|
||||||
repmgrd: $(repmgrd_OBJS)
|
repmgrd: $(repmgrd_OBJS)
|
||||||
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
|
$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||||
$(MAKE) -C sql
|
$(MAKE) -C sql
|
||||||
|
|
||||||
repmgr: $(repmgr_OBJS)
|
repmgr: $(repmgr_OBJS)
|
||||||
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||||
|
|
||||||
|
# Make all objects depend on all include files. This is a bit of a
|
||||||
|
# shotgun approach, but the codebase is small enough that a complete rebuild
|
||||||
|
# is very fast anyway.
|
||||||
|
$(repmgr_OBJS): $(HEADERS)
|
||||||
|
$(repmgrd_OBJS): $(HEADERS)
|
||||||
|
|
||||||
ifdef USE_PGXS
|
ifdef USE_PGXS
|
||||||
PG_CONFIG = pg_config
|
PG_CONFIG = pg_config
|
||||||
@@ -31,8 +40,8 @@ include $(top_builddir)/src/Makefile.global
|
|||||||
include $(top_srcdir)/contrib/contrib-global.mk
|
include $(top_srcdir)/contrib/contrib-global.mk
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
|
# XXX: This overrides the pgxs install target - we're building two binaries,
|
||||||
# is overriding pgxs install.
|
# which is not supported by pgxs.mk's PROGRAM construct.
|
||||||
install: install_prog install_ext
|
install: install_prog install_ext
|
||||||
|
|
||||||
install_prog:
|
install_prog:
|
||||||
@@ -43,6 +52,12 @@ install_prog:
|
|||||||
install_ext:
|
install_ext:
|
||||||
$(MAKE) -C sql install
|
$(MAKE) -C sql install
|
||||||
|
|
||||||
|
# Distribution-specific package building targets
|
||||||
|
# ----------------------------------------------
|
||||||
|
#
|
||||||
|
# XXX we recommend using the PGDG-supplied packages where possible;
|
||||||
|
# see README.md for details.
|
||||||
|
|
||||||
install_rhel:
|
install_rhel:
|
||||||
mkdir -p '$(DESTDIR)/etc/init.d/'
|
mkdir -p '$(DESTDIR)/etc/init.d/'
|
||||||
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
|
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
|
||||||
|
|||||||
114
README.md
114
README.md
@@ -33,10 +33,14 @@ provides a single read/write master server and one or more read-only standbys
|
|||||||
containing near-real time copies of the master server's database.
|
containing near-real time copies of the master server's database.
|
||||||
|
|
||||||
For a multi-master replication solution, please see 2ndQuadrant's BDR
|
For a multi-master replication solution, please see 2ndQuadrant's BDR
|
||||||
(bi-directional replication) extension. For selective replication, e.g.
|
(bi-directional replication) extension.
|
||||||
of individual tables or databases from one server to another, please
|
|
||||||
see 2ndQuadrant's pglogical extension.
|
|
||||||
|
|
||||||
|
http://2ndquadrant.com/en-us/resources/bdr/
|
||||||
|
|
||||||
|
For selective replication, e.g. of individual tables or databases from one server
|
||||||
|
to another, please see 2ndQuadrant's pglogical extension.
|
||||||
|
|
||||||
|
http://2ndquadrant.com/en-us/resources/pglogical/
|
||||||
|
|
||||||
### Concepts
|
### Concepts
|
||||||
|
|
||||||
@@ -109,16 +113,16 @@ tables:
|
|||||||
- `repl_monitor`: historical standby monitoring information written by `repmgrd`
|
- `repl_monitor`: historical standby monitoring information written by `repmgrd`
|
||||||
|
|
||||||
views:
|
views:
|
||||||
- `repl_show_nodes`: based on the `repl_nodes` showing name of the server's
|
- `repl_show_nodes`: based on the table `repl_nodes`, additionally showing the
|
||||||
upstream node
|
name of the server's upstream node
|
||||||
- `repl_status`: when `repmgrd`'s monitoring is enabled, shows current monitoring
|
- `repl_status`: when `repmgrd`'s monitoring is enabled, shows current monitoring
|
||||||
status for each node
|
status for each node
|
||||||
|
|
||||||
The `repmgr` metadata schema can be stored in an existing database or in its own
|
The `repmgr` metadata schema can be stored in an existing database or in its own
|
||||||
dedicated database.
|
dedicated database.
|
||||||
|
|
||||||
A dedicated superuser is required to own the meta-database as well as carry out
|
A dedicated database superuser is required to own the meta-database as well as carry
|
||||||
administrative actions.
|
out administrative actions.
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
------------
|
------------
|
||||||
@@ -128,7 +132,9 @@ Installation
|
|||||||
`repmgr` is developed and tested on Linux and OS X, but should work on any
|
`repmgr` is developed and tested on Linux and OS X, but should work on any
|
||||||
UNIX-like system supported by PostgreSQL itself.
|
UNIX-like system supported by PostgreSQL itself.
|
||||||
|
|
||||||
`repmgr` supports PostgreSQL from version 9.3.
|
Current versions of `repmgr` support PostgreSQL from version 9.3. If you are
|
||||||
|
interested in using `repmgr` on earlier versions of PostgreSQL you can download
|
||||||
|
version 2.1 which supports PostgreSQL from version 9.1.
|
||||||
|
|
||||||
All servers in the replication cluster must be running the same major version of
|
All servers in the replication cluster must be running the same major version of
|
||||||
PostgreSQL, and we recommend that they also run the same minor version.
|
PostgreSQL, and we recommend that they also run the same minor version.
|
||||||
@@ -137,7 +143,7 @@ The `repmgr` tools must be installed on each server in the replication cluster.
|
|||||||
|
|
||||||
A dedicated system user for `repmgr` is *not* required; as many `repmgr` and
|
A dedicated system user for `repmgr` is *not* required; as many `repmgr` and
|
||||||
`repmgrd` actions require direct access to the PostgreSQL data directory,
|
`repmgrd` actions require direct access to the PostgreSQL data directory,
|
||||||
it should executed by the `postgres` user.
|
it should be executed by the `postgres` user.
|
||||||
|
|
||||||
Additionally, we recommend installing `rsync` and enabling passwordless
|
Additionally, we recommend installing `rsync` and enabling passwordless
|
||||||
`ssh` connectivity between all servers in the replication cluster.
|
`ssh` connectivity between all servers in the replication cluster.
|
||||||
@@ -186,7 +192,8 @@ PostgreSQL itself.
|
|||||||
`repmgr` and `repmgrd` use a common configuration file, by default called
|
`repmgr` and `repmgrd` use a common configuration file, by default called
|
||||||
`repmgr.conf` (although any name can be used if explicitly specified).
|
`repmgr.conf` (although any name can be used if explicitly specified).
|
||||||
At the very least, `repmgr.conf` must contain the connection parameters
|
At the very least, `repmgr.conf` must contain the connection parameters
|
||||||
for the local `repmgr` database.
|
for the local `repmgr` database; see `repmgr configuration file` below
|
||||||
|
for more details.
|
||||||
|
|
||||||
The configuration file will be searched for in the following locations:
|
The configuration file will be searched for in the following locations:
|
||||||
|
|
||||||
@@ -277,11 +284,11 @@ similar to the following:
|
|||||||
|
|
||||||
local replication repmgr trust
|
local replication repmgr trust
|
||||||
host replication repmgr 127.0.0.1/32 trust
|
host replication repmgr 127.0.0.1/32 trust
|
||||||
host replication repmgr 192.168.1.0/32 trust
|
host replication repmgr 192.168.1.0/24 trust
|
||||||
|
|
||||||
local repmgr repmgr trust
|
local repmgr repmgr trust
|
||||||
host repmgr repmgr 127.0.0.1/32 trust
|
host repmgr repmgr 127.0.0.1/32 trust
|
||||||
host repmgr repmgr 192.168.1.0/32 trust
|
host repmgr repmgr 192.168.1.0/24 trust
|
||||||
|
|
||||||
Adjust according to your network environment and authentication requirements.
|
Adjust according to your network environment and authentication requirements.
|
||||||
|
|
||||||
@@ -364,11 +371,11 @@ Clone the standby with:
|
|||||||
[2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server
|
[2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server
|
||||||
[2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start
|
[2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start
|
||||||
|
|
||||||
This will clone the PostgreSQL data directory files from the master using
|
This will clone the PostgreSQL data directory files from the master at repmgr_node1
|
||||||
PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the
|
using PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the
|
||||||
correct parameters to start streaming from the master server will be created
|
correct parameters to start streaming from this master server will be created
|
||||||
automatically, and unless otherwise specified the `postgresql.conf` and `pg_hba.conf`
|
automatically, and unless otherwise specified the `postgresql.conf` and `pg_hba.conf`
|
||||||
files will be copied.
|
files will be copied from the master.
|
||||||
|
|
||||||
Make any adjustments to the PostgreSQL configuration files now, then start the
|
Make any adjustments to the PostgreSQL configuration files now, then start the
|
||||||
standby server.
|
standby server.
|
||||||
@@ -377,10 +384,10 @@ standby server.
|
|||||||
|
|
||||||
> *NOTE*: `repmgr standby clone` does not require `repmgr.conf`, however we
|
> *NOTE*: `repmgr standby clone` does not require `repmgr.conf`, however we
|
||||||
> recommend providing this as `repmgr` will set the `application_name` parameter
|
> recommend providing this as `repmgr` will set the `application_name` parameter
|
||||||
> in `recovery.conf` as value provided in `node_name`, making it easier to identify
|
> in `recovery.conf` as the value provided in `node_name`, making it easier to
|
||||||
> the node in `pg_stat_replication`. It's also possible to provide some advanced
|
> identify the node in `pg_stat_replication`. It's also possible to provide some
|
||||||
> options for controlling the standby cloning process; see next section for
|
> advanced options for controlling the standby cloning process; see next section
|
||||||
> details.
|
> for details.
|
||||||
|
|
||||||
* * *
|
* * *
|
||||||
|
|
||||||
@@ -425,20 +432,20 @@ table:
|
|||||||
2 | standby | 1 | test | node2 | host=repmgr_node2 dbname=repmgr user=repmgr | | 100 | t
|
2 | standby | 1 | test | node2 | host=repmgr_node2 dbname=repmgr user=repmgr | | 100 | t
|
||||||
(2 rows)
|
(2 rows)
|
||||||
|
|
||||||
The standby server now has a copy of records for all servers in the replication
|
The standby server now has a copy of the records for all servers in the
|
||||||
cluster. Note that the relationship between master and standby is explicitly
|
replication cluster. Note that the relationship between master and standby is
|
||||||
defined via the `upstream_node_id` value, which shows here that the standby's
|
explicitly defined via the `upstream_node_id` value, which shows here that the
|
||||||
upstream server is the replication cluster master. While of limited use
|
standby's upstream server is the replication cluster master. While of limited
|
||||||
in a simple master/standby replication cluster, this information is required
|
use in a simple master/standby replication cluster, this information is required
|
||||||
to effectively manage cascading replication (see below).
|
to effectively manage cascading replication (see below).
|
||||||
|
|
||||||
|
|
||||||
Advanced options for cloning a standby
|
Advanced options for cloning a standby
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
The above section demonstrates the simplest possible way to clone
|
The above section demonstrates the simplest possible way to clone a standby
|
||||||
a standby server. Depending on your situation, finer-grained control
|
server. Depending on your circumstances, finer-grained control over the cloning
|
||||||
over the cloning process may be necessary.
|
process may be necessary.
|
||||||
|
|
||||||
### pg_basebackup options when cloning a standby
|
### pg_basebackup options when cloning a standby
|
||||||
|
|
||||||
@@ -449,8 +456,8 @@ However this may impact performance of the server being cloned from
|
|||||||
so should be used with care.
|
so should be used with care.
|
||||||
|
|
||||||
Further options can be passed to the `pg_basebackup` utility via
|
Further options can be passed to the `pg_basebackup` utility via
|
||||||
the `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
||||||
documentation for more details:
|
documentation for more details of available options:
|
||||||
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||||
|
|
||||||
### Using rsync to clone a standby
|
### Using rsync to clone a standby
|
||||||
@@ -474,7 +481,7 @@ fresh clone with `pg_basebackup`.
|
|||||||
|
|
||||||
By default, `repmgr` will attempt to copy the standard configuration files
|
By default, `repmgr` will attempt to copy the standard configuration files
|
||||||
(`postgresql.conf`, `pg_hba.conf` and `pg_ident.conf`) even if they are located
|
(`postgresql.conf`, `pg_hba.conf` and `pg_ident.conf`) even if they are located
|
||||||
outside of the data directory (though note currently they will be copied
|
outside of the data directory (though currently they will be copied
|
||||||
into the standby's data directory). To prevent this happening, when executing
|
into the standby's data directory). To prevent this happening, when executing
|
||||||
`repmgr standby clone` provide the `--ignore-external-config-files` option.
|
`repmgr standby clone` provide the `--ignore-external-config-files` option.
|
||||||
|
|
||||||
@@ -702,16 +709,16 @@ Performing a switchover with repmgr
|
|||||||
A typical use-case for replication is a combination of master and standby
|
A typical use-case for replication is a combination of master and standby
|
||||||
server, with the standby serving as a backup which can easily be activated
|
server, with the standby serving as a backup which can easily be activated
|
||||||
in case of a problem with the master. Such an unplanned failover would
|
in case of a problem with the master. Such an unplanned failover would
|
||||||
normally be handled by promoting the standby, after which appropriate action
|
normally be handled by promoting the standby, after which an appropriate
|
||||||
taken to restore the old master.
|
action must be taken to restore the old master.
|
||||||
|
|
||||||
In some cases however it's desirable to promote the standby in a planned
|
In some cases however it's desirable to promote the standby in a planned
|
||||||
way, e.g. so maintenance can be performed on the master; this kind of switchover
|
way, e.g. so maintenance can be performed on the master; this kind of switchover
|
||||||
is supported by the `repmgr standby switchover` command.
|
is supported by the `repmgr standby switchover` command.
|
||||||
|
|
||||||
`repmgr standby switchover` differs from other `repmgr` actions in that it
|
`repmgr standby switchover` differs from other `repmgr` actions in that it
|
||||||
also performs actions on another server, for which reason both passwordless
|
also performs actions on another server, for which reason you must provide
|
||||||
SSH access and the path of `repmgr.conf` on that server.
|
both passwordless SSH access and the path of `repmgr.conf` on that server.
|
||||||
|
|
||||||
* * *
|
* * *
|
||||||
|
|
||||||
@@ -869,6 +876,10 @@ be set in `repmgr.conf`:
|
|||||||
|
|
||||||
(See `repmgr.conf.sample` for further `repmgrd`-specific settings).
|
(See `repmgr.conf.sample` for further `repmgrd`-specific settings).
|
||||||
|
|
||||||
|
Additionally, `postgresql.conf` must contain the following line:
|
||||||
|
|
||||||
|
shared_preload_libraries = 'repmgr_funcs'
|
||||||
|
|
||||||
When `failover` is set to `automatic`, upon detecting failure of the current
|
When `failover` is set to `automatic`, upon detecting failure of the current
|
||||||
master, `repmgrd` will execute one of `promote_command` or `follow_command`,
|
master, `repmgrd` will execute one of `promote_command` or `follow_command`,
|
||||||
depending on whether the current server is becoming the new master or
|
depending on whether the current server is becoming the new master or
|
||||||
@@ -991,8 +1002,11 @@ Monitoring
|
|||||||
----------
|
----------
|
||||||
|
|
||||||
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
||||||
constantly write node status information to the `repl_monitor` table, which can
|
constantly write standby node status information to the `repl_monitor` table,
|
||||||
be queried easily using the view `repl_status`:
|
providing a near-real time overview of replication status on all nodes
|
||||||
|
in the cluster.
|
||||||
|
|
||||||
|
The view `repl_status` shows the most recent state for each node, e.g.:
|
||||||
|
|
||||||
repmgr=# SELECT * FROM repmgr_test.repl_status;
|
repmgr=# SELECT * FROM repmgr_test.repl_status;
|
||||||
-[ RECORD 1 ]-------------+-----------------------------
|
-[ RECORD 1 ]-------------+-----------------------------
|
||||||
@@ -1017,6 +1031,10 @@ table , it's advisable to regularly purge historical data with
|
|||||||
`repmgr cluster cleanup`; use the `-k/--keep-history` to specify how
|
`repmgr cluster cleanup`; use the `-k/--keep-history` to specify how
|
||||||
many days' worth of data should be retained.
|
many days' worth of data should be retained.
|
||||||
|
|
||||||
|
Note that when a standby node is not streaming directly from its upstream
|
||||||
|
node, i.e. recovering WAL from an archive, `apply_lag` will always
|
||||||
|
appear as `0 bytes`.
|
||||||
|
|
||||||
|
|
||||||
Using a witness server with repmgrd
|
Using a witness server with repmgrd
|
||||||
------------------------------------
|
------------------------------------
|
||||||
@@ -1042,7 +1060,6 @@ makes sense to create a witness server in conjunction with running
|
|||||||
`repmgrd`; the witness server will require its own `repmgrd` instance.
|
`repmgrd`; the witness server will require its own `repmgrd` instance.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
repmgrd and cascading replication
|
repmgrd and cascading replication
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
@@ -1159,7 +1176,7 @@ configuration file is located if `-f/--config-file` is not supplied.
|
|||||||
### repmgr commands
|
### repmgr commands
|
||||||
|
|
||||||
The `repmgr` command line tool accepts commands for specific servers in the
|
The `repmgr` command line tool accepts commands for specific servers in the
|
||||||
replication cluster in the format "`server type` `action`", or for the entire
|
replication cluster in the format "`server_type` `action`", or for the entire
|
||||||
replication cluster in the format "`cluster` `action`". Each command is
|
replication cluster in the format "`cluster` `action`". Each command is
|
||||||
described below.
|
described below.
|
||||||
|
|
||||||
@@ -1247,16 +1264,29 @@ which contains connection details for the local database.
|
|||||||
time a failover occurs.
|
time a failover occurs.
|
||||||
|
|
||||||
Note that it only makes sense to create a witness server if `repmgrd`
|
Note that it only makes sense to create a witness server if `repmgrd`
|
||||||
is in use; see section "witness server" above.
|
is in use; see section "Using a witness server" above.
|
||||||
|
|
||||||
|
This command requires a `repmgr.conf` file containing a valid conninfo
|
||||||
|
string for the server to be created, as well as the other minimum required
|
||||||
|
parameters detailed in the section `repmgr configuration file` above.
|
||||||
|
|
||||||
By default the witness server will use port 5499 to facilitate easier setup
|
By default the witness server will use port 5499 to facilitate easier setup
|
||||||
on a server running an existing node.
|
on a server running an existing node. To use a different port, supply
|
||||||
|
this explicitly in the `repmgr.conf` conninfo string.
|
||||||
|
|
||||||
|
This command also requires the location of the witness server's data
|
||||||
|
directory to be provided (`-D/--datadir`) as well as valid connection
|
||||||
|
parameters for the master server.
|
||||||
|
|
||||||
|
By default this command will create a superuser and a repmgr user.
|
||||||
|
The `repmgr` user name will be extracted from the `conninfo` string
|
||||||
|
in `repmgr.conf`.
|
||||||
|
|
||||||
* `cluster show`
|
* `cluster show`
|
||||||
|
|
||||||
Displays information about each active node in the replication cluster. This
|
Displays information about each active node in the replication cluster. This
|
||||||
command polls each registered server and shows its role (master / standby /
|
command polls each registered server and shows its role (master / standby /
|
||||||
witness) or "FAILED" if the node doesn't respond. It polls each server
|
witness) or `FAILED` if the node doesn't respond. It polls each server
|
||||||
directly and can be run on any node in the cluster; this is also useful
|
directly and can be run on any node in the cluster; this is also useful
|
||||||
when analyzing connectivity from a particular node.
|
when analyzing connectivity from a particular node.
|
||||||
|
|
||||||
|
|||||||
5
config.c
5
config.c
@@ -235,6 +235,9 @@ parse_config(t_configuration_options *options)
|
|||||||
options->monitor_interval_secs = 2;
|
options->monitor_interval_secs = 2;
|
||||||
options->retry_promote_interval_secs = 300;
|
options->retry_promote_interval_secs = 300;
|
||||||
|
|
||||||
|
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
|
||||||
|
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||||
|
|
||||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||||
|
|
||||||
options->tablespace_mapping.head = NULL;
|
options->tablespace_mapping.head = NULL;
|
||||||
@@ -358,6 +361,8 @@ parse_config(t_configuration_options *options)
|
|||||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
||||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
||||||
|
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
|
||||||
|
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
|
||||||
else if (strcmp(name, "use_replication_slots") == 0)
|
else if (strcmp(name, "use_replication_slots") == 0)
|
||||||
/* XXX we should have a dedicated boolean argument format */
|
/* XXX we should have a dedicated boolean argument format */
|
||||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
||||||
|
|||||||
3
config.h
3
config.h
@@ -75,13 +75,14 @@ typedef struct
|
|||||||
char logfile[MAXLEN];
|
char logfile[MAXLEN];
|
||||||
int monitor_interval_secs;
|
int monitor_interval_secs;
|
||||||
int retry_promote_interval_secs;
|
int retry_promote_interval_secs;
|
||||||
|
int witness_repl_nodes_sync_interval_secs;
|
||||||
int use_replication_slots;
|
int use_replication_slots;
|
||||||
char event_notification_command[MAXLEN];
|
char event_notification_command[MAXLEN];
|
||||||
EventNotificationList event_notifications;
|
EventNotificationList event_notifications;
|
||||||
TablespaceList tablespace_mapping;
|
TablespaceList tablespace_mapping;
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||||
|
|
||||||
typedef struct ErrorListCell
|
typedef struct ErrorListCell
|
||||||
{
|
{
|
||||||
|
|||||||
107
dbutils.c
107
dbutils.c
@@ -420,7 +420,7 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
|||||||
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
||||||
parameter, datatype, op, value, datatype);
|
parameter, datatype, op, value, datatype);
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "guc_set_typed():n%s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "guc_set_typed():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -587,7 +587,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
|||||||
upstream_conninfo = upstream_conninfo_out;
|
upstream_conninfo = upstream_conninfo_out;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
" SELECT un.conninfo, un.name, un.id "
|
" SELECT un.conninfo, un.id "
|
||||||
" FROM %s.repl_nodes un "
|
" FROM %s.repl_nodes un "
|
||||||
"INNER JOIN %s.repl_nodes n "
|
"INNER JOIN %s.repl_nodes n "
|
||||||
" ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
|
" ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
|
||||||
@@ -604,7 +604,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
|||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to get conninfo for upstream server\n%s\n"),
|
log_err(_("error when attempting to find upstream server\n%s\n"),
|
||||||
PQerrorMessage(standby_conn));
|
PQerrorMessage(standby_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -612,11 +612,38 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
|||||||
|
|
||||||
if (!PQntuples(res))
|
if (!PQntuples(res))
|
||||||
{
|
{
|
||||||
log_notice(_("no record found for upstream server"));
|
PQclear(res);
|
||||||
|
log_debug("no record found for upstream server\n");
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
" SELECT un.conninfo, un.id "
|
||||||
|
" FROM %s.repl_nodes un "
|
||||||
|
" WHERE un.cluster = '%s' "
|
||||||
|
" AND un.type='master' "
|
||||||
|
" AND un.active IS TRUE",
|
||||||
|
get_repmgr_schema_quoted(standby_conn),
|
||||||
|
cluster);
|
||||||
|
res = PQexec(standby_conn, sqlquery);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_err(_("error when attempting to find active master server\n%s\n"),
|
||||||
|
PQerrorMessage(standby_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!PQntuples(res))
|
||||||
|
{
|
||||||
|
PQclear(res);
|
||||||
|
log_notice(_("no record found for active master server\n"));
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("record found for active master server\n");
|
||||||
|
}
|
||||||
|
|
||||||
strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
|
strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
|
||||||
|
|
||||||
if (upstream_node_id_ptr != NULL)
|
if (upstream_node_id_ptr != NULL)
|
||||||
@@ -889,7 +916,7 @@ get_repmgr_schema_quoted(PGconn *conn)
|
|||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
create_replication_slot(PGconn *conn, char *slot_name)
|
create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
|
||||||
{
|
{
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
int query_res;
|
int query_res;
|
||||||
@@ -926,9 +953,19 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* In 9.6 and later, reserve the LSN straight away */
|
||||||
|
if (server_version_num >= 90600)
|
||||||
|
{
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)",
|
||||||
|
slot_name);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||||
slot_name);
|
slot_name);
|
||||||
|
}
|
||||||
|
|
||||||
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
||||||
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||||
@@ -1111,7 +1148,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* copy_configuration()
|
* witness_copy_node_records()
|
||||||
*
|
*
|
||||||
* Copy records in master's `repl_nodes` table to witness database
|
* Copy records in master's `repl_nodes` table to witness database
|
||||||
*
|
*
|
||||||
@@ -1119,29 +1156,49 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
|||||||
* `repmgrd` after a failover event occurs
|
* `repmgrd` after a failover event occurs
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||||
{
|
{
|
||||||
char sqlquery[MAXLEN];
|
char sqlquery[MAXLEN];
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
begin_transaction(witnessconn);
|
||||||
|
|
||||||
|
/* Defer constraints */
|
||||||
|
sqlquery_snprintf(sqlquery, "SET CONSTRAINTS ALL DEFERRED;");
|
||||||
|
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||||
|
|
||||||
|
res = PQexec(witnessconn, sqlquery);
|
||||||
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to defer constraints:\n%s\n"),
|
||||||
|
PQerrorMessage(witnessconn));
|
||||||
|
rollback_transaction(witnessconn);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Truncate existing records */
|
||||||
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
|
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(witnessconn, sqlquery);
|
res = PQexec(witnessconn, sqlquery);
|
||||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
|
log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
|
||||||
PQerrorMessage(witnessconn));
|
PQerrorMessage(witnessconn));
|
||||||
|
rollback_transaction(witnessconn);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Get current records from primary */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
|
||||||
get_repmgr_schema_quoted(masterconn));
|
get_repmgr_schema_quoted(masterconn));
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(masterconn, sqlquery);
|
res = PQexec(masterconn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -1149,20 +1206,23 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
log_err("Unable to retrieve node records from master:\n%s\n",
|
log_err("Unable to retrieve node records from master:\n%s\n",
|
||||||
PQerrorMessage(masterconn));
|
PQerrorMessage(masterconn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
rollback_transaction(witnessconn);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Insert primary records into witness table */
|
||||||
for (i = 0; i < PQntuples(res); i++)
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
{
|
{
|
||||||
bool node_record_created;
|
bool node_record_created;
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG,
|
log_verbose(LOG_DEBUG,
|
||||||
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
"witness_copy_node_records(): writing node record for node %s (id: %s)\n",
|
||||||
PQgetvalue(res, i, 4),
|
PQgetvalue(res, i, 3),
|
||||||
PQgetvalue(res, i, 0));
|
PQgetvalue(res, i, 0));
|
||||||
|
|
||||||
node_record_created = create_node_record(witnessconn,
|
node_record_created = create_node_record(witnessconn,
|
||||||
"copy_configuration",
|
"witness_copy_node_records",
|
||||||
atoi(PQgetvalue(res, i, 0)),
|
atoi(PQgetvalue(res, i, 0)),
|
||||||
PQgetvalue(res, i, 1),
|
PQgetvalue(res, i, 1),
|
||||||
strlen(PQgetvalue(res, i, 2))
|
strlen(PQgetvalue(res, i, 2))
|
||||||
@@ -1174,7 +1234,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
atoi(PQgetvalue(res, i, 5)),
|
atoi(PQgetvalue(res, i, 5)),
|
||||||
strlen(PQgetvalue(res, i, 6))
|
strlen(PQgetvalue(res, i, 6))
|
||||||
? PQgetvalue(res, i, 6)
|
? PQgetvalue(res, i, 6)
|
||||||
: NULL
|
: NULL,
|
||||||
|
(strcmp(PQgetvalue(res, i, 7), "t") == 0)
|
||||||
|
? true
|
||||||
|
: false
|
||||||
);
|
);
|
||||||
|
|
||||||
if (node_record_created == false)
|
if (node_record_created == false)
|
||||||
@@ -1183,11 +1246,16 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
|
|
||||||
log_err("Unable to copy node record to witness database\n%s\n",
|
log_err("Unable to copy node record to witness database\n%s\n",
|
||||||
PQerrorMessage(witnessconn));
|
PQerrorMessage(witnessconn));
|
||||||
|
rollback_transaction(witnessconn);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
/* And finished */
|
||||||
|
commit_transaction(witnessconn);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1200,7 +1268,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
* XXX we should pass the record parameters as a struct.
|
* XXX we should pass the record parameters as a struct.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
|
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
||||||
{
|
{
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
char upstream_node_id[MAXLEN];
|
char upstream_node_id[MAXLEN];
|
||||||
@@ -1241,8 +1309,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
|||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_nodes "
|
"INSERT INTO %s.repl_nodes "
|
||||||
" (id, type, upstream_node_id, cluster, "
|
" (id, type, upstream_node_id, cluster, "
|
||||||
" name, conninfo, slot_name, priority) "
|
" name, conninfo, slot_name, priority, active) "
|
||||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
|
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
|
||||||
get_repmgr_schema_quoted(conn),
|
get_repmgr_schema_quoted(conn),
|
||||||
node,
|
node,
|
||||||
type,
|
type,
|
||||||
@@ -1251,7 +1319,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
|||||||
node_name,
|
node_name,
|
||||||
conninfo,
|
conninfo,
|
||||||
slot_name_buf,
|
slot_name_buf,
|
||||||
priority);
|
priority,
|
||||||
|
active == true ? "TRUE" : "FALSE");
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
||||||
|
|
||||||
@@ -1291,7 +1360,7 @@ delete_node_record(PGconn *conn, int node, char *action)
|
|||||||
|
|
||||||
if (action != NULL)
|
if (action != NULL)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
|
||||||
}
|
}
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|||||||
@@ -115,14 +115,14 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
|||||||
bool cancel_query(PGconn *conn, int timeout);
|
bool cancel_query(PGconn *conn, int timeout);
|
||||||
char *get_repmgr_schema(void);
|
char *get_repmgr_schema(void);
|
||||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
|
||||||
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||||
@@ -133,3 +133,4 @@ int get_node_replication_state(PGconn *conn, char *node_name, char *output)
|
|||||||
t_server_type parse_node_type(const char *type);
|
t_server_type parse_node_type(const char *type);
|
||||||
int get_data_checksum_version(const char *data_directory);
|
int get_data_checksum_version(const char *data_directory);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -37,5 +37,6 @@
|
|||||||
#define ERR_BAD_BASEBACKUP 14
|
#define ERR_BAD_BASEBACKUP 14
|
||||||
#define ERR_INTERNAL 15
|
#define ERR_INTERNAL 15
|
||||||
#define ERR_MONITORING_FAIL 16
|
#define ERR_MONITORING_FAIL 16
|
||||||
|
#define ERR_BAD_BACKUP_LABEL 17
|
||||||
|
|
||||||
#endif /* _ERRCODE_H_ */
|
#endif /* _ERRCODE_H_ */
|
||||||
|
|||||||
5
log.c
5
log.c
@@ -40,7 +40,8 @@
|
|||||||
/* #define REPMGR_DEBUG */
|
/* #define REPMGR_DEBUG */
|
||||||
|
|
||||||
static int detect_log_facility(const char *facility);
|
static int detect_log_facility(const char *facility);
|
||||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
|
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
|
||||||
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||||
|
|
||||||
int log_type = REPMGR_STDERR;
|
int log_type = REPMGR_STDERR;
|
||||||
int log_level = LOG_NOTICE;
|
int log_level = LOG_NOTICE;
|
||||||
@@ -48,7 +49,7 @@ int last_log_level = LOG_NOTICE;
|
|||||||
int verbose_logging = false;
|
int verbose_logging = false;
|
||||||
int terse_logging = false;
|
int terse_logging = false;
|
||||||
|
|
||||||
void
|
extern void
|
||||||
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||||
{
|
{
|
||||||
va_list arglist;
|
va_list arglist;
|
||||||
|
|||||||
8
log.h
8
log.h
@@ -25,7 +25,7 @@
|
|||||||
#define REPMGR_SYSLOG 1
|
#define REPMGR_SYSLOG 1
|
||||||
#define REPMGR_STDERR 2
|
#define REPMGR_STDERR 2
|
||||||
|
|
||||||
void
|
extern void
|
||||||
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||||
|
|
||||||
@@ -123,8 +123,10 @@ bool logger_shutdown(void);
|
|||||||
void logger_set_verbose(void);
|
void logger_set_verbose(void);
|
||||||
void logger_set_terse(void);
|
void logger_set_terse(void);
|
||||||
|
|
||||||
void log_hint(const char *fmt, ...);
|
void log_hint(const char *fmt, ...)
|
||||||
void log_verbose(int level, const char *fmt, ...);
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2)));
|
||||||
|
void log_verbose(int level, const char *fmt, ...)
|
||||||
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||||
|
|
||||||
extern int log_type;
|
extern int log_type;
|
||||||
extern int log_level;
|
extern int log_level;
|
||||||
|
|||||||
532
repmgr.c
532
repmgr.c
@@ -43,7 +43,6 @@
|
|||||||
#include "repmgr.h"
|
#include "repmgr.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <dirent.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@@ -122,6 +121,8 @@ static bool remote_command(const char *host, const char *user, const char *comma
|
|||||||
static void format_db_cli_params(const char *conninfo, char *output);
|
static void format_db_cli_params(const char *conninfo, char *output);
|
||||||
static bool copy_file(const char *old_filename, const char *new_filename);
|
static bool copy_file(const char *old_filename, const char *new_filename);
|
||||||
|
|
||||||
|
static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label);
|
||||||
|
|
||||||
/* Global variables */
|
/* Global variables */
|
||||||
static const char *keywords[6];
|
static const char *keywords[6];
|
||||||
static const char *values[6];
|
static const char *values[6];
|
||||||
@@ -147,6 +148,7 @@ static char path_buf[MAXLEN] = "";
|
|||||||
ErrorList cli_errors = { NULL, NULL };
|
ErrorList cli_errors = { NULL, NULL };
|
||||||
ErrorList cli_warnings = { NULL, NULL };
|
ErrorList cli_warnings = { NULL, NULL };
|
||||||
|
|
||||||
|
static struct BackupLabel backup_label;
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
@@ -159,6 +161,8 @@ main(int argc, char **argv)
|
|||||||
{"username", required_argument, NULL, 'U'},
|
{"username", required_argument, NULL, 'U'},
|
||||||
{"superuser", required_argument, NULL, 'S'},
|
{"superuser", required_argument, NULL, 'S'},
|
||||||
{"data-dir", required_argument, NULL, 'D'},
|
{"data-dir", required_argument, NULL, 'D'},
|
||||||
|
/* alias for -D/--data-dir, following pg_ctl usage */
|
||||||
|
{"pgdata", required_argument, NULL, 'D'},
|
||||||
/* -l/--local-port is deprecated */
|
/* -l/--local-port is deprecated */
|
||||||
{"local-port", required_argument, NULL, 'l'},
|
{"local-port", required_argument, NULL, 'l'},
|
||||||
{"config-file", required_argument, NULL, 'f'},
|
{"config-file", required_argument, NULL, 'f'},
|
||||||
@@ -175,12 +179,14 @@ main(int argc, char **argv)
|
|||||||
{"terse", required_argument, NULL, 't'},
|
{"terse", required_argument, NULL, 't'},
|
||||||
{"mode", required_argument, NULL, 'm'},
|
{"mode", required_argument, NULL, 'm'},
|
||||||
{"remote-config-file", required_argument, NULL, 'C'},
|
{"remote-config-file", required_argument, NULL, 'C'},
|
||||||
|
/* deprecated from 3.2; replaced with -P/--pwprompt */
|
||||||
{"initdb-no-pwprompt", no_argument, NULL, 1},
|
{"initdb-no-pwprompt", no_argument, NULL, 1},
|
||||||
{"check-upstream-config", no_argument, NULL, 2},
|
{"check-upstream-config", no_argument, NULL, 2},
|
||||||
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
||||||
{"ignore-external-config-files", no_argument, NULL, 4},
|
{"ignore-external-config-files", no_argument, NULL, 4},
|
||||||
{"config-archive-dir", required_argument, NULL, 5},
|
{"config-archive-dir", required_argument, NULL, 5},
|
||||||
{"pg_rewind", optional_argument, NULL, 6},
|
{"pg_rewind", optional_argument, NULL, 6},
|
||||||
|
{"pwprompt", optional_argument, NULL, 7},
|
||||||
{"help", no_argument, NULL, '?'},
|
{"help", no_argument, NULL, '?'},
|
||||||
{"version", no_argument, NULL, 'V'},
|
{"version", no_argument, NULL, 'V'},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
@@ -196,6 +202,19 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
set_progname(argv[0]);
|
set_progname(argv[0]);
|
||||||
|
|
||||||
|
/* Disallow running as root to prevent directory ownership problems */
|
||||||
|
if (geteuid() == 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
_("%s: cannot be run as root\n"
|
||||||
|
"Please log in (using, e.g., \"su\") as the "
|
||||||
|
"(unprivileged) user that owns\n"
|
||||||
|
"the data directory.\n"
|
||||||
|
),
|
||||||
|
progname());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
/* Initialise some defaults */
|
/* Initialise some defaults */
|
||||||
|
|
||||||
/* set default user */
|
/* set default user */
|
||||||
@@ -210,7 +229,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf(stderr, "could not get current user name: %s\n", strerror(errno));
|
fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -277,7 +296,7 @@ main(int argc, char **argv)
|
|||||||
strncpy(runtime_options.superuser, optarg, MAXLEN);
|
strncpy(runtime_options.superuser, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
case 'D':
|
case 'D':
|
||||||
strncpy(runtime_options.dest_dir, optarg, MAXFILENAME);
|
strncpy(runtime_options.dest_dir, optarg, MAXPGPATH);
|
||||||
break;
|
break;
|
||||||
case 'l':
|
case 'l':
|
||||||
/* -l/--local-port is deprecated */
|
/* -l/--local-port is deprecated */
|
||||||
@@ -401,10 +420,13 @@ main(int argc, char **argv)
|
|||||||
case 6:
|
case 6:
|
||||||
if (optarg != NULL)
|
if (optarg != NULL)
|
||||||
{
|
{
|
||||||
strncpy(runtime_options.pg_rewind, optarg, MAXFILENAME);
|
strncpy(runtime_options.pg_rewind, optarg, MAXPGPATH);
|
||||||
}
|
}
|
||||||
pg_rewind_supplied = true;
|
pg_rewind_supplied = true;
|
||||||
break;
|
break;
|
||||||
|
case 7:
|
||||||
|
runtime_options.witness_pwprompt = true;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
@@ -746,6 +768,8 @@ do_cluster_show(void)
|
|||||||
" FROM %s.repl_show_nodes",
|
" FROM %s.repl_show_nodes",
|
||||||
get_repmgr_schema_quoted(conn));
|
get_repmgr_schema_quoted(conn));
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery );
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -1051,7 +1075,8 @@ do_master_register(void)
|
|||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
repmgr_slot_name_ptr);
|
repmgr_slot_name_ptr,
|
||||||
|
true);
|
||||||
|
|
||||||
if (record_created == false)
|
if (record_created == false)
|
||||||
{
|
{
|
||||||
@@ -1152,9 +1177,8 @@ do_standby_register(void)
|
|||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
repmgr_slot_name_ptr);
|
repmgr_slot_name_ptr,
|
||||||
|
true);
|
||||||
|
|
||||||
|
|
||||||
if (record_created == false)
|
if (record_created == false)
|
||||||
{
|
{
|
||||||
@@ -1287,29 +1311,30 @@ do_standby_clone(void)
|
|||||||
bool target_directory_provided = false;
|
bool target_directory_provided = false;
|
||||||
bool external_config_file_copy_required = false;
|
bool external_config_file_copy_required = false;
|
||||||
|
|
||||||
char master_data_directory[MAXFILENAME];
|
char master_data_directory[MAXPGPATH];
|
||||||
char local_data_directory[MAXFILENAME];
|
char local_data_directory[MAXPGPATH];
|
||||||
|
|
||||||
char master_config_file[MAXFILENAME] = "";
|
char master_config_file[MAXPGPATH] = "";
|
||||||
char local_config_file[MAXFILENAME] = "";
|
char local_config_file[MAXPGPATH] = "";
|
||||||
bool config_file_outside_pgdata = false;
|
bool config_file_outside_pgdata = false;
|
||||||
|
|
||||||
char master_hba_file[MAXFILENAME] = "";
|
char master_hba_file[MAXPGPATH] = "";
|
||||||
char local_hba_file[MAXFILENAME] = "";
|
char local_hba_file[MAXPGPATH] = "";
|
||||||
bool hba_file_outside_pgdata = false;
|
bool hba_file_outside_pgdata = false;
|
||||||
|
|
||||||
char master_ident_file[MAXFILENAME] = "";
|
char master_ident_file[MAXPGPATH] = "";
|
||||||
char local_ident_file[MAXFILENAME] = "";
|
char local_ident_file[MAXPGPATH] = "";
|
||||||
bool ident_file_outside_pgdata = false;
|
bool ident_file_outside_pgdata = false;
|
||||||
|
|
||||||
char master_control_file[MAXFILENAME] = "";
|
char master_control_file[MAXPGPATH] = "";
|
||||||
char local_control_file[MAXFILENAME] = "";
|
char local_control_file[MAXPGPATH] = "";
|
||||||
|
|
||||||
char *first_wal_segment = NULL;
|
char *first_wal_segment = NULL;
|
||||||
char *last_wal_segment = NULL;
|
char *last_wal_segment = NULL;
|
||||||
|
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If dest_dir (-D/--pgdata) was provided, this will become the new data
|
* If dest_dir (-D/--pgdata) was provided, this will become the new data
|
||||||
* directory (otherwise repmgr will default to the same directory as on the
|
* directory (otherwise repmgr will default to the same directory as on the
|
||||||
@@ -1471,7 +1496,7 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0)
|
if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0)
|
||||||
{
|
{
|
||||||
strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXFILENAME);
|
strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||||
}
|
}
|
||||||
else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0)
|
else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0)
|
||||||
{
|
{
|
||||||
@@ -1479,7 +1504,7 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
config_file_outside_pgdata = true;
|
config_file_outside_pgdata = true;
|
||||||
external_config_file_copy_required = true;
|
external_config_file_copy_required = true;
|
||||||
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0)
|
else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0)
|
||||||
@@ -1488,7 +1513,7 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
hba_file_outside_pgdata = true;
|
hba_file_outside_pgdata = true;
|
||||||
external_config_file_copy_required = true;
|
external_config_file_copy_required = true;
|
||||||
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0)
|
else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0)
|
||||||
@@ -1497,7 +1522,7 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
ident_file_outside_pgdata = true;
|
ident_file_outside_pgdata = true;
|
||||||
external_config_file_copy_required = true;
|
external_config_file_copy_required = true;
|
||||||
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -1513,20 +1538,20 @@ do_standby_clone(void)
|
|||||||
*/
|
*/
|
||||||
if (target_directory_provided)
|
if (target_directory_provided)
|
||||||
{
|
{
|
||||||
strncpy(local_data_directory, runtime_options.dest_dir, MAXFILENAME);
|
strncpy(local_data_directory, runtime_options.dest_dir, MAXPGPATH);
|
||||||
strncpy(local_config_file, runtime_options.dest_dir, MAXFILENAME);
|
strncpy(local_config_file, runtime_options.dest_dir, MAXPGPATH);
|
||||||
strncpy(local_hba_file, runtime_options.dest_dir, MAXFILENAME);
|
strncpy(local_hba_file, runtime_options.dest_dir, MAXPGPATH);
|
||||||
strncpy(local_ident_file, runtime_options.dest_dir, MAXFILENAME);
|
strncpy(local_ident_file, runtime_options.dest_dir, MAXPGPATH);
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* Otherwise use the same data directory as on the remote host
|
* Otherwise use the same data directory as on the remote host
|
||||||
*/
|
*/
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
strncpy(local_data_directory, master_data_directory, MAXFILENAME);
|
strncpy(local_data_directory, master_data_directory, MAXPGPATH);
|
||||||
strncpy(local_config_file, master_config_file, MAXFILENAME);
|
strncpy(local_config_file, master_config_file, MAXPGPATH);
|
||||||
strncpy(local_hba_file, master_hba_file, MAXFILENAME);
|
strncpy(local_hba_file, master_hba_file, MAXPGPATH);
|
||||||
strncpy(local_ident_file, master_ident_file, MAXFILENAME);
|
strncpy(local_ident_file, master_ident_file, MAXPGPATH);
|
||||||
|
|
||||||
log_notice(_("setting data directory to: %s\n"), local_data_directory);
|
log_notice(_("setting data directory to: %s\n"), local_data_directory);
|
||||||
log_hint(_("use -D/--data-dir to explicitly specify a data directory\n"));
|
log_hint(_("use -D/--data-dir to explicitly specify a data directory\n"));
|
||||||
@@ -1566,7 +1591,7 @@ do_standby_clone(void)
|
|||||||
*/
|
*/
|
||||||
if (options.use_replication_slots)
|
if (options.use_replication_slots)
|
||||||
{
|
{
|
||||||
if (create_replication_slot(upstream_conn, repmgr_slot_name) == false)
|
if (create_replication_slot(upstream_conn, repmgr_slot_name, server_version_num) == false)
|
||||||
{
|
{
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
exit(ERR_DB_QUERY);
|
exit(ERR_DB_QUERY);
|
||||||
@@ -1634,13 +1659,25 @@ do_standby_clone(void)
|
|||||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||||
master_data_directory, local_data_directory,
|
master_data_directory, local_data_directory,
|
||||||
true, server_version_num);
|
true, server_version_num);
|
||||||
if (r != 0)
|
/*
|
||||||
|
Exit code 0 means no error, but we want to ignore exit code 24 as well
|
||||||
|
as rsync returns that code on "Partial transfer due to vanished source files".
|
||||||
|
It's quite common for this to happen on the data directory, particularly
|
||||||
|
with long running rsync on a busy server.
|
||||||
|
*/
|
||||||
|
if (!WIFEXITED(r) && WEXITSTATUS(r) != 24)
|
||||||
{
|
{
|
||||||
log_warning(_("standby clone: failed copying master data directory '%s'\n"),
|
log_warning(_("standby clone: failed copying master data directory '%s'\n"),
|
||||||
master_data_directory);
|
master_data_directory);
|
||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Read backup label copied from primary */
|
||||||
|
/* XXX ensure this function does not exit on error as we'd need to stop the backup */
|
||||||
|
read_backup_label(local_data_directory, &backup_label);
|
||||||
|
|
||||||
|
printf("Label: %s; file: %s\n", backup_label.label, backup_label.start_wal_file);
|
||||||
|
|
||||||
/* Handle tablespaces */
|
/* Handle tablespaces */
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
@@ -1708,6 +1745,18 @@ do_standby_clone(void)
|
|||||||
tblspc_dir_src.data, tblspc_dir_dst.data,
|
tblspc_dir_src.data, tblspc_dir_dst.data,
|
||||||
true, server_version_num);
|
true, server_version_num);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Exit code 0 means no error, but we want to ignore exit code 24 as well
|
||||||
|
as rsync returns that code on "Partial transfer due to vanished source files".
|
||||||
|
It's quite common for this to happen on the data directory, particularly
|
||||||
|
with long running rsync on a busy server.
|
||||||
|
*/
|
||||||
|
if (!WIFEXITED(r) && WEXITSTATUS(r) != 24)
|
||||||
|
{
|
||||||
|
log_warning(_("standby clone: failed copying tablespace directory '%s'\n"),
|
||||||
|
tblspc_dir_src.data);
|
||||||
|
goto stop_backup;
|
||||||
|
}
|
||||||
|
|
||||||
/* Update symlinks in pg_tblspc */
|
/* Update symlinks in pg_tblspc */
|
||||||
if (mapping_found == true)
|
if (mapping_found == true)
|
||||||
@@ -1932,16 +1981,18 @@ stop_backup:
|
|||||||
exit(retval);
|
exit(retval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clean up any $PGDATA subdirectories which may contain
|
* Clean up any $PGDATA subdirectories which may contain
|
||||||
* files which won't be removed by rsync and which could
|
* files which won't be removed by rsync and which could
|
||||||
* be stale or are otherwise not required
|
* be stale or are otherwise not required
|
||||||
*/
|
*/
|
||||||
if (runtime_options.rsync_only && runtime_options.force)
|
if (runtime_options.rsync_only)
|
||||||
{
|
{
|
||||||
char script[MAXLEN];
|
char script[MAXLEN];
|
||||||
|
char label_path[MAXPGPATH];
|
||||||
|
|
||||||
|
if (runtime_options.force)
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
* Remove any existing WAL from the target directory, since
|
* Remove any existing WAL from the target directory, since
|
||||||
* rsync's --exclude option doesn't do it.
|
* rsync's --exclude option doesn't do it.
|
||||||
@@ -1955,21 +2006,30 @@ stop_backup:
|
|||||||
local_data_directory);
|
local_data_directory);
|
||||||
exit(ERR_BAD_RSYNC);
|
exit(ERR_BAD_RSYNC);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove any replication slot directories; this matches the
|
* Remove any existing replication slot directories from previous use
|
||||||
* behaviour a base backup, which would result in an empty
|
* of this data directory; this matches the behaviour of a fresh
|
||||||
* pg_replslot directory.
|
* pg_basebackup, which would usually result in an empty pg_replslot
|
||||||
|
* directory.
|
||||||
|
*
|
||||||
|
* If the backup label contains a nonzero
|
||||||
|
* 'MIN FAILOVER SLOT LSN' entry we retain the slots and let
|
||||||
|
* the server clean them up instead, matching pg_basebackup's
|
||||||
|
* behaviour when failover slots are enabled.
|
||||||
*
|
*
|
||||||
* NOTE: watch out for any changes in the replication
|
* NOTE: watch out for any changes in the replication
|
||||||
* slot directory name (as of 9.4: "pg_replslot") and
|
* slot directory name (as of 9.4: "pg_replslot") and
|
||||||
* functionality of replication slots
|
* functionality of replication slots
|
||||||
*/
|
*/
|
||||||
|
if (server_version_num >= 90400 &&
|
||||||
if (server_version_num >= 90400)
|
backup_label.min_failover_slot_lsn == InvalidXLogRecPtr)
|
||||||
{
|
{
|
||||||
maxlen_snprintf(script, "rm -rf %s/pg_replslot/*",
|
maxlen_snprintf(script, "rm -rf %s/pg_replslot/*",
|
||||||
local_data_directory);
|
local_data_directory);
|
||||||
|
|
||||||
|
log_debug("deleting pg_replslot directory contents\n");
|
||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
@@ -1978,6 +2038,13 @@ stop_backup:
|
|||||||
exit(ERR_BAD_RSYNC);
|
exit(ERR_BAD_RSYNC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* delete the backup label file copied from the primary */
|
||||||
|
maxlen_snprintf(label_path, "%s/backup_label", local_data_directory);
|
||||||
|
if (0 && unlink(label_path) < 0 && errno != ENOENT)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to delete backup label file %s\n"), label_path);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Finally, write the recovery.conf file */
|
/* Finally, write the recovery.conf file */
|
||||||
@@ -1993,9 +2060,9 @@ stop_backup:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX It might be nice to provide the following options:
|
* XXX It might be nice to provide an option to have repmgr start
|
||||||
* - have repmgr start the daemon automatically
|
* the PostgreSQL server automatically (e.g. with a custom pg_ctl
|
||||||
* - provide a custom pg_ctl command
|
* command)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
log_notice(_("you can now start your PostgreSQL server\n"));
|
log_notice(_("you can now start your PostgreSQL server\n"));
|
||||||
@@ -2009,7 +2076,28 @@ stop_backup:
|
|||||||
log_hint(_("for example : /etc/init.d/postgresql start\n"));
|
log_hint(_("for example : /etc/init.d/postgresql start\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Log the event - if we could connect to the primary */
|
|
||||||
|
/*
|
||||||
|
* XXX forgetting to (re) register the standby is a frequent cause
|
||||||
|
* of error; we should consider having repmgr automatically
|
||||||
|
* register the standby, either by default with an option
|
||||||
|
* "--no-register", or an option "--register".
|
||||||
|
*
|
||||||
|
* Note that "repmgr standby register" requires the standby to
|
||||||
|
* be running - if not, and we just update the node record,
|
||||||
|
* we'd have an incorrect representation of the replication cluster.
|
||||||
|
* Best combined with an automatic start of the server (see note
|
||||||
|
* above)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* XXX detect whether a record exists for this node already, and
|
||||||
|
* add a hint about using the -F/--force.
|
||||||
|
*/
|
||||||
|
|
||||||
|
log_hint(_("After starting the server, you need to register this standby with \"repmgr standby register\"\n"));
|
||||||
|
|
||||||
|
/* Log the event - if we can connect to the primary */
|
||||||
|
|
||||||
if (primary_conn != NULL)
|
if (primary_conn != NULL)
|
||||||
{
|
{
|
||||||
@@ -2041,6 +2129,159 @@ stop_backup:
|
|||||||
exit(retval);
|
exit(retval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
parse_lsn(XLogRecPtr *ptr, const char *str)
|
||||||
|
{
|
||||||
|
uint32 high, low;
|
||||||
|
|
||||||
|
if (sscanf(str, "%x/%x", &high, &low) != 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
*ptr = (((XLogRecPtr)high) << 32) + (XLogRecPtr)low;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static XLogRecPtr
|
||||||
|
parse_label_lsn(const char *label_key, const char *label_value)
|
||||||
|
{
|
||||||
|
XLogRecPtr ptr;
|
||||||
|
|
||||||
|
if (!parse_lsn(&ptr, label_value))
|
||||||
|
{
|
||||||
|
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
||||||
|
label_key, label_value);
|
||||||
|
|
||||||
|
exit(ERR_BAD_BACKUP_LABEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*======================================
|
||||||
|
* Read entries of interest from the backup label.
|
||||||
|
*
|
||||||
|
* Sample backup label (with failover slots):
|
||||||
|
*
|
||||||
|
* START WAL LOCATION: 0/6000028 (file 000000010000000000000006)
|
||||||
|
* CHECKPOINT LOCATION: 0/6000060
|
||||||
|
* BACKUP METHOD: streamed
|
||||||
|
* BACKUP FROM: master
|
||||||
|
* START TIME: 2016-03-30 12:18:12 AWST
|
||||||
|
* LABEL: pg_basebackup base backup
|
||||||
|
* MIN FAILOVER SLOT LSN: 0/5000000
|
||||||
|
*
|
||||||
|
*======================================
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label)
|
||||||
|
{
|
||||||
|
char label_path[MAXPGPATH];
|
||||||
|
FILE *label_file;
|
||||||
|
int nmatches = 0;
|
||||||
|
|
||||||
|
char line[MAXLEN];
|
||||||
|
|
||||||
|
out_backup_label->start_wal_location = InvalidXLogRecPtr;
|
||||||
|
out_backup_label->start_wal_file[0] = '\0';
|
||||||
|
out_backup_label->checkpoint_location = InvalidXLogRecPtr;
|
||||||
|
out_backup_label->backup_from[0] = '\0';
|
||||||
|
out_backup_label->backup_method[0] = '\0';
|
||||||
|
out_backup_label->start_time[0] = '\0';
|
||||||
|
out_backup_label->label[0] = '\0';
|
||||||
|
out_backup_label->min_failover_slot_lsn = InvalidXLogRecPtr;
|
||||||
|
|
||||||
|
maxlen_snprintf(label_path, "%s/backup_label", local_data_directory);
|
||||||
|
|
||||||
|
label_file = fopen(label_path, "r");
|
||||||
|
if (label_file == NULL)
|
||||||
|
{
|
||||||
|
log_err(_("read_backup_label: could not open backup label file %s: %s"),
|
||||||
|
label_path, strerror(errno));
|
||||||
|
exit(ERR_BAD_BACKUP_LABEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_info(_("read_backup_label: parsing backup label file '%s'\n"),
|
||||||
|
label_path);
|
||||||
|
|
||||||
|
while(fgets(line, sizeof line, label_file) != NULL)
|
||||||
|
{
|
||||||
|
char label_key[MAXLEN];
|
||||||
|
char label_value[MAXLEN];
|
||||||
|
char newline;
|
||||||
|
|
||||||
|
nmatches = sscanf(line, "%" MAXLEN_STR "[^:]: %" MAXLEN_STR "[^\n]%c",
|
||||||
|
label_key, label_value, &newline);
|
||||||
|
|
||||||
|
if (nmatches != 3)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (newline != '\n')
|
||||||
|
{
|
||||||
|
log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""),
|
||||||
|
label_key, label_value);
|
||||||
|
exit(ERR_BAD_BACKUP_LABEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("standby clone: got backup label entry \"%s: %s\"\n",
|
||||||
|
label_key, label_value);
|
||||||
|
|
||||||
|
if (strcmp(label_key, "START WAL LOCATION") == 0)
|
||||||
|
{
|
||||||
|
char start_wal_location[MAXLEN];
|
||||||
|
char wal_filename[MAXLEN];
|
||||||
|
|
||||||
|
nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename);
|
||||||
|
if (nmatches != 2)
|
||||||
|
{
|
||||||
|
log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n"));
|
||||||
|
exit(ERR_BAD_BACKUP_LABEL);
|
||||||
|
}
|
||||||
|
out_backup_label->start_wal_location =
|
||||||
|
parse_label_lsn(&label_key[0], start_wal_location);
|
||||||
|
|
||||||
|
(void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN);
|
||||||
|
out_backup_label->start_wal_file[MAXLEN-1] = '\0';
|
||||||
|
}
|
||||||
|
else if (strcmp(label_key, "CHECKPOINT LOCATION") == 0)
|
||||||
|
{
|
||||||
|
out_backup_label->checkpoint_location =
|
||||||
|
parse_label_lsn(&label_key[0], &label_value[0]);
|
||||||
|
}
|
||||||
|
else if (strcmp(label_key, "BACKUP METHOD") == 0)
|
||||||
|
{
|
||||||
|
(void) strncpy(out_backup_label->backup_method, label_value, MAXLEN);
|
||||||
|
out_backup_label->backup_method[MAXLEN-1] = '\0';
|
||||||
|
}
|
||||||
|
else if (strcmp(label_key, "BACKUP FROM") == 0)
|
||||||
|
{
|
||||||
|
(void) strncpy(out_backup_label->backup_from, label_value, MAXLEN);
|
||||||
|
out_backup_label->backup_from[MAXLEN-1] = '\0';
|
||||||
|
}
|
||||||
|
else if (strcmp(label_key, "START TIME") == 0)
|
||||||
|
{
|
||||||
|
(void) strncpy(out_backup_label->start_time, label_value, MAXLEN);
|
||||||
|
out_backup_label->start_time[MAXLEN-1] = '\0';
|
||||||
|
}
|
||||||
|
else if (strcmp(label_key, "LABEL") == 0)
|
||||||
|
{
|
||||||
|
(void) strncpy(out_backup_label->label, label_value, MAXLEN);
|
||||||
|
out_backup_label->label[MAXLEN-1] = '\0';
|
||||||
|
}
|
||||||
|
else if (strcmp(label_key, "MIN FAILOVER SLOT LSN") == 0)
|
||||||
|
{
|
||||||
|
out_backup_label->min_failover_slot_lsn =
|
||||||
|
parse_label_lsn(&label_key[0], &label_value[0]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_info("read_backup_label: ignored unrecognised backup label entry \"%s: %s\"",
|
||||||
|
label_key, label_value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(void) fclose(label_file);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
do_standby_promote(void)
|
do_standby_promote(void)
|
||||||
@@ -2214,7 +2455,7 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
int r,
|
int r,
|
||||||
retval;
|
retval;
|
||||||
char data_dir[MAXFILENAME];
|
char data_dir[MAXPGPATH];
|
||||||
|
|
||||||
bool success;
|
bool success;
|
||||||
|
|
||||||
@@ -2297,7 +2538,7 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
master_id = get_master_node_id(master_conn, options.cluster_name);
|
master_id = get_master_node_id(master_conn, options.cluster_name);
|
||||||
|
|
||||||
strncpy(data_dir, runtime_options.dest_dir, MAXFILENAME);
|
strncpy(data_dir, runtime_options.dest_dir, MAXPGPATH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2329,7 +2570,9 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
if (options.use_replication_slots)
|
if (options.use_replication_slots)
|
||||||
{
|
{
|
||||||
if (create_replication_slot(master_conn, repmgr_slot_name) == false)
|
int server_version_num = get_server_version(master_conn, NULL);
|
||||||
|
|
||||||
|
if (create_replication_slot(master_conn, repmgr_slot_name, server_version_num) == false)
|
||||||
{
|
{
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
@@ -2834,8 +3077,8 @@ do_standby_switchover(void)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
maxlen_snprintf(command,
|
maxlen_snprintf(command,
|
||||||
"%s/pg_ctl -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
|
"%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
|
||||||
pg_bindir,
|
make_pg_path("pg_ctl"),
|
||||||
remote_data_directory,
|
remote_data_directory,
|
||||||
runtime_options.pg_ctl_mode);
|
runtime_options.pg_ctl_mode);
|
||||||
|
|
||||||
@@ -3277,10 +3520,17 @@ do_standby_restore_config(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
while ((arcdir_ent = readdir(arcdir)) != NULL) {
|
while ((arcdir_ent = readdir(arcdir)) != NULL) {
|
||||||
|
struct stat statbuf;
|
||||||
|
char arcdir_ent_path[MAXPGPATH];
|
||||||
PQExpBufferData src_file;
|
PQExpBufferData src_file;
|
||||||
PQExpBufferData dst_file;
|
PQExpBufferData dst_file;
|
||||||
|
|
||||||
if (arcdir_ent->d_type != DT_REG)
|
snprintf(arcdir_ent_path, MAXPGPATH,
|
||||||
|
"%s/%s",
|
||||||
|
runtime_options.config_archive_dir,
|
||||||
|
arcdir_ent->d_name);
|
||||||
|
|
||||||
|
if (stat(arcdir_ent_path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -3358,6 +3608,8 @@ do_witness_create(void)
|
|||||||
char master_hba_file[MAXLEN];
|
char master_hba_file[MAXLEN];
|
||||||
bool success;
|
bool success;
|
||||||
bool record_created;
|
bool record_created;
|
||||||
|
char repmgr_user[MAXLEN];
|
||||||
|
char repmgr_db[MAXLEN];
|
||||||
|
|
||||||
/* Connection parameters for master only */
|
/* Connection parameters for master only */
|
||||||
keywords[0] = "host";
|
keywords[0] = "host";
|
||||||
@@ -3365,6 +3617,13 @@ do_witness_create(void)
|
|||||||
keywords[1] = "port";
|
keywords[1] = "port";
|
||||||
values[1] = runtime_options.masterport;
|
values[1] = runtime_options.masterport;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extract the repmgr user and database names from the conninfo string
|
||||||
|
* provided in repmgr.conf
|
||||||
|
*/
|
||||||
|
get_conninfo_value(options.conninfo, "user", repmgr_user);
|
||||||
|
get_conninfo_value(options.conninfo, "dbname", repmgr_db);
|
||||||
|
|
||||||
/* We need to connect to check configuration and copy it */
|
/* We need to connect to check configuration and copy it */
|
||||||
masterconn = establish_db_connection_by_params(keywords, values, true);
|
masterconn = establish_db_connection_by_params(keywords, values, true);
|
||||||
if (!masterconn)
|
if (!masterconn)
|
||||||
@@ -3454,7 +3713,7 @@ do_witness_create(void)
|
|||||||
maxlen_snprintf(script, "%s %s -D %s init -o \"%s-U %s\"",
|
maxlen_snprintf(script, "%s %s -D %s init -o \"%s-U %s\"",
|
||||||
make_pg_path("pg_ctl"),
|
make_pg_path("pg_ctl"),
|
||||||
options.pg_ctl_options, runtime_options.dest_dir,
|
options.pg_ctl_options, runtime_options.dest_dir,
|
||||||
runtime_options.initdb_no_pwprompt ? "" : "-W ",
|
runtime_options.witness_pwprompt ? "-W " : "",
|
||||||
runtime_options.superuser);
|
runtime_options.superuser);
|
||||||
log_info(_("initializing cluster for witness: %s.\n"), script);
|
log_info(_("initializing cluster for witness: %s.\n"), script);
|
||||||
|
|
||||||
@@ -3500,8 +3759,8 @@ do_witness_create(void)
|
|||||||
xsnprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname());
|
xsnprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname());
|
||||||
fputs(buf, pg_conf);
|
fputs(buf, pg_conf);
|
||||||
|
|
||||||
|
/*
|
||||||
/* Attempt to extract a port number from the provided conninfo string
|
* Attempt to extract a port number from the provided conninfo string.
|
||||||
* This will override any value provided with '-l/--local-port', as it's
|
* This will override any value provided with '-l/--local-port', as it's
|
||||||
* what we'll later try and connect to anyway. '-l/--local-port' should
|
* what we'll later try and connect to anyway. '-l/--local-port' should
|
||||||
* be deprecated.
|
* be deprecated.
|
||||||
@@ -3552,13 +3811,18 @@ do_witness_create(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* check if we need to create a user */
|
/* check if we need to create a user */
|
||||||
if (runtime_options.username[0] && runtime_options.localport[0] && strcmp(runtime_options.username,"postgres") != 0)
|
if (strcmp(repmgr_user, "postgres") != 0)
|
||||||
{
|
{
|
||||||
/* create required user; needs to be superuser to create untrusted language function in c */
|
/* create required user; needs to be superuser to create untrusted
|
||||||
maxlen_snprintf(script, "%s -p %s --superuser --login -U %s %s",
|
* language function in C */
|
||||||
|
maxlen_snprintf(script, "%s -p %s --superuser --login %s-U %s %s",
|
||||||
make_pg_path("createuser"),
|
make_pg_path("createuser"),
|
||||||
runtime_options.localport, runtime_options.superuser, runtime_options.username);
|
runtime_options.localport,
|
||||||
|
runtime_options.witness_pwprompt ? "-P " : "",
|
||||||
|
runtime_options.superuser,
|
||||||
|
repmgr_user);
|
||||||
log_info(_("creating user for witness db: %s.\n"), script);
|
log_info(_("creating user for witness db: %s.\n"), script);
|
||||||
|
|
||||||
r = system(script);
|
r = system(script);
|
||||||
@@ -3584,7 +3848,10 @@ do_witness_create(void)
|
|||||||
/* create required db */
|
/* create required db */
|
||||||
maxlen_snprintf(script, "%s -p %s -U %s --owner=%s %s",
|
maxlen_snprintf(script, "%s -p %s -U %s --owner=%s %s",
|
||||||
make_pg_path("createdb"),
|
make_pg_path("createdb"),
|
||||||
runtime_options.localport, runtime_options.superuser, runtime_options.username, runtime_options.dbname);
|
runtime_options.localport,
|
||||||
|
runtime_options.superuser,
|
||||||
|
repmgr_user,
|
||||||
|
repmgr_db);
|
||||||
log_info("creating database for witness db: %s.\n", script);
|
log_info("creating database for witness db: %s.\n", script);
|
||||||
|
|
||||||
r = system(script);
|
r = system(script);
|
||||||
@@ -3610,7 +3877,7 @@ do_witness_create(void)
|
|||||||
|
|
||||||
if (success == false)
|
if (success == false)
|
||||||
{
|
{
|
||||||
char *errmsg = _("unable to retrieve location of pg_hba.conf");
|
char *errmsg = _("Unable to retrieve location of pg_hba.conf");
|
||||||
log_err("%s\n", errmsg);
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
create_event_record(masterconn,
|
create_event_record(masterconn,
|
||||||
@@ -3627,7 +3894,7 @@ do_witness_create(void)
|
|||||||
master_hba_file, runtime_options.dest_dir, false, -1);
|
master_hba_file, runtime_options.dest_dir, false, -1);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
char *errmsg = _("unable to copy pg_hba.conf from master");
|
char *errmsg = _("Unable to copy pg_hba.conf from master");
|
||||||
log_err("%s\n", errmsg);
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
create_event_record(masterconn,
|
create_event_record(masterconn,
|
||||||
@@ -3641,7 +3908,7 @@ do_witness_create(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reload to adapt for changed pg_hba.conf */
|
/* reload witness server to activate the copied pg_hba.conf */
|
||||||
maxlen_snprintf(script, "%s %s -w -D %s reload",
|
maxlen_snprintf(script, "%s %s -w -D %s reload",
|
||||||
make_pg_path("pg_ctl"),
|
make_pg_path("pg_ctl"),
|
||||||
options.pg_ctl_options, runtime_options.dest_dir);
|
options.pg_ctl_options, runtime_options.dest_dir);
|
||||||
@@ -3663,7 +3930,47 @@ do_witness_create(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* register ourselves in the master */
|
/* establish a connection to the witness, and create the schema */
|
||||||
|
witnessconn = establish_db_connection(options.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(witnessconn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
create_event_record(masterconn,
|
||||||
|
&options,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
_("Unable to connect to witness servetr"));
|
||||||
|
PQfinish(masterconn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
log_info(_("starting copy of configuration from master...\n"));
|
||||||
|
|
||||||
|
begin_transaction(witnessconn);
|
||||||
|
|
||||||
|
if (!create_schema(witnessconn))
|
||||||
|
{
|
||||||
|
rollback_transaction(witnessconn);
|
||||||
|
create_event_record(masterconn,
|
||||||
|
&options,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
_("Unable to create schema on witness"));
|
||||||
|
PQfinish(masterconn);
|
||||||
|
PQfinish(witnessconn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
commit_transaction(witnessconn);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Register new witness server on the primary
|
||||||
|
* Do this as late as possible to avoid having to delete
|
||||||
|
* the record if the server creation fails
|
||||||
|
*/
|
||||||
|
|
||||||
if (runtime_options.force)
|
if (runtime_options.force)
|
||||||
{
|
{
|
||||||
@@ -3687,7 +3994,8 @@ do_witness_create(void)
|
|||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
NULL);
|
NULL,
|
||||||
|
true);
|
||||||
|
|
||||||
if (record_created == false)
|
if (record_created == false)
|
||||||
{
|
{
|
||||||
@@ -3702,32 +4010,9 @@ do_witness_create(void)
|
|||||||
exit(ERR_DB_QUERY);
|
exit(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* establish a connection to the witness, and create the schema */
|
|
||||||
witnessconn = establish_db_connection(options.conninfo, true);
|
|
||||||
|
|
||||||
log_info(_("starting copy of configuration from master...\n"));
|
|
||||||
|
|
||||||
begin_transaction(witnessconn);
|
|
||||||
|
|
||||||
|
|
||||||
if (!create_schema(witnessconn))
|
|
||||||
{
|
|
||||||
rollback_transaction(witnessconn);
|
|
||||||
create_event_record(masterconn,
|
|
||||||
&options,
|
|
||||||
options.node,
|
|
||||||
"witness_create",
|
|
||||||
false,
|
|
||||||
_("unable to create schema on witness"));
|
|
||||||
PQfinish(masterconn);
|
|
||||||
PQfinish(witnessconn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
commit_transaction(witnessconn);
|
|
||||||
|
|
||||||
/* copy configuration from master, only repl_nodes is needed */
|
/* copy configuration from master, only repl_nodes is needed */
|
||||||
if (!copy_configuration(masterconn, witnessconn, options.cluster_name))
|
if (!witness_copy_node_records(masterconn, witnessconn, options.cluster_name))
|
||||||
{
|
{
|
||||||
create_event_record(masterconn,
|
create_event_record(masterconn,
|
||||||
&options,
|
&options,
|
||||||
@@ -3735,24 +4020,33 @@ do_witness_create(void)
|
|||||||
"witness_create",
|
"witness_create",
|
||||||
false,
|
false,
|
||||||
_("Unable to copy configuration from master"));
|
_("Unable to copy configuration from master"));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* delete previously created witness node record
|
||||||
|
* XXX maybe set inactive?
|
||||||
|
*/
|
||||||
|
delete_node_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness create");
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
PQfinish(witnessconn);
|
PQfinish(witnessconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* drop superuser powers if needed */
|
/* drop superuser powers if needed */
|
||||||
if (runtime_options.username[0] && runtime_options.localport[0] && strcmp(runtime_options.username,"postgres") != 0)
|
if (strcmp(repmgr_user, "postgres") != 0)
|
||||||
{
|
{
|
||||||
sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", runtime_options.username);
|
sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", repmgr_user);
|
||||||
log_info(_("revoking superuser status on user %s: %s.\n"),
|
log_info(_("revoking superuser status on user %s: %s.\n"),
|
||||||
runtime_options.username, sqlquery);
|
repmgr_user, sqlquery);
|
||||||
|
|
||||||
log_debug(_("witness create: %s\n"), sqlquery);
|
log_debug(_("witness create: %s\n"), sqlquery);
|
||||||
res = PQexec(witnessconn, sqlquery);
|
res = PQexec(witnessconn, sqlquery);
|
||||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to alter user privileges for user %s: %s\n"),
|
log_err(_("Unable to alter user privileges for user %s: %s\n"),
|
||||||
runtime_options.username,
|
repmgr_user,
|
||||||
PQerrorMessage(witnessconn));
|
PQerrorMessage(witnessconn));
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
PQfinish(witnessconn);
|
PQfinish(witnessconn);
|
||||||
@@ -3760,6 +4054,10 @@ do_witness_create(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Finished with the witness server */
|
||||||
|
|
||||||
|
PQfinish(witnessconn);
|
||||||
|
|
||||||
/* Log the event */
|
/* Log the event */
|
||||||
create_event_record(masterconn,
|
create_event_record(masterconn,
|
||||||
&options,
|
&options,
|
||||||
@@ -3769,7 +4067,6 @@ do_witness_create(void)
|
|||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
PQfinish(witnessconn);
|
|
||||||
|
|
||||||
log_notice(_("configuration has been successfully copied to the witness\n"));
|
log_notice(_("configuration has been successfully copied to the witness\n"));
|
||||||
}
|
}
|
||||||
@@ -3829,7 +4126,8 @@ do_help(void)
|
|||||||
printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \
|
printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \
|
||||||
" optionally providing a path to the binary\n"));
|
" optionally providing a path to the binary\n"));
|
||||||
printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n"));
|
printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n"));
|
||||||
printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));
|
/* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/
|
||||||
|
printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n"));
|
||||||
printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \
|
printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \
|
||||||
" (default: postgres)\n"));
|
" (default: postgres)\n"));
|
||||||
printf(_("\n"));
|
printf(_("\n"));
|
||||||
@@ -3975,6 +4273,7 @@ test_ssh_connection(char *host, char *remote_user)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
copy_remote_files(char *host, char *remote_user, char *remote_path,
|
copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||||
char *local_path, bool is_directory, int server_version_num)
|
char *local_path, bool is_directory, int server_version_num)
|
||||||
@@ -4019,6 +4318,9 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
|||||||
* See function 'sendDir()' in 'src/backend/replication/basebackup.c' -
|
* See function 'sendDir()' in 'src/backend/replication/basebackup.c' -
|
||||||
* we're basically simulating what pg_basebackup does, but with rsync rather
|
* we're basically simulating what pg_basebackup does, but with rsync rather
|
||||||
* than the BASEBACKUP replication protocol command.
|
* than the BASEBACKUP replication protocol command.
|
||||||
|
*
|
||||||
|
* *However* currently we'll always copy the contents of the 'pg_replslot'
|
||||||
|
* directory and delete later if appropriate.
|
||||||
*/
|
*/
|
||||||
if (is_directory)
|
if (is_directory)
|
||||||
{
|
{
|
||||||
@@ -4047,12 +4349,6 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
|||||||
appendPQExpBuffer(&rsync_flags, "%s",
|
appendPQExpBuffer(&rsync_flags, "%s",
|
||||||
" --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*");
|
" --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*");
|
||||||
|
|
||||||
if (server_version_num >= 90400)
|
|
||||||
{
|
|
||||||
appendPQExpBuffer(&rsync_flags, "%s",
|
|
||||||
" --exclude=pg_replslot/*");
|
|
||||||
}
|
|
||||||
|
|
||||||
maxlen_snprintf(script, "rsync %s %s:%s/* %s",
|
maxlen_snprintf(script, "rsync %s %s:%s/* %s",
|
||||||
rsync_flags.data, host_string, remote_path, local_path);
|
rsync_flags.data, host_string, remote_path, local_path);
|
||||||
}
|
}
|
||||||
@@ -4275,6 +4571,11 @@ check_parameters_for_action(const int action)
|
|||||||
config_file_required = false;
|
config_file_required = false;
|
||||||
break;
|
break;
|
||||||
case WITNESS_CREATE:
|
case WITNESS_CREATE:
|
||||||
|
/* Require data directory */
|
||||||
|
if (strcmp(runtime_options.dest_dir, "") == 0)
|
||||||
|
{
|
||||||
|
error_list_append(&cli_errors, _("-D/--data-dir required when executing WITNESS CREATE"));
|
||||||
|
}
|
||||||
/* allow all parameters to be supplied */
|
/* allow all parameters to be supplied */
|
||||||
break;
|
break;
|
||||||
case CLUSTER_SHOW:
|
case CLUSTER_SHOW:
|
||||||
@@ -4406,7 +4707,7 @@ create_schema(PGconn *conn)
|
|||||||
"CREATE TABLE %s.repl_nodes ( "
|
"CREATE TABLE %s.repl_nodes ( "
|
||||||
" id INTEGER PRIMARY KEY, "
|
" id INTEGER PRIMARY KEY, "
|
||||||
" type TEXT NOT NULL CHECK (type IN('master','standby','witness')), "
|
" type TEXT NOT NULL CHECK (type IN('master','standby','witness')), "
|
||||||
" upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id), "
|
" upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id) DEFERRABLE, "
|
||||||
" cluster TEXT NOT NULL, "
|
" cluster TEXT NOT NULL, "
|
||||||
" name TEXT NOT NULL, "
|
" name TEXT NOT NULL, "
|
||||||
" conninfo TEXT NOT NULL, "
|
" conninfo TEXT NOT NULL, "
|
||||||
@@ -4766,29 +5067,48 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error)
|
|||||||
char *wal_error_message = NULL;
|
char *wal_error_message = NULL;
|
||||||
|
|
||||||
/* Check that WAL level is set correctly */
|
/* Check that WAL level is set correctly */
|
||||||
if (server_version_num < 90300)
|
if (server_version_num < 90400)
|
||||||
{
|
{
|
||||||
i = guc_set(conn, "wal_level", "=", "hot_standby");
|
i = guc_set(conn, "wal_level", "=", "hot_standby");
|
||||||
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby'");
|
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby'");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char *levels[] = {
|
char *levels_pre96[] = {
|
||||||
"hot_standby",
|
"hot_standby",
|
||||||
"logical",
|
"logical",
|
||||||
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
int j = 0;
|
char *levels_96plus[] = {
|
||||||
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'");
|
"replica",
|
||||||
|
"logical",
|
||||||
|
NULL,
|
||||||
|
};
|
||||||
|
|
||||||
for(; j < 2; j++)
|
char **levels;
|
||||||
|
int j = 0;
|
||||||
|
|
||||||
|
if (server_version_num < 90600)
|
||||||
|
{
|
||||||
|
levels = (char **)levels_pre96;
|
||||||
|
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
levels = (char **)levels_96plus;
|
||||||
|
wal_error_message = _("parameter 'wal_level' must be set to 'replica' or 'logical'");
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
{
|
{
|
||||||
i = guc_set(conn, "wal_level", "=", levels[j]);
|
i = guc_set(conn, "wal_level", "=", levels[j]);
|
||||||
if (i)
|
if (i)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
j++;
|
||||||
|
} while (levels[j] != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == 0 || i == -1)
|
if (i == 0 || i == -1)
|
||||||
|
|||||||
@@ -15,21 +15,21 @@
|
|||||||
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
||||||
# to preserve case, we recommend using lower case and avoiding whitespace
|
# to preserve case, we recommend using lower case and avoiding whitespace
|
||||||
# to facilitate easier querying of the repmgr views and tables.
|
# to facilitate easier querying of the repmgr views and tables.
|
||||||
cluster=example_cluster
|
#cluster=example_cluster
|
||||||
|
|
||||||
# Node ID and name
|
# Node ID and name
|
||||||
# (Note: we recommend to avoid naming nodes after their initial
|
# (Note: we recommend to avoid naming nodes after their initial
|
||||||
# replication function, as this will cause confusion when e.g.
|
# replication function, as this will cause confusion when e.g.
|
||||||
# "standby2" is promoted to primary)
|
# "standby2" is promoted to primary)
|
||||||
node=2 # a unique integer
|
#node=2 # a unique integer
|
||||||
node_name=node2 # an arbitrary (but unique) string; we recommend using
|
#node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||||
# the server's hostname or another identifier unambiguously
|
# the server's hostname or another identifier unambiguously
|
||||||
# associated with the server to avoid confusion
|
# associated with the server to avoid confusion
|
||||||
|
|
||||||
# Database connection information as a conninfo string
|
# Database connection information as a conninfo string
|
||||||
# This must be accessible to all servers in the cluster; for details see:
|
# This must be accessible to all servers in the cluster; for details see:
|
||||||
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||||
|
|
||||||
# Optional configuration items
|
# Optional configuration items
|
||||||
# ============================
|
# ============================
|
||||||
@@ -37,15 +37,16 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
|||||||
# Replication settings
|
# Replication settings
|
||||||
# ---------------------
|
# ---------------------
|
||||||
|
|
||||||
# when using cascading replication and a standby is to be connected to an
|
# When using cascading replication, a standby can connect to another
|
||||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
# upstream standby node which is specified by setting 'upstream_node'.
|
||||||
# must exist before the new standby can be registered. If a standby is
|
# In that case, the upstream node must exist before the new standby
|
||||||
# to connect directly to a primary node, this parameter is not required.
|
# can be registered. If 'upstream_node' is not set, then the standby
|
||||||
upstream_node=1
|
# will connect directly to the primary node.
|
||||||
|
#upstream_node=1
|
||||||
|
|
||||||
# use physical replication slots - PostgreSQL 9.4 and later only
|
# use physical replication slots - PostgreSQL 9.4 and later only
|
||||||
# (default: 0)
|
# (default: 0)
|
||||||
use_replication_slots=0
|
#use_replication_slots=0
|
||||||
|
|
||||||
# NOTE: 'max_replication_slots' should be configured for at least the
|
# NOTE: 'max_replication_slots' should be configured for at least the
|
||||||
# number of standbys which will connect to the primary.
|
# number of standbys which will connect to the primary.
|
||||||
@@ -55,15 +56,15 @@ use_replication_slots=0
|
|||||||
|
|
||||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||||
# (default: NOTICE)
|
# (default: NOTICE)
|
||||||
loglevel=NOTICE
|
#loglevel=NOTICE
|
||||||
|
|
||||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||||
# (default: STDERR)
|
# (default: STDERR)
|
||||||
logfacility=STDERR
|
#logfacility=STDERR
|
||||||
|
|
||||||
# stderr can be redirected to an arbitrary file:
|
# stderr can be redirected to an arbitrary file:
|
||||||
#
|
#
|
||||||
logfile='/var/log/repmgr/repmgr.log'
|
#logfile='/var/log/repmgr/repmgr.log'
|
||||||
|
|
||||||
# event notifications can be passed to an arbitrary external program
|
# event notifications can be passed to an arbitrary external program
|
||||||
# together with the following parameters:
|
# together with the following parameters:
|
||||||
@@ -77,12 +78,12 @@ logfile='/var/log/repmgr/repmgr.log'
|
|||||||
# the values provided for "%t" and "%d" will probably contain spaces,
|
# the values provided for "%t" and "%d" will probably contain spaces,
|
||||||
# so should be quoted in the provided command configuration, e.g.:
|
# so should be quoted in the provided command configuration, e.g.:
|
||||||
#
|
#
|
||||||
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||||
|
|
||||||
# By default, all notifications will be passed; the notification types
|
# By default, all notifications will be passed; the notification types
|
||||||
# can be filtered to explicitly named ones:
|
# can be filtered to explicitly named ones:
|
||||||
#
|
#
|
||||||
event_notifications=master_register,standby_register,witness_create
|
#event_notifications=master_register,standby_register,witness_create
|
||||||
|
|
||||||
|
|
||||||
# Environment/command settings
|
# Environment/command settings
|
||||||
@@ -90,17 +91,17 @@ event_notifications=master_register,standby_register,witness_create
|
|||||||
|
|
||||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
||||||
# (if not provided, defaults to system $PATH)
|
# (if not provided, defaults to system $PATH)
|
||||||
pg_bindir=/usr/bin/
|
#pg_bindir=/usr/bin/
|
||||||
|
|
||||||
# external command options
|
# external command options
|
||||||
|
|
||||||
rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||||
ssh_options=-o "StrictHostKeyChecking no"
|
#ssh_options=-o "StrictHostKeyChecking no"
|
||||||
|
|
||||||
# external command arguments. Values shown are examples.
|
# external command arguments. Values shown are examples.
|
||||||
|
|
||||||
pg_ctl_options='-s'
|
#pg_ctl_options='-s'
|
||||||
pg_basebackup_options='--xlog-method=s'
|
#pg_basebackup_options='--xlog-method=s'
|
||||||
|
|
||||||
|
|
||||||
# Standby clone settings
|
# Standby clone settings
|
||||||
@@ -122,27 +123,30 @@ pg_basebackup_options='--xlog-method=s'
|
|||||||
# Number of seconds to wait for a response from the primary server before
|
# Number of seconds to wait for a response from the primary server before
|
||||||
# deciding it has failed.
|
# deciding it has failed.
|
||||||
|
|
||||||
master_response_timeout=60
|
#master_response_timeout=60
|
||||||
|
|
||||||
# Number of attempts at what interval (in seconds) to try and
|
# Number of attempts at what interval (in seconds) to try and
|
||||||
# connect to a server to establish its status (e.g. master
|
# connect to a server to establish its status (e.g. master
|
||||||
# during failover)
|
# during failover)
|
||||||
reconnect_attempts=6
|
#reconnect_attempts=6
|
||||||
reconnect_interval=10
|
#reconnect_interval=10
|
||||||
|
|
||||||
# Autofailover options
|
# Autofailover options
|
||||||
failover=manual # one of 'automatic', 'manual'
|
#failover=manual # one of 'automatic', 'manual'
|
||||||
# (default: manual)
|
# (default: manual)
|
||||||
priority=100 # a value of zero or less prevents the node being promoted to primary
|
#priority=100 # a value of zero or less prevents the node being promoted to primary
|
||||||
# (default: 100)
|
# (default: 100)
|
||||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||||
|
|
||||||
# monitoring interval in seconds; default is 2
|
# monitoring interval in seconds; default is 2
|
||||||
monitor_interval_secs=2
|
#monitor_interval_secs=2
|
||||||
|
|
||||||
# change wait time for primary; before we bail out and exit when the primary
|
# change wait time for primary; before we bail out and exit when the primary
|
||||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||||
# default value is 300.
|
# default value is 300.
|
||||||
retry_promote_interval_secs=300
|
#retry_promote_interval_secs=300
|
||||||
|
|
||||||
|
# Number of seconds after which the witness server resyncs the repl_nodes table
|
||||||
|
#witness_repl_nodes_sync_interval_secs=15
|
||||||
|
|||||||
27
repmgr.h
27
repmgr.h
@@ -32,8 +32,6 @@
|
|||||||
#define MIN_SUPPORTED_VERSION "9.3"
|
#define MIN_SUPPORTED_VERSION "9.3"
|
||||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#define MAXFILENAME 1024
|
|
||||||
#define ERRBUFF_SIZE 512
|
#define ERRBUFF_SIZE 512
|
||||||
|
|
||||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||||
@@ -57,8 +55,8 @@ typedef struct
|
|||||||
char dbname[MAXLEN];
|
char dbname[MAXLEN];
|
||||||
char host[MAXLEN];
|
char host[MAXLEN];
|
||||||
char username[MAXLEN];
|
char username[MAXLEN];
|
||||||
char dest_dir[MAXFILENAME];
|
char dest_dir[MAXPGPATH];
|
||||||
char config_file[MAXFILENAME];
|
char config_file[MAXPGPATH];
|
||||||
char remote_user[MAXLEN];
|
char remote_user[MAXLEN];
|
||||||
char superuser[MAXLEN];
|
char superuser[MAXLEN];
|
||||||
char wal_keep_segments[MAXLEN];
|
char wal_keep_segments[MAXLEN];
|
||||||
@@ -67,7 +65,7 @@ typedef struct
|
|||||||
bool force;
|
bool force;
|
||||||
bool wait_for_master;
|
bool wait_for_master;
|
||||||
bool ignore_rsync_warn;
|
bool ignore_rsync_warn;
|
||||||
bool initdb_no_pwprompt;
|
bool witness_pwprompt;
|
||||||
bool rsync_only;
|
bool rsync_only;
|
||||||
bool fast_checkpoint;
|
bool fast_checkpoint;
|
||||||
bool ignore_external_config_files;
|
bool ignore_external_config_files;
|
||||||
@@ -81,7 +79,7 @@ typedef struct
|
|||||||
|
|
||||||
/* parameter used by STANDBY SWITCHOVER */
|
/* parameter used by STANDBY SWITCHOVER */
|
||||||
char remote_config_file[MAXLEN];
|
char remote_config_file[MAXLEN];
|
||||||
char pg_rewind[MAXFILENAME];
|
char pg_rewind[MAXPGPATH];
|
||||||
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
||||||
char config_archive_dir[MAXLEN];
|
char config_archive_dir[MAXLEN];
|
||||||
/* parameter used by CLUSTER CLEANUP */
|
/* parameter used by CLUSTER CLEANUP */
|
||||||
@@ -91,11 +89,24 @@ typedef struct
|
|||||||
|
|
||||||
char recovery_min_apply_delay[MAXLEN];
|
char recovery_min_apply_delay[MAXLEN];
|
||||||
|
|
||||||
/* deprecated command line option */
|
/* deprecated command line options */
|
||||||
char localport[MAXLEN];
|
char localport[MAXLEN];
|
||||||
|
bool initdb_no_pwprompt;
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "" }
|
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false }
|
||||||
|
|
||||||
|
struct BackupLabel
|
||||||
|
{
|
||||||
|
XLogRecPtr start_wal_location;
|
||||||
|
char start_wal_file[MAXLEN];
|
||||||
|
XLogRecPtr checkpoint_location;
|
||||||
|
char backup_from[MAXLEN];
|
||||||
|
char backup_method[MAXLEN];
|
||||||
|
char start_time[MAXLEN];
|
||||||
|
char label[MAXLEN];
|
||||||
|
XLogRecPtr min_failover_slot_lsn;
|
||||||
|
};
|
||||||
|
|
||||||
extern char repmgr_schema[MAXLEN];
|
extern char repmgr_schema[MAXLEN];
|
||||||
extern bool config_file_found;
|
extern bool config_file_found;
|
||||||
|
|||||||
245
repmgrd.c
245
repmgrd.c
@@ -142,6 +142,20 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
set_progname(argv[0]);
|
set_progname(argv[0]);
|
||||||
|
|
||||||
|
/* Disallow running as root to prevent directory ownership problems */
|
||||||
|
if (geteuid() == 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
_("%s: cannot be run as root\n"
|
||||||
|
"Please log in (using, e.g., \"su\") as the "
|
||||||
|
"(unprivileged) user that owns "
|
||||||
|
"the data directory.\n"
|
||||||
|
),
|
||||||
|
progname());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "?Vf:vmdp:", long_options, &optindex)) != -1)
|
while ((c = getopt_long(argc, argv, "?Vf:vmdp:", long_options, &optindex)) != -1)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
@@ -260,7 +274,14 @@ main(int argc, char **argv)
|
|||||||
/* Retrieve record for this node from the local database */
|
/* Retrieve record for this node from the local database */
|
||||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||||
|
|
||||||
/* No node record found - exit gracefully */
|
/*
|
||||||
|
* No node record found - exit gracefully
|
||||||
|
*
|
||||||
|
* Note: it's highly unlikely this situation will occur when starting
|
||||||
|
* repmgrd on a witness, unless someone goes to the trouble of
|
||||||
|
* deleting the node record from the previously copied table.
|
||||||
|
*/
|
||||||
|
|
||||||
if (node_info.node_id == NODE_NOT_FOUND)
|
if (node_info.node_id == NODE_NOT_FOUND)
|
||||||
{
|
{
|
||||||
log_err(_("No metadata record found for this node - terminating\n"));
|
log_err(_("No metadata record found for this node - terminating\n"));
|
||||||
@@ -277,9 +298,12 @@ main(int argc, char **argv)
|
|||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
/* Timer for repl_nodes synchronisation interval */
|
||||||
|
int sync_repl_nodes_elapsed = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set my server mode, establish a connection to master and start
|
* Set my server mode, establish a connection to master and start
|
||||||
* monitor
|
* monitoring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
switch (node_info.type)
|
switch (node_info.type)
|
||||||
@@ -388,8 +412,7 @@ main(int argc, char **argv)
|
|||||||
initPQExpBuffer(&errmsg);
|
initPQExpBuffer(&errmsg);
|
||||||
|
|
||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("unable to connect to master node '%s'"),
|
_("unable to connect to master node"));
|
||||||
master_options.node_name);
|
|
||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
@@ -452,6 +475,24 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
sleep(local_options.monitor_interval_secs);
|
sleep(local_options.monitor_interval_secs);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On a witness node, regularly resync the repl_nodes table
|
||||||
|
* to keep up with any changes on the primary
|
||||||
|
*
|
||||||
|
* TODO: only resync the table if changes actually detected
|
||||||
|
*/
|
||||||
|
if (node_info.type == WITNESS)
|
||||||
|
{
|
||||||
|
sync_repl_nodes_elapsed += local_options.monitor_interval_secs;
|
||||||
|
log_debug(_("seconds since last node record sync: %i (sync interval: %i)\n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs);
|
||||||
|
if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs)
|
||||||
|
{
|
||||||
|
log_debug(_("Resyncing repl_nodes table\n"));
|
||||||
|
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
|
||||||
|
sync_repl_nodes_elapsed = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (got_SIGHUP)
|
if (got_SIGHUP)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -466,6 +507,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
got_SIGHUP = false;
|
got_SIGHUP = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (failover_done)
|
if (failover_done)
|
||||||
{
|
{
|
||||||
log_debug(_("standby check loop will terminate\n"));
|
log_debug(_("standby check loop will terminate\n"));
|
||||||
@@ -558,7 +600,7 @@ witness_monitor(void)
|
|||||||
* XXX it would be neat to be able to handle this with e.g. table-based
|
* XXX it would be neat to be able to handle this with e.g. table-based
|
||||||
* logical replication
|
* logical replication
|
||||||
*/
|
*/
|
||||||
copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
|
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -654,15 +696,15 @@ standby_monitor(void)
|
|||||||
PGresult *res;
|
PGresult *res;
|
||||||
char monitor_standby_timestamp[MAXLEN];
|
char monitor_standby_timestamp[MAXLEN];
|
||||||
char last_wal_master_location[MAXLEN];
|
char last_wal_master_location[MAXLEN];
|
||||||
char last_wal_standby_received[MAXLEN];
|
char last_xlog_receive_location[MAXLEN];
|
||||||
char last_wal_standby_applied[MAXLEN];
|
char last_xlog_replay_location[MAXLEN];
|
||||||
char last_wal_standby_applied_timestamp[MAXLEN];
|
char last_xact_replay_timestamp[MAXLEN];
|
||||||
bool last_wal_standby_received_gte_replayed;
|
bool last_xlog_receive_location_gte_replayed;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
XLogRecPtr lsn_master;
|
XLogRecPtr lsn_master_current_xlog_location;
|
||||||
XLogRecPtr lsn_standby_received;
|
XLogRecPtr lsn_last_xlog_receive_location;
|
||||||
XLogRecPtr lsn_standby_applied;
|
XLogRecPtr lsn_last_xlog_replay_location;
|
||||||
|
|
||||||
int connection_retries,
|
int connection_retries,
|
||||||
ret;
|
ret;
|
||||||
@@ -674,8 +716,9 @@ standby_monitor(void)
|
|||||||
t_node_info upstream_node;
|
t_node_info upstream_node;
|
||||||
|
|
||||||
int active_master_id;
|
int active_master_id;
|
||||||
const char *type = NULL;
|
const char *upstream_node_type = NULL;
|
||||||
|
|
||||||
|
bool receiving_streamed_wal = true;
|
||||||
/*
|
/*
|
||||||
* Verify that the local node is still available - if not there's
|
* Verify that the local node is still available - if not there's
|
||||||
* no point in doing much else anyway
|
* no point in doing much else anyway
|
||||||
@@ -700,9 +743,10 @@ standby_monitor(void)
|
|||||||
upstream_conn = get_upstream_connection(my_local_conn,
|
upstream_conn = get_upstream_connection(my_local_conn,
|
||||||
local_options.cluster_name,
|
local_options.cluster_name,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
&upstream_node_id, upstream_conninfo);
|
&upstream_node_id,
|
||||||
|
upstream_conninfo);
|
||||||
|
|
||||||
type = upstream_node_id == master_options.node
|
upstream_node_type = (upstream_node_id == master_options.node)
|
||||||
? "master"
|
? "master"
|
||||||
: "upstream";
|
: "upstream";
|
||||||
|
|
||||||
@@ -712,7 +756,7 @@ standby_monitor(void)
|
|||||||
* we cannot reconnect, try to get a new upstream node.
|
* we cannot reconnect, try to get a new upstream node.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
check_connection(&upstream_conn, type, upstream_conninfo);
|
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
|
||||||
/*
|
/*
|
||||||
* This takes up to local_options.reconnect_attempts *
|
* This takes up to local_options.reconnect_attempts *
|
||||||
* local_options.reconnect_interval seconds
|
* local_options.reconnect_interval seconds
|
||||||
@@ -725,7 +769,7 @@ standby_monitor(void)
|
|||||||
|
|
||||||
if (local_options.failover == MANUAL_FAILOVER)
|
if (local_options.failover == MANUAL_FAILOVER)
|
||||||
{
|
{
|
||||||
log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), type);
|
log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), upstream_node_type);
|
||||||
|
|
||||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||||
{
|
{
|
||||||
@@ -784,7 +828,7 @@ standby_monitor(void)
|
|||||||
* Failover handling is handled differently depending on whether
|
* Failover handling is handled differently depending on whether
|
||||||
* the failed node is the master or a cascading standby
|
* the failed node is the master or a cascading standby
|
||||||
*/
|
*/
|
||||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
|
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
||||||
|
|
||||||
if (upstream_node.type == MASTER)
|
if (upstream_node.type == MASTER)
|
||||||
{
|
{
|
||||||
@@ -887,7 +931,7 @@ standby_monitor(void)
|
|||||||
* from the upstream node to write monitoring information
|
* from the upstream node to write monitoring information
|
||||||
*/
|
*/
|
||||||
|
|
||||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
|
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
||||||
|
|
||||||
sprintf(sqlquery,
|
sprintf(sqlquery,
|
||||||
"SELECT id "
|
"SELECT id "
|
||||||
@@ -956,29 +1000,42 @@ standby_monitor(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
||||||
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN);
|
||||||
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN);
|
||||||
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||||
last_wal_standby_received_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
|
||||||
|
last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||||
? true
|
? true
|
||||||
: false;
|
: false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If pg_last_xlog_receive_location is NULL, this means we're in archive
|
||||||
|
* recovery and will need to calculate lag based on pg_last_xlog_replay_location
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Replayed WAL is greater than received streamed WAL
|
||||||
|
*/
|
||||||
|
if (PQgetisnull(res, 0, 1))
|
||||||
|
{
|
||||||
|
receiving_streamed_wal = false;
|
||||||
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In the unusual event of a standby becoming disconnected from the primary,
|
* In the unusual event of a standby becoming disconnected from the primary,
|
||||||
* while this repmgrd remains connected to the primary, subtracting
|
* while this repmgrd remains connected to the primary, subtracting
|
||||||
* "lsn_standby_applied" from "lsn_standby_received" and coercing to
|
* "last_xlog_replay_location" from "lsn_last_xlog_receive_location" and coercing to
|
||||||
* (long long unsigned int) will result in a meaningless, very large
|
* (long long unsigned int) will result in a meaningless, very large
|
||||||
* value which will overflow a BIGINT column and spew error messages into the
|
* value which will overflow a BIGINT column and spew error messages into the
|
||||||
* PostgreSQL log. In the absence of a better strategy, skip attempting
|
* PostgreSQL log. In the absence of a better strategy, skip attempting
|
||||||
* to insert a monitoring record.
|
* to insert a monitoring record.
|
||||||
*/
|
*/
|
||||||
if (last_wal_standby_received_gte_replayed == false)
|
if (receiving_streamed_wal == true && last_xlog_receive_location_gte_replayed == false)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_WARNING,
|
log_verbose(LOG_WARNING,
|
||||||
"Invalid replication_lag value calculated - is this standby connected to its upstream?\n");
|
"Replayed WAL newer than received WAL - is this standby connected to its upstream?\n");
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get master xlog info */
|
/* Get master xlog info */
|
||||||
@@ -996,29 +1053,49 @@ standby_monitor(void)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* Calculate the lag */
|
/* Calculate the lag */
|
||||||
lsn_master = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
||||||
lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL);
|
|
||||||
lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL);
|
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
||||||
|
|
||||||
|
if (last_xlog_receive_location_gte_replayed == false)
|
||||||
|
{
|
||||||
|
lsn_last_xlog_receive_location = lsn_last_xlog_replay_location;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build the SQL to execute on master
|
* Build the SQL to execute on master
|
||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_monitor "
|
"INSERT INTO %s.repl_monitor "
|
||||||
" (primary_node, standby_node, "
|
" (primary_node, "
|
||||||
" last_monitor_time, last_apply_time, "
|
" standby_node, "
|
||||||
" last_wal_primary_location, last_wal_standby_location, "
|
" last_monitor_time, "
|
||||||
" replication_lag, apply_lag ) "
|
" last_apply_time, "
|
||||||
" VALUES(%d, %d, "
|
" last_wal_primary_location, "
|
||||||
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
" last_wal_standby_location, "
|
||||||
" '%s', '%s', "
|
" replication_lag, "
|
||||||
" %llu, %llu) ",
|
" apply_lag ) "
|
||||||
|
" VALUES(%d, "
|
||||||
|
" %d, "
|
||||||
|
" '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||||
|
" '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||||
|
" '%s', "
|
||||||
|
" '%s', "
|
||||||
|
" %llu, "
|
||||||
|
" %llu) ",
|
||||||
get_repmgr_schema_quoted(master_conn),
|
get_repmgr_schema_quoted(master_conn),
|
||||||
master_options.node, local_options.node,
|
master_options.node,
|
||||||
monitor_standby_timestamp, last_wal_standby_applied_timestamp,
|
local_options.node,
|
||||||
last_wal_master_location, last_wal_standby_received,
|
monitor_standby_timestamp,
|
||||||
(long long unsigned int)(lsn_master - lsn_standby_received),
|
last_xact_replay_timestamp,
|
||||||
(long long unsigned int)(lsn_standby_received - lsn_standby_applied));
|
last_wal_master_location,
|
||||||
|
last_xlog_receive_location,
|
||||||
|
(long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location),
|
||||||
|
(long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Execute the query asynchronously, but don't check for a result. We will
|
* Execute the query asynchronously, but don't check for a result. We will
|
||||||
@@ -1056,7 +1133,7 @@ do_master_failover(void)
|
|||||||
XLogRecPtr xlog_recptr;
|
XLogRecPtr xlog_recptr;
|
||||||
bool lsn_format_ok;
|
bool lsn_format_ok;
|
||||||
|
|
||||||
char last_wal_standby_applied[MAXLEN];
|
char last_xlog_replay_location[MAXLEN];
|
||||||
|
|
||||||
PGconn *node_conn = NULL;
|
PGconn *node_conn = NULL;
|
||||||
|
|
||||||
@@ -1239,8 +1316,8 @@ do_master_failover(void)
|
|||||||
" considered as new master and exit.\n"),
|
" considered as new master and exit.\n"),
|
||||||
PQerrorMessage(my_local_conn));
|
PQerrorMessage(my_local_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
|
sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0);
|
||||||
update_shared_memory(last_wal_standby_applied);
|
update_shared_memory(last_xlog_replay_location);
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
/* write last location in shared memory */
|
/* write last location in shared memory */
|
||||||
@@ -1370,9 +1447,6 @@ do_master_failover(void)
|
|||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Close the connection to this server */
|
|
||||||
PQfinish(my_local_conn);
|
|
||||||
my_local_conn = NULL;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* determine which one is the best candidate to promote to master
|
* determine which one is the best candidate to promote to master
|
||||||
@@ -1420,18 +1494,24 @@ do_master_failover(void)
|
|||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_debug("best candidate node id is %i\n", best_candidate.node_id);
|
||||||
|
|
||||||
/* if local node is the best candidate, promote it */
|
/* if local node is the best candidate, promote it */
|
||||||
if (best_candidate.node_id == local_options.node)
|
if (best_candidate.node_id == local_options.node)
|
||||||
{
|
{
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
/* Close the connection to this server */
|
||||||
|
PQfinish(my_local_conn);
|
||||||
|
my_local_conn = NULL;
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
/* wait */
|
/* wait */
|
||||||
sleep(5);
|
sleep(5);
|
||||||
|
|
||||||
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
||||||
|
|
||||||
log_debug(_("promote command is: \"%s\"\n"),
|
log_debug("promote command is: \"%s\"\n",
|
||||||
local_options.promote_command);
|
local_options.promote_command);
|
||||||
|
|
||||||
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
||||||
@@ -1442,6 +1522,33 @@ do_master_failover(void)
|
|||||||
r = system(local_options.promote_command);
|
r = system(local_options.promote_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Check whether the primary reappeared, which will have caused the
|
||||||
|
* promote command to fail
|
||||||
|
*/
|
||||||
|
my_local_conn = establish_db_connection(local_options.conninfo, false);
|
||||||
|
|
||||||
|
if (my_local_conn != NULL)
|
||||||
|
{
|
||||||
|
int master_node_id;
|
||||||
|
|
||||||
|
master_conn = get_master_connection(my_local_conn,
|
||||||
|
local_options.cluster_name,
|
||||||
|
&master_node_id, NULL);
|
||||||
|
|
||||||
|
if (master_conn != NULL && master_node_id == failed_master.node_id)
|
||||||
|
{
|
||||||
|
log_notice(_("Original master reappeared before this standby was promoted - no action taken\n"));
|
||||||
|
|
||||||
|
PQfinish(master_conn);
|
||||||
|
/* no failover occurred but we'll want to restart connections */
|
||||||
|
failover_done = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(my_local_conn);
|
||||||
|
}
|
||||||
|
|
||||||
log_err(_("promote command failed. You could check and try it manually.\n"));
|
log_err(_("promote command failed. You could check and try it manually.\n"));
|
||||||
|
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
@@ -1473,11 +1580,39 @@ do_master_failover(void)
|
|||||||
{
|
{
|
||||||
PGconn *new_master_conn;
|
PGconn *new_master_conn;
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
|
int master_node_id;
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
/* wait */
|
/* wait */
|
||||||
sleep(10);
|
sleep(10);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check whether the primary reappeared while we were waiting, so we
|
||||||
|
* don't end up following the promotion candidate
|
||||||
|
*/
|
||||||
|
|
||||||
|
master_conn = get_master_connection(my_local_conn,
|
||||||
|
local_options.cluster_name,
|
||||||
|
&master_node_id, NULL);
|
||||||
|
|
||||||
|
if (master_conn != NULL && master_node_id == failed_master.node_id)
|
||||||
|
{
|
||||||
|
log_notice(_("Original master reappeared - no action taken\n"));
|
||||||
|
|
||||||
|
PQfinish(master_conn);
|
||||||
|
/* no failover occurred but we'll want to restart connections */
|
||||||
|
failover_done = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Close the connection to this server */
|
||||||
|
PQfinish(my_local_conn);
|
||||||
|
my_local_conn = NULL;
|
||||||
|
|
||||||
|
/* XXX double-check the promotion candidate did become the new primary */
|
||||||
|
|
||||||
log_notice(_("node %d is the best candidate for new master, attempting to follow...\n"),
|
log_notice(_("node %d is the best candidate for new master, attempting to follow...\n"),
|
||||||
best_candidate.node_id);
|
best_candidate.node_id);
|
||||||
|
|
||||||
@@ -1601,7 +1736,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
|
|
||||||
if (PQntuples(res) == 0)
|
if (PQntuples(res) == 0)
|
||||||
{
|
{
|
||||||
log_err(_("no node with id %i found"), upstream_node_id);
|
log_err(_("no node with id %i found\n"), upstream_node_id);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1934,6 +2069,8 @@ check_node_configuration(void)
|
|||||||
/* Adding the node */
|
/* Adding the node */
|
||||||
log_info(_("adding node %d to cluster '%s'\n"),
|
log_info(_("adding node %d to cluster '%s'\n"),
|
||||||
local_options.node, local_options.cluster_name);
|
local_options.node, local_options.cluster_name);
|
||||||
|
|
||||||
|
/* XXX use create_node_record() */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_nodes"
|
"INSERT INTO %s.repl_nodes"
|
||||||
" (id, cluster, name, conninfo, priority, witness) "
|
" (id, cluster, name, conninfo, priority, witness) "
|
||||||
@@ -2055,7 +2192,7 @@ terminate(int retval)
|
|||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
update_shared_memory(char *last_wal_standby_applied)
|
update_shared_memory(char *last_xlog_replay_location)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
@@ -2063,7 +2200,7 @@ update_shared_memory(char *last_wal_standby_applied)
|
|||||||
sprintf(sqlquery,
|
sprintf(sqlquery,
|
||||||
"SELECT %s.repmgr_update_standby_location('%s')",
|
"SELECT %s.repmgr_update_standby_location('%s')",
|
||||||
get_repmgr_schema_quoted(my_local_conn),
|
get_repmgr_schema_quoted(my_local_conn),
|
||||||
last_wal_standby_applied);
|
last_xlog_replay_location);
|
||||||
|
|
||||||
/* If an error happens, just inform about that and continue */
|
/* If an error happens, just inform about that and continue */
|
||||||
res = PQexec(my_local_conn, sqlquery);
|
res = PQexec(my_local_conn, sqlquery);
|
||||||
@@ -2284,7 +2421,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
|||||||
|
|
||||||
if (res == 0)
|
if (res == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("No record found record for node %i\n"), node_id);
|
log_warning(_("No record found for node %i\n"), node_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
return node_info;
|
return node_info;
|
||||||
|
|||||||
31
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
31
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
* Update a repmgr 3.1.1 installation to repmgr 3.1.2
|
||||||
|
* --------------------------------------------------
|
||||||
|
*
|
||||||
|
* This update is only required if repmgrd is being used in conjunction
|
||||||
|
* with a witness server.
|
||||||
|
*
|
||||||
|
* The new repmgr package should be installed first. Then
|
||||||
|
* carry out these steps:
|
||||||
|
*
|
||||||
|
* 1. (If repmgrd is used) stop any running repmgrd instances
|
||||||
|
* 2. On the master node, execute the SQL statement listed below
|
||||||
|
* 3. (If repmgrd is used) restart repmgrd
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If your repmgr installation is not included in your repmgr
|
||||||
|
* user's search path, please set the search path to the name
|
||||||
|
* of the repmgr schema to ensure objects are installed in
|
||||||
|
* the correct location.
|
||||||
|
*
|
||||||
|
* The repmgr schema is "repmgr_" + the cluster name defined in
|
||||||
|
* 'repmgr.conf'.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-- SET search_path TO 'name_of_repmgr_schema';
|
||||||
|
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE;
|
||||||
|
COMMIT;
|
||||||
@@ -83,7 +83,12 @@ _PG_init(void)
|
|||||||
* resources in repmgr_shmem_startup().
|
* resources in repmgr_shmem_startup().
|
||||||
*/
|
*/
|
||||||
RequestAddinShmemSpace(repmgr_memsize());
|
RequestAddinShmemSpace(repmgr_memsize());
|
||||||
|
|
||||||
|
#if (PG_VERSION_NUM >= 90600)
|
||||||
|
RequestNamedLWLockTranche("repmgr", 1);
|
||||||
|
#else
|
||||||
RequestAddinLWLocks(1);
|
RequestAddinLWLocks(1);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Install hooks.
|
* Install hooks.
|
||||||
@@ -128,7 +133,11 @@ repmgr_shmem_startup(void)
|
|||||||
if (!found)
|
if (!found)
|
||||||
{
|
{
|
||||||
/* First time through ... */
|
/* First time through ... */
|
||||||
|
#if (PG_VERSION_NUM >= 90600)
|
||||||
|
shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock;
|
||||||
|
#else
|
||||||
shared_state->lock = LWLockAssign();
|
shared_state->lock = LWLockAssign();
|
||||||
|
#endif
|
||||||
snprintf(shared_state->location,
|
snprintf(shared_state->location,
|
||||||
sizeof(shared_state->location), "%X/%X", 0, 0);
|
sizeof(shared_state->location), "%X/%X", 0, 0);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,12 +24,17 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "errcode.h"
|
#include "errcode.h"
|
||||||
|
|
||||||
|
|
||||||
#define QUERY_STR_LEN 8192
|
#define QUERY_STR_LEN 8192
|
||||||
#define MAXLEN 1024
|
#define MAXLEN 1024
|
||||||
#define MAXLINELENGTH 4096
|
#define MAXLINELENGTH 4096
|
||||||
#define MAXVERSIONSTR 16
|
#define MAXVERSIONSTR 16
|
||||||
#define MAXCONNINFO 1024
|
#define MAXCONNINFO 1024
|
||||||
|
|
||||||
|
/* Why? http://stackoverflow.com/a/5459929/398670 */
|
||||||
|
#define STR(x) CppAsString(x)
|
||||||
|
|
||||||
|
#define MAXLEN_STR STR(MAXLEN)
|
||||||
|
|
||||||
extern int
|
extern int
|
||||||
xsnprintf(char *str, size_t size, const char *format,...)
|
xsnprintf(char *str, size_t size, const char *format,...)
|
||||||
|
|||||||
Reference in New Issue
Block a user