mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
103 Commits
v3.2.1
...
REL3_1_STABLE
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3802b917e0 | ||
|
|
4f7a2a0614 | ||
|
|
06c7fe04b0 | ||
|
|
1fe01e9168 | ||
|
|
ed1136f443 | ||
|
|
a7ed60a533 | ||
|
|
fc5a18410d | ||
|
|
fd52c8ec3c | ||
|
|
47f1c6fa84 | ||
|
|
fba89ef37c | ||
|
|
4cc6cbe32f | ||
|
|
c715077c29 | ||
|
|
c178d8ed27 | ||
|
|
d4d06f43f7 | ||
|
|
0d346a9f54 | ||
|
|
abb16e4366 | ||
|
|
59b1924d5b | ||
|
|
c88ea62643 | ||
|
|
5b91a5e2e5 | ||
|
|
c2a1a35282 | ||
|
|
2b8b74ae75 | ||
|
|
08ef4d4be6 | ||
|
|
1a0049f086 | ||
|
|
af6f0fc2cf | ||
|
|
893d67473d | ||
|
|
a922cd5558 | ||
|
|
7bbc664230 | ||
|
|
a6998fe0f9 | ||
|
|
dadfdcc51f | ||
|
|
b8823d5c1f | ||
|
|
e59b57376d | ||
|
|
3db87e6a31 | ||
|
|
94d05619c3 | ||
|
|
807c7c926c | ||
|
|
df68f1f3f6 | ||
|
|
d4c75bb6c7 | ||
|
|
94d4e1128d | ||
|
|
dbd82ba687 | ||
|
|
0888fbc538 | ||
|
|
92a84bd950 | ||
|
|
a3318d65d2 | ||
|
|
374e9811c9 | ||
|
|
16896510dc | ||
|
|
1c155a1088 | ||
|
|
31d57f4122 | ||
|
|
7b313b9d71 | ||
|
|
cf126642bd | ||
|
|
52281fcde8 | ||
|
|
de573edaaa | ||
|
|
4cb7f301ad | ||
|
|
87d8de4441 | ||
|
|
6db742f81e | ||
|
|
c79933685c | ||
|
|
04ba672b9f | ||
|
|
4f4111063a | ||
|
|
3a3a536e6d | ||
|
|
6f7206a5a1 | ||
|
|
f9fd1dd227 | ||
|
|
8140ba9c27 | ||
|
|
32dba444e1 | ||
|
|
8212ff8d8a | ||
|
|
1ccd0edad2 | ||
|
|
59b31dd1ca | ||
|
|
300b9f0cc2 | ||
|
|
0efee4cf65 | ||
|
|
0cb2584886 | ||
|
|
b88d27248c | ||
|
|
683c54325e | ||
|
|
70d398cd47 | ||
|
|
7b7d80e5f2 | ||
|
|
96b0e26084 | ||
|
|
91c498f6f1 | ||
|
|
d48093e732 | ||
|
|
3f0d1754a4 | ||
|
|
f27979bbe1 | ||
|
|
e9445a5d5e | ||
|
|
9a2717b5e3 | ||
|
|
dd6ea1cd77 | ||
|
|
de5908c122 | ||
|
|
4b5c84921c | ||
|
|
aaa8d70cef | ||
|
|
ca31b846e7 | ||
|
|
a27cecb559 | ||
|
|
cf0cdfa6a1 | ||
|
|
31489d92c0 | ||
|
|
b7fd13aed2 | ||
|
|
3c4bf27aa7 | ||
|
|
0ebd9c15d9 | ||
|
|
f9dba283d4 | ||
|
|
205f1cebbb | ||
|
|
4d97c1ebf7 | ||
|
|
12c395e91f | ||
|
|
bd1e4f71d6 | ||
|
|
cb49071ea4 | ||
|
|
5ad674edff | ||
|
|
ac09bad89c | ||
|
|
009d92fec8 | ||
|
|
b3d8a68a1d | ||
|
|
05b47cb2a8 | ||
|
|
dc542a1b7d | ||
|
|
6ce8058749 | ||
|
|
2edcac77f0 | ||
|
|
f740374392 |
12
FAQ.md
12
FAQ.md
@@ -137,7 +137,6 @@ General
|
||||
of events which includes servers removed from the replication cluster
|
||||
which no longer have an entry in the `repl_nodes` table.
|
||||
|
||||
|
||||
`repmgrd`
|
||||
---------
|
||||
|
||||
@@ -152,9 +151,6 @@ General
|
||||
|
||||
In `repmgr.conf`, set its priority to a value of 0 or less.
|
||||
|
||||
Additionally, if `failover` is set to `manual`, the node will never
|
||||
be considered as a promotion candidate.
|
||||
|
||||
- Does `repmgrd` support delayed standbys?
|
||||
|
||||
`repmgrd` can monitor delayed standbys - those set up with
|
||||
@@ -173,11 +169,3 @@ General
|
||||
|
||||
Configure your system's `logrotate` service to do this; see example
|
||||
in README.md
|
||||
|
||||
- I've recloned a failed master as a standby, but `repmgrd` refuses to start?
|
||||
|
||||
Check you registered the standby after recloning. If unregistered the standby
|
||||
cannot be considered as a promotion candidate even if `failover` is set to
|
||||
`automatic`, which is probably not what you want. `repmgrd` will start if
|
||||
`failover` is set to `manual` so the node's replication status can still
|
||||
be monitored, if desired.
|
||||
|
||||
36
HISTORY
36
HISTORY
@@ -1,39 +1,3 @@
|
||||
3.2.1 2016-10-24
|
||||
repmgr: require a valid repmgr cluster name unless -F/--force
|
||||
supplied (Ian)
|
||||
repmgr: check master server is registered with repmgr before
|
||||
cloning (Ian)
|
||||
repmgr: ensure data directory defaults to that of the source node (Ian)
|
||||
repmgr: various fixes to Barman cloning mode (Gianni, Ian)
|
||||
repmgr: fix `repmgr cluster crosscheck` output (Ian)
|
||||
|
||||
3.2 2016-10-05
|
||||
repmgr: add support for cloning from a Barman backup (Gianni)
|
||||
repmgr: add commands `standby matrix` and `standby crosscheck` (Gianni)
|
||||
repmgr: suppress connection error display in `repmgr cluster show`
|
||||
unless `--verbose` supplied (Ian)
|
||||
repmgr: add commands `witness register` and `witness unregister` (Ian)
|
||||
repmgr: enable `standby unregister` / `witness unregister` to be
|
||||
executed for a node which is not running (Ian)
|
||||
repmgr: remove deprecated command line options --initdb-no-pwprompt and
|
||||
-l/--local-port (Ian)
|
||||
repmgr: before cloning with pg_basebackup, check that sufficient free
|
||||
walsenders are available (Ian)
|
||||
repmgr: add option `--wait-sync` for `standby register` which causes
|
||||
repmgr to wait for the registered node record to synchronise to
|
||||
the standby (Ian)
|
||||
repmgr: add option `--copy-external-config-files` for files outside
|
||||
of the data directory (Ian)
|
||||
repmgr: only require `wal_keep_segments` to be set in certain corner
|
||||
cases (Ian)
|
||||
repmgr: better support cloning from a node other than the one to
|
||||
stream from (Ian)
|
||||
repmgrd: add configuration options to override the default pg_ctl
|
||||
commands (Jarkko Oranen, Ian)
|
||||
repmgrd: don't start if node is inactive and failover=automatic (Ian)
|
||||
packaging: improve "repmgr-auto" Debian package (Gianni)
|
||||
|
||||
|
||||
3.1.5 2016-08-15
|
||||
repmgrd: in a failover situation, prevent endless looping when
|
||||
attempting to establish the status of a node with
|
||||
|
||||
6
Makefile
6
Makefile
@@ -87,12 +87,10 @@ PG_VERSION = $(shell pg_config --version | cut -d ' ' -f 2 | cut -d '.' -f 1,2)
|
||||
REPMGR_VERSION = $(shell grep REPMGR_VERSION version.h | cut -d ' ' -f 3 | cut -d '"' -f 2)
|
||||
PKGLIBDIR = $(shell pg_config --pkglibdir)
|
||||
SHAREDIR = $(shell pg_config --sharedir)
|
||||
PGBINDIR = /usr/lib/postgresql/$(PG_VERSION)/bin
|
||||
|
||||
deb: repmgrd repmgr
|
||||
mkdir -p ./debian/usr/bin ./debian$(PGBINDIR)
|
||||
cp repmgrd repmgr ./debian$(PGBINDIR)
|
||||
ln -s ../..$(PGBINDIR)/repmgr ./debian/usr/bin/repmgr
|
||||
mkdir -p ./debian/usr/bin
|
||||
cp repmgrd repmgr ./debian/usr/bin/
|
||||
mkdir -p ./debian$(SHAREDIR)/contrib/
|
||||
cp sql/repmgr_funcs.sql ./debian$(SHAREDIR)/contrib/
|
||||
cp sql/uninstall_repmgr_funcs.sql ./debian$(SHAREDIR)/contrib/
|
||||
|
||||
448
README.md
448
README.md
@@ -7,8 +7,6 @@ replication capabilities with utilities to set up standby servers, monitor
|
||||
replication, and perform administrative tasks such as failover or switchover
|
||||
operations.
|
||||
|
||||
The current `repmgr` version, 3.2, supports all PostgreSQL versions from
|
||||
9.3 to 9.6.
|
||||
|
||||
Overview
|
||||
--------
|
||||
@@ -145,27 +143,10 @@ The `repmgr` tools must be installed on each server in the replication cluster.
|
||||
|
||||
A dedicated system user for `repmgr` is *not* required; as many `repmgr` and
|
||||
`repmgrd` actions require direct access to the PostgreSQL data directory,
|
||||
these commands should be executed by the `postgres` user.
|
||||
it should be executed by the `postgres` user.
|
||||
|
||||
Passwordless `ssh` connectivity between all servers in the replication cluster
|
||||
is not required, but is necessary in the following cases:
|
||||
|
||||
* if you need `repmgr` to copy configuration files from outside the PostgreSQL
|
||||
data directory
|
||||
* when using `rsync` to clone a standby
|
||||
* to perform switchover operations
|
||||
* when executing `repmgr cluster matrix` and `repmgr cluster crosscheck`
|
||||
|
||||
In these cases `rsync` is required on all servers too.
|
||||
|
||||
* * *
|
||||
|
||||
> *TIP*: We recommend using a session multiplexer utility such as `screen` or
|
||||
> `tmux` when performing long-running actions (such as cloning a database)
|
||||
> on a remote server - this will ensure the `repmgr` action won't be prematurely
|
||||
> terminated if your `ssh` session to the server is interrupted or closed.
|
||||
|
||||
* * *
|
||||
Additionally, we recommend installing `rsync` and enabling passwordless
|
||||
`ssh` connectivity between all servers in the replication cluster.
|
||||
|
||||
### Packages
|
||||
|
||||
@@ -288,7 +269,7 @@ both servers.
|
||||
### PostgreSQL configuration
|
||||
|
||||
On the master server, a PostgreSQL instance must be initialised and running.
|
||||
The following replication settings may need to be adjusted:
|
||||
The following replication settings must be included in `postgresql.conf`:
|
||||
|
||||
|
||||
# Enable replication connections; set this figure to at least one more
|
||||
@@ -303,6 +284,13 @@ The following replication settings may need to be adjusted:
|
||||
|
||||
wal_level = 'hot_standby'
|
||||
|
||||
# How much WAL to retain on the master to allow a temporarily
|
||||
# disconnected standby to catch up again. The larger this is, the
|
||||
# longer the standby can be disconnected. This is needed only in
|
||||
# 9.3; from 9.4, replication slots can be used instead (see below).
|
||||
|
||||
wal_keep_segments = 5000
|
||||
|
||||
# Enable read-only queries on a standby
|
||||
# (Note: this will be ignored on a master but we recommend including
|
||||
# it anyway)
|
||||
@@ -317,14 +305,6 @@ The following replication settings may need to be adjusted:
|
||||
# ignores archiving. Use something more sensible.
|
||||
archive_command = '/bin/true'
|
||||
|
||||
# If cloning using rsync, or you have configured `pg_basebackup_options`
|
||||
# in `repmgr.conf` to include the setting `--xlog-method=fetch`, *and*
|
||||
# you have not set `restore_command` in `repmgr.conf`to fetch WAL files
|
||||
# from another source such as Barman, you'll need to set `wal_keep_segments`
|
||||
# to a high enough value to ensure that all WAL files generated while
|
||||
# the standby is being cloned are retained until the standby starts up.
|
||||
|
||||
# wal_keep_segments = 5000
|
||||
|
||||
* * *
|
||||
|
||||
@@ -395,16 +375,6 @@ to include this schema name, e.g.
|
||||
|
||||
ALTER USER repmgr SET search_path TO repmgr_test, "$user", public;
|
||||
|
||||
* * *
|
||||
|
||||
> *TIP*: for Debian-based distributions we recommend explicitly setting
|
||||
> `pg_bindir` to the directory where `pg_ctl` and other binaries not in
|
||||
> the standard path are located. For PostgreSQL 9.5 this would be
|
||||
> `/usr/lib/postgresql/9.5/bin/`.
|
||||
|
||||
* * *
|
||||
|
||||
|
||||
### Initialise the master server
|
||||
|
||||
To enable `repmgr` to support a replication cluster, the master node must
|
||||
@@ -450,35 +420,8 @@ Clone the standby with:
|
||||
This will clone the PostgreSQL data directory files from the master at `repmgr_node1`
|
||||
using PostgreSQL's `pg_basebackup` utility. A `recovery.conf` file containing the
|
||||
correct parameters to start streaming from this master server will be created
|
||||
automatically.
|
||||
|
||||
Note that by default, any configuration files in the master's data directory will be
|
||||
copied to the standby. Typically these will be `postgresql.conf`, `postgresql.auto.conf`,
|
||||
`pg_hba.conf` and `pg_ident.conf`. These may require modification before the standby
|
||||
is started so it functions as desired.
|
||||
|
||||
In some cases (e.g. on Debian or Ubuntu Linux installations), PostgreSQL's
|
||||
configuration files are located outside of the data directory and will
|
||||
not be copied by default. `repmgr` can copy these files, either to the same
|
||||
location on the standby server (provided appropriate directory and file permissions
|
||||
are available), or into the standby's data directory. This requires passwordless
|
||||
SSH access to the master server. Add the option `--copy-external-config-files`
|
||||
to the `repmgr standby clone` command; by default files will be copied to
|
||||
the same path as on the upstream server. To have them placed in the standby's
|
||||
data directory, specify `--copy-external-config-files=pgdata`, but note that
|
||||
any include directives in the copied files may need to be updated.
|
||||
|
||||
*Caveat*: when copying external configuration files: `repmgr` will only be able
|
||||
to detect files which contain active settings. If a file is referenced by
|
||||
an include directive but is empty, only contains comments or contains
|
||||
settings which have not been activated, the file will not be copied.
|
||||
|
||||
* * *
|
||||
|
||||
> *TIP*: for reliable configuration file management we recommend using a
|
||||
> configuration management tool such as Ansible, Chef, Puppet or Salt.
|
||||
|
||||
* * *
|
||||
automatically, and unless otherwise specified, the `postgresql.conf` and `pg_hba.conf`
|
||||
files will be copied from the master.
|
||||
|
||||
Be aware that when initially cloning a standby, you will need to ensure
|
||||
that all required WAL files remain available while the cloning is taking
|
||||
@@ -560,102 +503,13 @@ standby's upstream server is the replication cluster master. While of limited
|
||||
use in a simple master/standby replication cluster, this information is required
|
||||
to effectively manage cascading replication (see below).
|
||||
|
||||
* * *
|
||||
|
||||
> *TIP*: depending on your environment and workload, it may take some time for
|
||||
> the standby's node record to propagate from the master to the standby. Some
|
||||
> actions (such as starting `repmgrd`) require that the standby's node record
|
||||
> is present and up-to-date to function correctly - by providing the option
|
||||
> `--wait-sync` to the `repmgr standby register` command, `repmgr` will wait
|
||||
> until the record is synchronised before exiting. An optional timeout (in
|
||||
> seconds) can be added to this option (e.g. `--wait-sync=60`).
|
||||
|
||||
* * *
|
||||
|
||||
### Using Barman to clone a standby
|
||||
|
||||
`repmgr standby clone` also supports Barman, the Backup and
|
||||
Replication manager (http://www.pgbarman.org/), as a provider of both
|
||||
base backups and WAL files.
|
||||
|
||||
Barman support provides the following advantages:
|
||||
|
||||
- the master node does not need to perform a new backup every time a
|
||||
new standby is cloned;
|
||||
- a standby node can be disconnected for longer periods without losing
|
||||
the ability to catch up, and without causing accumulation of WAL
|
||||
files on the master node;
|
||||
- therefore, `repmgr` does not need to use replication slots, and the
|
||||
master node does not need to set `wal_keep_segments`.
|
||||
|
||||
> *NOTE*: In view of the above, Barman support is incompatible with
|
||||
> the `use_replication_slots` setting in `repmgr.conf`.
|
||||
|
||||
In order to enable Barman support for `repmgr standby clone`, you must
|
||||
ensure that:
|
||||
|
||||
- the name of the server configured in Barman is equal to the
|
||||
`cluster_name` setting in `repmgr.conf`;
|
||||
- the `barman_server` setting in `repmgr.conf` is set to the SSH
|
||||
hostname of the Barman server;
|
||||
- the `restore_command` setting in `repmgr.conf` is configured to
|
||||
use a copy of the `barman-wal-restore` script shipped with the
|
||||
`barman-cli` package (see below);
|
||||
- the Barman catalogue includes at least one valid backup for this
|
||||
server.
|
||||
|
||||
> *NOTE*: Barman support is automatically enabled if `barman_server`
|
||||
> is set. Normally it is a good practice to use Barman, for instance
|
||||
> when fetching a base backup while cloning a standby; in any case,
|
||||
> Barman mode can be disabled using the `--without-barman` command
|
||||
> line option.
|
||||
|
||||
> *NOTE*: if you have a non-default SSH configuration on the Barman
|
||||
> server, e.g. using a port other than 22, then you can set those
|
||||
> parameters in a dedicated Host section in `~/.ssh/config`
|
||||
> corresponding to the value of `barman_server` in `repmgr.conf`. See
|
||||
> the "Host" section in `man 5 ssh_config` for more details.
|
||||
|
||||
`barman-wal-restore` is a Python script provided by the Barman
|
||||
development team as part of the `barman-cli` package (Barman 2.0
|
||||
and later; for Barman 1.x the script is provided separately as
|
||||
`barman-wal-restore.py`).
|
||||
|
||||
`restore_command` must then be set in `repmgr.conf` as follows:
|
||||
|
||||
<script> <Barman hostname> <cluster_name> %f %p
|
||||
|
||||
For instance, suppose that we have installed Barman on the `barmansrv`
|
||||
host, and that `barman-wal-restore` is located as an executable at
|
||||
`/usr/bin/barman-wal-restore`; `repmgr.conf` should include the following
|
||||
lines:
|
||||
|
||||
barman_server=barmansrv
|
||||
restore_command=/usr/bin/barman-wal-restore barmansrv test %f %p
|
||||
|
||||
NOTE: to use a non-default Barman configuration file on the Barman server,
|
||||
specify this in `repmgr.conf` with `barman_config`:
|
||||
|
||||
barman_config=/path/to/barman.conf
|
||||
|
||||
Now we can clone a standby using the Barman server:
|
||||
|
||||
$ repmgr -h node1 -D 9.5/main -f /etc/repmgr.conf standby clone
|
||||
[2016-06-12 20:08:35] [NOTICE] destination directory '9.5/main' provided
|
||||
[2016-06-12 20:08:35] [NOTICE] getting backup from Barman...
|
||||
[2016-06-12 20:08:36] [NOTICE] standby clone (from Barman) complete
|
||||
[2016-06-12 20:08:36] [NOTICE] you can now start your PostgreSQL server
|
||||
[2016-06-12 20:08:36] [HINT] for example : pg_ctl -D 9.5/data start
|
||||
[2016-06-12 20:08:36] [HINT] After starting the server, you need to register this standby with "repmgr standby register"
|
||||
|
||||
|
||||
|
||||
Advanced options for cloning a standby
|
||||
--------------------------------------
|
||||
|
||||
The above section demonstrates the simplest possible way to clone a standby
|
||||
server. Depending on your circumstances, finer-grained control over the
|
||||
cloning process may be necessary.
|
||||
The above section demonstrates the simplest possible way to clone a standby
|
||||
server. Depending on your circumstances, finer-grained control over the cloning
|
||||
process may be necessary.
|
||||
|
||||
### pg_basebackup options when cloning a standby
|
||||
|
||||
@@ -668,7 +522,11 @@ so should be used with care.
|
||||
Further options can be passed to the `pg_basebackup` utility via
|
||||
the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
||||
documentation for more details of available options:
|
||||
<<<<<<< HEAD
|
||||
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||
=======
|
||||
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||
>>>>>>> 72f9b0145afab1060dd1202c8f8937653c8b2e39
|
||||
|
||||
### Using rsync to clone a standby
|
||||
|
||||
@@ -686,33 +544,35 @@ and destination server as the contents of files existing on both servers need
|
||||
to be compared, meaning this method is not necessarily faster than making a
|
||||
fresh clone with `pg_basebackup`.
|
||||
|
||||
> *NOTE*: `barman-wal-restore` supports command line switches to
|
||||
> control parallelism (`--parallel=N`) and compression (`--bzip2`,
|
||||
> `--gzip`).
|
||||
### Dealing with PostgreSQL configuration files
|
||||
|
||||
By default, `repmgr` will attempt to copy the standard configuration files
|
||||
(`postgresql.conf`, `pg_hba.conf` and `pg_ident.conf`) even if they are located
|
||||
outside of the data directory (though currently they will be copied
|
||||
into the standby's data directory). To prevent this happening, when executing
|
||||
`repmgr standby clone` provide the `--ignore-external-config-files` option.
|
||||
|
||||
If using `rsync` to clone a standby, additional control over which files
|
||||
not to transfer is possible by configuring `rsync_options` in `repmgr.conf`,
|
||||
which enables any valid `rsync` options to be passed to that command, e.g.:
|
||||
|
||||
rsync_options='--exclude=postgresql.local.conf'
|
||||
|
||||
### Controlling `primary_conninfo` in `recovery.conf`
|
||||
|
||||
The `primary_conninfo` setting in `recovery.conf` generated by `repmgr`
|
||||
is generated from the following sources, in order of highest to lowest priority:
|
||||
|
||||
- the upstream node's `conninfo` setting (as defined in the `repl_nodes` table)
|
||||
- the connection parameters provided to `repmgr standby clone`
|
||||
- PostgreSQL's standard connection defaults, including any environment variables
|
||||
set on the local node.
|
||||
`repmgr` will create the `primary_conninfo` setting in `recovery.conf` based
|
||||
on the connection parameters provided to `repmgr standby clone` and PostgreSQL's
|
||||
standard connection defaults, including any environment variables set on the
|
||||
local node.
|
||||
|
||||
To include specific connection parameters other than the standard host, port,
|
||||
username and database values (e.g. `sslmode`), include these in a `conninfo`-style
|
||||
string passed to `repmgr` with `-d/--dbname` (see above for details), and/or set
|
||||
string passed to `repmgr` with `-d/--dbname` (see above for details), and/or set
|
||||
appropriate environment variables.
|
||||
|
||||
Note that PostgreSQL will always set explicit defaults for `sslmode` and
|
||||
`sslcompression`.
|
||||
|
||||
If `application_name` is set in the standby's `conninfo` parameter in
|
||||
`repmgr.conf`, this value will be appended to `primary_conninfo`, otherwise
|
||||
`repmgr` will set `application_name` to the same value as the `node_name`
|
||||
parameter.
|
||||
|
||||
|
||||
Setting up cascading replication with repmgr
|
||||
--------------------------------------------
|
||||
@@ -1012,7 +872,7 @@ should have been updated to reflect this:
|
||||
at a two-server master/standby replication cluster and currently does
|
||||
not support additional standbys.
|
||||
- `repmgr standby switchover` is designed to use the `pg_rewind` utility,
|
||||
standard in 9.5 and later and available separately in 9.3 and 9.4
|
||||
standard in 9.5 and later and available separately in 9.3 and 9.4
|
||||
(see note below)
|
||||
- `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
@@ -1024,7 +884,7 @@ should have been updated to reflect this:
|
||||
instructed to point to the new master (e.g. with `repmgr standby follow`).
|
||||
- You must ensure that following a server start using `pg_ctl`, log output
|
||||
is not send to STDERR (the default behaviour). If logging is not configured,
|
||||
we recommend setting `logging_collector=on` in `postgresql.conf` and
|
||||
We recommend setting `logging_collector=on` in `postgresql.conf` and
|
||||
providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options`
|
||||
parameter.
|
||||
|
||||
@@ -1040,7 +900,7 @@ will have diverged slightly following the shutdown of the old master.
|
||||
|
||||
The utility `pg_rewind` provides an efficient way of doing this, however
|
||||
is not included in the core PostgreSQL distribution for versions 9.3 and 9.4.
|
||||
However, `pg_rewind` is available separately for these versions and we
|
||||
However, `pg_rewind` is available separately for these versions and we
|
||||
strongly recommend its installation. To use it with versions 9.3 and 9.4,
|
||||
provide the command line option `--pg_rewind`, optionally with the
|
||||
path to the `pg_rewind` binary location if not installed in the PostgreSQL
|
||||
@@ -1049,10 +909,6 @@ path to the `pg_rewind` binary location if not installed in the PostgreSQL
|
||||
`pg_rewind` for versions 9.3 and 9.4 can be obtained from:
|
||||
https://github.com/vmware/pg_rewind
|
||||
|
||||
Note that building this version of `pg_rewind` requires the PostgreSQL source
|
||||
code. Also, PostgreSQL 9.3 does not provide `wal_log_hints`, meaning data
|
||||
checksums must have been enabled when the database was initialized.
|
||||
|
||||
If `pg_rewind` is not available, as a fallback `repmgr` will use `repmgr
|
||||
standby clone` to resynchronise the old master's data directory using
|
||||
`rsync`. However, in order to ensure all files are synchronised, the
|
||||
@@ -1075,13 +931,13 @@ recorded in the `repl_events` table.
|
||||
Note that this command will not stop the server itself or remove
|
||||
it from the replication cluster.
|
||||
|
||||
If the standby is not running, the command can be executed on another
|
||||
node by providing the id of the node to be unregistered using
|
||||
the command line parameter `--node`, e.g. executing the following
|
||||
command on the master server will unregister the standby with
|
||||
id 3:
|
||||
If the standby is not running, the standby record must be manually
|
||||
removed from the `repl_nodes` table with e.g.:
|
||||
|
||||
repmgr standby unregister -f /etc/repmgr.conf --node=3
|
||||
DELETE FROM repmgr_test.repl_nodes WHERE id = 3;
|
||||
|
||||
Adjust schema and node ID accordingly. A future `repmgr` release
|
||||
will make it possible to unregister failed standbys.
|
||||
|
||||
|
||||
Automatic failover with `repmgrd`
|
||||
@@ -1095,8 +951,8 @@ To use `repmgrd` for automatic failover, the following `repmgrd` options must
|
||||
be set in `repmgr.conf`:
|
||||
|
||||
failover=automatic
|
||||
promote_command='repmgr standby promote -f /etc/repmgr.conf'
|
||||
follow_command='repmgr standby follow -f /etc/repmgr.conf'
|
||||
promote_command='repmgr standby promote -f /etc/repmgr/repmgr.conf'
|
||||
follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf'
|
||||
|
||||
(See `repmgr.conf.sample` for further `repmgrd`-specific settings).
|
||||
|
||||
@@ -1384,8 +1240,7 @@ The following event types are available:
|
||||
* `standby_switchover`
|
||||
* `standby_disconnect_manual`
|
||||
* `witness_create`
|
||||
* `witness_register`
|
||||
* `witness_unregister`
|
||||
* `witness_create`
|
||||
* `repmgrd_start`
|
||||
* `repmgrd_shutdown`
|
||||
* `repmgrd_failover_promote`
|
||||
@@ -1407,42 +1262,6 @@ In general `repmgr` can be upgraded as-is without any further action required,
|
||||
however feature releases may require the `repmgr` database to be upgraded.
|
||||
An SQL script will be provided - please check the release notes for details.
|
||||
|
||||
|
||||
Distribution-specific configuration
|
||||
-----------------------------------
|
||||
|
||||
`repmgr` is largely OS-agnostic and can be run on any UNIX-like environment
|
||||
including various Linux distributions, Solaris, macOS and the various BSDs.
|
||||
|
||||
However, often OS-specific configuration is required, particularly when
|
||||
dealing with system service management (e.g. stopping and starting the
|
||||
PostgreSQL server), file paths and configuration file locations.
|
||||
|
||||
### PostgreSQL server control
|
||||
|
||||
By default, `repmgr` will use PostgreSQL's standard `pg_ctl` utility to control
|
||||
a running PostgreSQL server. However it may be better to use the operating
|
||||
system's service management system, e.g. `systemd`. To specify which service
|
||||
control commands are used, the following `repmgr.conf` configuration settings
|
||||
are available:
|
||||
|
||||
service_start_command
|
||||
service_stop_command
|
||||
service_restart_command
|
||||
service_reload_command
|
||||
service_promote_command
|
||||
|
||||
See `repmgr.conf.sample` for further details.
|
||||
|
||||
### Binary directory
|
||||
|
||||
Some PostgreSQL system packages, such as those provided for Debian/Ubuntu, like
|
||||
to hide some PostgreSQL utility programs outside of the default path. To ensure
|
||||
`repmgr` finds all required executables, explicitly set `pg_bindir` to the
|
||||
appropriate location, e.g. for PostgreSQL 9.6 on Debian/Ubuntu this would be
|
||||
`/usr/lib/postgresql/9.6/bin/`.
|
||||
|
||||
|
||||
Reference
|
||||
---------
|
||||
|
||||
@@ -1566,31 +1385,17 @@ which contains connection details for the local database.
|
||||
|
||||
This command also requires the location of the witness server's data
|
||||
directory to be provided (`-D/--datadir`) as well as valid connection
|
||||
parameters for the master server. If not explicitly provided,
|
||||
database and user names will be extracted from the `conninfo` string in
|
||||
`repmgr.conf`.
|
||||
parameters for the master server.
|
||||
|
||||
By default this command will create a superuser and a repmgr user.
|
||||
The `repmgr` user name will be extracted from the `conninfo` string
|
||||
in `repmgr.conf`.
|
||||
|
||||
* `witness register`
|
||||
|
||||
This will set up the witness server configuration, including the witness
|
||||
server's copy of the `repmgr` meta database, on a running PostgreSQL
|
||||
instance and register the witness server with the master. It requires
|
||||
the same command line options as `witness create`.
|
||||
|
||||
* `witness unregister`
|
||||
|
||||
Removes the entry for a witness server from the `repl_nodes` table. This
|
||||
command will not shut down the witness server or remove its data directory.
|
||||
|
||||
* `cluster show`
|
||||
|
||||
Displays information about each active node in the replication cluster. This
|
||||
command polls each registered server and shows its role (`master` / `standby` /
|
||||
`witness`) or `FAILED` if the node doesn't respond. It polls each server
|
||||
command polls each registered server and shows its role (master / standby /
|
||||
witness) or `FAILED` if the node doesn't respond. It polls each server
|
||||
directly and can be run on any node in the cluster; this is also useful
|
||||
when analyzing connectivity from a particular node.
|
||||
|
||||
@@ -1620,102 +1425,7 @@ which contains connection details for the local database.
|
||||
3,1
|
||||
|
||||
The first column is the node's ID, and the second column represents the
|
||||
node's status (0 = available, -1 = failed).
|
||||
|
||||
* `cluster matrix` and `cluster crosscheck`
|
||||
|
||||
These commands display connection information for each pair of
|
||||
nodes in the replication cluster.
|
||||
|
||||
- `cluster matrix` runs a `cluster show` on each node and arranges
|
||||
the results in a matrix, recording success or failure;
|
||||
|
||||
- `cluster crosscheck` runs a `cluster matrix` on each node and
|
||||
combines the results in a single matrix, providing a full
|
||||
overview of connections between all databases in the cluster.
|
||||
|
||||
These commands require a valid `repmgr.conf` file on each node.
|
||||
Additionally password-less `ssh` connections are required between
|
||||
all nodes.
|
||||
|
||||
Example 1 (all nodes up):
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | *
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | * | * | *
|
||||
|
||||
Here `cluster matrix` is sufficient to establish the state of each
|
||||
possible connection.
|
||||
|
||||
|
||||
Example 2 (node1 and `node2` up, `node3` down):
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | x
|
||||
node2 | 2 | * | * | x
|
||||
node3 | 3 | ? | ? | ?
|
||||
|
||||
Each row corresponds to one server, and indicates the result of
|
||||
testing an outbound connection from that server.
|
||||
|
||||
Since `node3` is down, all the entries in its row are filled with
|
||||
"?", meaning that we cannot test outbound connections.
|
||||
|
||||
The other two nodes are up; the corresponding rows have "x" in the
|
||||
column corresponding to node3, meaning that inbound connections to
|
||||
that node have failed, and "*" in the columns corresponding to
|
||||
node1 and node2, meaning that inbound connections to these nodes
|
||||
have succeeded.
|
||||
|
||||
In this case, `cluster crosscheck` gives the same result as `cluster
|
||||
matrix`, because from any functioning node we can observe the same
|
||||
state: `node1` and `node2` are up, `node3` is down.
|
||||
|
||||
Example 3 (all nodes up, firewall dropping packets originating
|
||||
from `node1` and directed to port 5432 on node3)
|
||||
|
||||
Running `cluster matrix` from `node1` gives the following output:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | x
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | ? | ? | ?
|
||||
|
||||
(Note this may take some time depending on the `connect_timeout`
|
||||
setting in the registered node `conninfo` strings; default is 1
|
||||
minute which means without modification the above command would
|
||||
take around 2 minutes to run; see comment elsewhere about setting
|
||||
`connect_timeout`)
|
||||
|
||||
The matrix tells us that we cannot connect from `node1` to `node3`,
|
||||
and that (therefore) we don't know the state of any outbound
|
||||
connection from node3.
|
||||
|
||||
In this case, the `cluster crosscheck` command is more informative:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster crosscheck
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | x
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | * | * | *
|
||||
|
||||
What happened is that `cluster crosscheck` merged its own `cluster
|
||||
matrix` with the `cluster matrix` output from `node2`; the latter is
|
||||
able to connect to `node3` and therefore determine the state of
|
||||
outbound connections from that node.
|
||||
|
||||
node's status (0 = master, 1 = standby, -1 = failed).
|
||||
|
||||
* `cluster cleanup`
|
||||
|
||||
@@ -1729,45 +1439,27 @@ which contains connection details for the local database.
|
||||
the current working directory; no additional arguments are required.
|
||||
|
||||
|
||||
### Further documentation
|
||||
|
||||
As well as this README, the `repmgr` source contains following additional
|
||||
documentation files:
|
||||
|
||||
* FAQ.md - frequently asked questions
|
||||
* CONTRIBUTING.md - how to contribute to `repmgr`
|
||||
* PACKAGES.md - details on building packages
|
||||
* SSH-RSYNC.md - how to set up passwordless SSH between nodes
|
||||
* docs/repmgrd-failover-mechanism.md - how repmgrd picks which node to promote
|
||||
* docs/repmgrd-node-fencing.md - how to "fence" a failed master node
|
||||
|
||||
|
||||
|
||||
|
||||
### Error codes
|
||||
|
||||
`repmgr` or `repmgrd` will return one of the following error codes on program
|
||||
exit:
|
||||
|
||||
* SUCCESS (0) Program ran successfully.
|
||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
||||
(repmgr only)
|
||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||
* ERR_DB_QUERY (7) Error while executing a database query
|
||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||
* ERR_STR_OVERFLOW (10) String overflow error
|
||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH (repmgr only)
|
||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup (repmgr only)
|
||||
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
||||
* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only)
|
||||
* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only)
|
||||
* ERR_BARMAN (19) Unrecoverable error while accessing the barman server (repmgr only)
|
||||
* ERR_REGISTRATION_SYNC (20) After registering a standby, local node record was not
|
||||
synchronised (repmgr only, with --wait option)
|
||||
* SUCCESS (0) Program ran successfully.
|
||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error (repmgr only)
|
||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||
* ERR_DB_QUERY (7) Error while executing a database query
|
||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||
* ERR_STR_OVERFLOW (10) String overflow error
|
||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH (repmgr only)
|
||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup (repmgr only)
|
||||
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
||||
* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only)
|
||||
* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only)
|
||||
|
||||
|
||||
Support and Assistance
|
||||
----------------------
|
||||
|
||||
80
config.c
80
config.c
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* config.c - Functions to parse the config file
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -27,7 +26,7 @@
|
||||
|
||||
static void parse_event_notifications_list(t_configuration_options *options, const char *arg);
|
||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||
static void exit_with_errors(ItemList *config_errors);
|
||||
static void exit_with_errors(ErrorList *config_errors);
|
||||
|
||||
const static char *_progname = NULL;
|
||||
static char config_file_path[MAXPGPATH];
|
||||
@@ -202,7 +201,7 @@ parse_config(t_configuration_options *options)
|
||||
char *conninfo_errmsg = NULL;
|
||||
|
||||
/* Collate configuration file errors here for friendlier reporting */
|
||||
static ItemList config_errors = { NULL, NULL };
|
||||
static ErrorList config_errors = { NULL, NULL };
|
||||
|
||||
bool node_found = false;
|
||||
|
||||
@@ -215,18 +214,14 @@ parse_config(t_configuration_options *options)
|
||||
options->upstream_node = NO_UPSTREAM_NODE;
|
||||
options->use_replication_slots = 0;
|
||||
memset(options->conninfo, 0, sizeof(options->conninfo));
|
||||
memset(options->barman_server, 0, sizeof(options->barman_server));
|
||||
memset(options->barman_config, 0, sizeof(options->barman_config));
|
||||
options->failover = MANUAL_FAILOVER;
|
||||
options->priority = DEFAULT_PRIORITY;
|
||||
memset(options->node_name, 0, sizeof(options->node_name));
|
||||
memset(options->promote_command, 0, sizeof(options->promote_command));
|
||||
memset(options->follow_command, 0, sizeof(options->follow_command));
|
||||
memset(options->service_stop_command, 0, sizeof(options->service_stop_command));
|
||||
memset(options->service_start_command, 0, sizeof(options->service_start_command));
|
||||
memset(options->service_restart_command, 0, sizeof(options->service_restart_command));
|
||||
memset(options->service_reload_command, 0, sizeof(options->service_reload_command));
|
||||
memset(options->service_promote_command, 0, sizeof(options->service_promote_command));
|
||||
memset(options->stop_command, 0, sizeof(options->stop_command));
|
||||
memset(options->start_command, 0, sizeof(options->start_command));
|
||||
memset(options->restart_command, 0, sizeof(options->restart_command));
|
||||
memset(options->rsync_options, 0, sizeof(options->rsync_options));
|
||||
memset(options->ssh_options, 0, sizeof(options->ssh_options));
|
||||
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||
@@ -314,10 +309,6 @@ parse_config(t_configuration_options *options)
|
||||
options->upstream_node = repmgr_atoi(value, "upstream_node", &config_errors, false);
|
||||
else if (strcmp(name, "conninfo") == 0)
|
||||
strncpy(options->conninfo, value, MAXLEN);
|
||||
else if (strcmp(name, "barman_server") == 0)
|
||||
strncpy(options->barman_server, value, MAXLEN);
|
||||
else if (strcmp(name, "barman_config") == 0)
|
||||
strncpy(options->barman_config, value, MAXLEN);
|
||||
else if (strcmp(name, "rsync_options") == 0)
|
||||
strncpy(options->rsync_options, value, QUERY_STR_LEN);
|
||||
else if (strcmp(name, "ssh_options") == 0)
|
||||
@@ -342,7 +333,7 @@ parse_config(t_configuration_options *options)
|
||||
}
|
||||
else
|
||||
{
|
||||
item_list_append(&config_errors,_("value for 'failover' must be 'automatic' or 'manual'\n"));
|
||||
error_list_append(&config_errors,_("value for 'failover' must be 'automatic' or 'manual'\n"));
|
||||
}
|
||||
}
|
||||
else if (strcmp(name, "priority") == 0)
|
||||
@@ -353,16 +344,12 @@ parse_config(t_configuration_options *options)
|
||||
strncpy(options->promote_command, value, MAXLEN);
|
||||
else if (strcmp(name, "follow_command") == 0)
|
||||
strncpy(options->follow_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_stop_command") == 0)
|
||||
strncpy(options->service_stop_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_start_command") == 0)
|
||||
strncpy(options->service_start_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_restart_command") == 0)
|
||||
strncpy(options->service_restart_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_reload_command") == 0)
|
||||
strncpy(options->service_reload_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_promote_command") == 0)
|
||||
strncpy(options->service_promote_command, value, MAXLEN);
|
||||
else if (strcmp(name, "stop_command") == 0)
|
||||
strncpy(options->stop_command, value, MAXLEN);
|
||||
else if (strcmp(name, "start_command") == 0)
|
||||
strncpy(options->start_command, value, MAXLEN);
|
||||
else if (strcmp(name, "restart_command") == 0)
|
||||
strncpy(options->restart_command, value, MAXLEN);
|
||||
else if (strcmp(name, "master_response_timeout") == 0)
|
||||
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
||||
/*
|
||||
@@ -419,7 +406,7 @@ parse_config(t_configuration_options *options)
|
||||
_("no value provided for parameter \"%s\""),
|
||||
name);
|
||||
|
||||
item_list_append(&config_errors, error_message_buf);
|
||||
error_list_append(&config_errors, error_message_buf);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -428,11 +415,11 @@ parse_config(t_configuration_options *options)
|
||||
|
||||
if (node_found == false)
|
||||
{
|
||||
item_list_append(&config_errors, _("\"node\": parameter was not found"));
|
||||
error_list_append(&config_errors, _("\"node\": parameter was not found"));
|
||||
}
|
||||
else if (options->node == 0)
|
||||
{
|
||||
item_list_append(&config_errors, _("\"node\": must be greater than zero"));
|
||||
error_list_append(&config_errors, _("\"node\": must be greater than zero"));
|
||||
}
|
||||
|
||||
if (strlen(options->conninfo))
|
||||
@@ -452,7 +439,7 @@ parse_config(t_configuration_options *options)
|
||||
_("\"conninfo\": %s"),
|
||||
conninfo_errmsg);
|
||||
|
||||
item_list_append(&config_errors, error_message_buf);
|
||||
error_list_append(&config_errors, error_message_buf);
|
||||
}
|
||||
|
||||
PQconninfoFree(conninfo_options);
|
||||
@@ -647,13 +634,6 @@ reload_config(t_configuration_options *orig_options)
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* barman_server */
|
||||
if (strcmp(orig_options->barman_server, new_options.barman_server) != 0)
|
||||
{
|
||||
strcpy(orig_options->barman_server, new_options.barman_server);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* node */
|
||||
if (orig_options->node != new_options.node)
|
||||
{
|
||||
@@ -790,11 +770,11 @@ reload_config(t_configuration_options *orig_options)
|
||||
|
||||
|
||||
void
|
||||
item_list_append(ItemList *item_list, char *error_message)
|
||||
error_list_append(ErrorList *error_list, char *error_message)
|
||||
{
|
||||
ItemListCell *cell;
|
||||
ErrorListCell *cell;
|
||||
|
||||
cell = (ItemListCell *) pg_malloc0(sizeof(ItemListCell));
|
||||
cell = (ErrorListCell *) pg_malloc0(sizeof(ErrorListCell));
|
||||
|
||||
if (cell == NULL)
|
||||
{
|
||||
@@ -802,19 +782,19 @@ item_list_append(ItemList *item_list, char *error_message)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
cell->string = pg_malloc0(MAXLEN);
|
||||
strncpy(cell->string, error_message, MAXLEN);
|
||||
cell->error_message = pg_malloc0(MAXLEN);
|
||||
strncpy(cell->error_message, error_message, MAXLEN);
|
||||
|
||||
if (item_list->tail)
|
||||
if (error_list->tail)
|
||||
{
|
||||
item_list->tail->next = cell;
|
||||
error_list->tail->next = cell;
|
||||
}
|
||||
else
|
||||
{
|
||||
item_list->head = cell;
|
||||
error_list->head = cell;
|
||||
}
|
||||
|
||||
item_list->tail = cell;
|
||||
error_list->tail = cell;
|
||||
}
|
||||
|
||||
|
||||
@@ -824,7 +804,7 @@ item_list_append(ItemList *item_list, char *error_message)
|
||||
* otherwise exit
|
||||
*/
|
||||
int
|
||||
repmgr_atoi(const char *value, const char *config_item, ItemList *error_list, bool allow_negative)
|
||||
repmgr_atoi(const char *value, const char *config_item, ErrorList *error_list, bool allow_negative)
|
||||
{
|
||||
char *endptr;
|
||||
long longval = 0;
|
||||
@@ -873,7 +853,7 @@ repmgr_atoi(const char *value, const char *config_item, ItemList *error_list, bo
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
item_list_append(error_list, error_message_buf);
|
||||
error_list_append(error_list, error_message_buf);
|
||||
}
|
||||
|
||||
return (int32) longval;
|
||||
@@ -1015,15 +995,15 @@ parse_event_notifications_list(t_configuration_options *options, const char *arg
|
||||
|
||||
|
||||
static void
|
||||
exit_with_errors(ItemList *config_errors)
|
||||
exit_with_errors(ErrorList *config_errors)
|
||||
{
|
||||
ItemListCell *cell;
|
||||
ErrorListCell *cell;
|
||||
|
||||
log_err(_("%s: following errors were found in the configuration file.\n"), progname());
|
||||
|
||||
for (cell = config_errors->head; cell; cell = cell->next)
|
||||
{
|
||||
log_err("%s\n", cell->string);
|
||||
log_err("%s\n", cell->error_message);
|
||||
}
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
53
config.h
53
config.h
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* config.h
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -58,20 +57,14 @@ typedef struct
|
||||
int node;
|
||||
int upstream_node;
|
||||
char conninfo[MAXLEN];
|
||||
char barman_server[MAXLEN];
|
||||
char barman_config[MAXLEN];
|
||||
int failover;
|
||||
int priority;
|
||||
char node_name[MAXLEN];
|
||||
/* commands executed by repmgrd */
|
||||
char promote_command[MAXLEN];
|
||||
char follow_command[MAXLEN];
|
||||
/* Overrides for pg_ctl commands */
|
||||
char service_stop_command[MAXLEN];
|
||||
char service_start_command[MAXLEN];
|
||||
char service_restart_command[MAXLEN];
|
||||
char service_reload_command[MAXLEN];
|
||||
char service_promote_command[MAXLEN];
|
||||
char stop_command[MAXLEN];
|
||||
char start_command[MAXLEN];
|
||||
char restart_command[MAXLEN];
|
||||
char loglevel[MAXLEN];
|
||||
char logfacility[MAXLEN];
|
||||
char rsync_options[QUERY_STR_LEN];
|
||||
@@ -97,35 +90,19 @@ typedef struct
|
||||
* The following will initialize the structure with a minimal set of options;
|
||||
* actual defaults are set in parse_config() before parsing the configuration file
|
||||
*/
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", "", "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
|
||||
typedef struct ItemListCell
|
||||
typedef struct ErrorListCell
|
||||
{
|
||||
struct ItemListCell *next;
|
||||
char *string;
|
||||
} ItemListCell;
|
||||
struct ErrorListCell *next;
|
||||
char *error_message;
|
||||
} ErrorListCell;
|
||||
|
||||
typedef struct ItemList
|
||||
typedef struct ErrorList
|
||||
{
|
||||
ItemListCell *head;
|
||||
ItemListCell *tail;
|
||||
} ItemList;
|
||||
|
||||
typedef struct TablespaceDataListCell
|
||||
{
|
||||
struct TablespaceDataListCell *next;
|
||||
char *name;
|
||||
char *oid;
|
||||
char *location;
|
||||
/* optional payload */
|
||||
FILE *f;
|
||||
} TablespaceDataListCell;
|
||||
|
||||
typedef struct TablespaceDataList
|
||||
{
|
||||
TablespaceDataListCell *head;
|
||||
TablespaceDataListCell *tail;
|
||||
} TablespaceDataList;
|
||||
ErrorListCell *head;
|
||||
ErrorListCell *tail;
|
||||
} ErrorList;
|
||||
|
||||
void set_progname(const char *argv0);
|
||||
const char * progname(void);
|
||||
@@ -135,10 +112,10 @@ bool reload_config(t_configuration_options *orig_options);
|
||||
bool parse_config(t_configuration_options *options);
|
||||
void parse_line(char *buff, char *name, char *value);
|
||||
char *trim(char *s);
|
||||
void item_list_append(ItemList *item_list, char *error_message);
|
||||
void error_list_append(ErrorList *error_list, char *error_message);
|
||||
int repmgr_atoi(const char *s,
|
||||
const char *config_item,
|
||||
ItemList *error_list,
|
||||
ErrorList *error_list,
|
||||
bool allow_negative);
|
||||
extern bool config_file_found;
|
||||
|
||||
#endif
|
||||
|
||||
50
dbutils.c
50
dbutils.c
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* dbutils.c - Database connection/management functions
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -214,7 +213,7 @@ check_cluster_schema(PGconn *conn)
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = '%s'",
|
||||
"SELECT 1 FROM pg_namespace WHERE nspname = '%s'",
|
||||
get_repmgr_schema());
|
||||
|
||||
log_verbose(LOG_DEBUG, "check_cluster_schema(): %s\n", sqlquery);
|
||||
@@ -409,7 +408,7 @@ guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||
int retval = 1;
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT true FROM pg_catalog.pg_settings "
|
||||
"SELECT true FROM pg_settings "
|
||||
" WHERE name = '%s' AND setting %s '%s'",
|
||||
parameter, op, value);
|
||||
|
||||
@@ -445,7 +444,7 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
int retval = 1;
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT true FROM pg_catalog.pg_settings "
|
||||
"SELECT true FROM pg_settings "
|
||||
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
||||
parameter, datatype, op, value, datatype);
|
||||
|
||||
@@ -477,7 +476,7 @@ get_cluster_size(PGconn *conn, char *size)
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT pg_catalog.pg_size_pretty(SUM(pg_catalog.pg_database_size(oid))::bigint) "
|
||||
" FROM pg_catalog.pg_database ");
|
||||
" FROM pg_database ");
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_cluster_size():\n%s\n", sqlquery);
|
||||
|
||||
@@ -504,11 +503,11 @@ get_pg_setting(PGconn *conn, const char *setting, char *output)
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
int i;
|
||||
bool success = false;
|
||||
bool success = true;
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT name, setting "
|
||||
" FROM pg_catalog.pg_settings WHERE name = '%s'",
|
||||
" FROM pg_settings WHERE name = '%s'",
|
||||
setting);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_pg_setting(): %s\n", sqlquery);
|
||||
@@ -945,7 +944,7 @@ get_repmgr_schema_quoted(PGconn *conn)
|
||||
|
||||
|
||||
bool
|
||||
create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg)
|
||||
create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int query_res;
|
||||
@@ -964,9 +963,8 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
|
||||
{
|
||||
if (strcmp(slot_info.slot_type, "physical") != 0)
|
||||
{
|
||||
appendPQExpBuffer(error_msg,
|
||||
_("Slot '%s' exists and is not a physical slot\n"),
|
||||
slot_name);
|
||||
log_err(_("Slot '%s' exists and is not a physical slot\n"),
|
||||
slot_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -978,9 +976,8 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
|
||||
return true;
|
||||
}
|
||||
|
||||
appendPQExpBuffer(error_msg,
|
||||
_("Slot '%s' already exists as an active slot\n"),
|
||||
slot_name);
|
||||
log_err(_("Slot '%s' already exists as an active slot\n"),
|
||||
slot_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -988,26 +985,25 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
|
||||
if (server_version_num >= 90600)
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_catalog.pg_create_physical_replication_slot('%s', TRUE)",
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)",
|
||||
slot_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_catalog.pg_create_physical_replication_slot('%s')",
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
}
|
||||
|
||||
log_debug(_("create_replication_slot(): Creating slot '%s' on master\n"), slot_name);
|
||||
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
||||
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
appendPQExpBuffer(error_msg,
|
||||
_("unable to create slot '%s' on the master node: %s\n"),
|
||||
slot_name,
|
||||
PQerrorMessage(conn));
|
||||
log_err(_("unable to create slot '%s' on the primary node: %s\n"),
|
||||
slot_name,
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
@@ -1025,7 +1021,7 @@ get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record)
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT slot_name, slot_type, active "
|
||||
" FROM pg_catalog.pg_replication_slots "
|
||||
" FROM pg_replication_slots "
|
||||
" WHERE slot_name = '%s' ",
|
||||
slot_name);
|
||||
|
||||
@@ -1227,8 +1223,7 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
|
||||
|
||||
/* Get current records from primary */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active "
|
||||
" FROM %s.repl_nodes",
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
|
||||
get_repmgr_schema_quoted(masterconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
@@ -1342,8 +1337,7 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes "
|
||||
" (id, type, upstream_node_id, cluster, "
|
||||
" name, conninfo, slot_name, "
|
||||
" priority, active) "
|
||||
" name, conninfo, slot_name, priority, active) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
node,
|
||||
@@ -1489,6 +1483,7 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
||||
PQerrorMessage(conn));
|
||||
|
||||
success = false;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1718,8 +1713,7 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
|
||||
|
||||
sqlquery_snprintf(
|
||||
sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, "
|
||||
" slot_name, priority, active"
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active"
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* dbutils.h
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -22,7 +21,6 @@
|
||||
#define _REPMGR_DBUTILS_H_
|
||||
|
||||
#include "access/xlogdefs.h"
|
||||
#include "pqexpbuffer.h"
|
||||
|
||||
#include "config.h"
|
||||
#include "strutil.h"
|
||||
@@ -79,7 +77,7 @@ typedef struct s_replication_slot
|
||||
bool active;
|
||||
} t_replication_slot;
|
||||
|
||||
extern char repmgr_schema[MAXLEN];
|
||||
|
||||
|
||||
PGconn *_establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error,
|
||||
@@ -119,7 +117,7 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
bool cancel_query(PGconn *conn, int timeout);
|
||||
char *get_repmgr_schema(void);
|
||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
|
||||
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||
|
||||
2
debian/DEBIAN/control
vendored
2
debian/DEBIAN/control
vendored
@@ -1,5 +1,5 @@
|
||||
Package: repmgr-auto
|
||||
Version: 3.2dev
|
||||
Version: 3.1.3
|
||||
Section: database
|
||||
Priority: optional
|
||||
Architecture: all
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
repmgrd's failover algorithm
|
||||
============================
|
||||
|
||||
When implementing automatic failover, there are two factors which are critical in
|
||||
ensuring the desired result is achieved:
|
||||
|
||||
- has the master node genuinely failed?
|
||||
- which is the best node to promote to the new master?
|
||||
|
||||
This document outlines repmgrd's decision-making process during automatic failover
|
||||
for standbys directly connected to the master node.
|
||||
|
||||
|
||||
Master node failure detection
|
||||
-----------------------------
|
||||
|
||||
If a `repmgrd` instance running on a PostgreSQL standby node is unable to connect to
|
||||
the master node, this doesn't neccesarily mean that the master is down and a
|
||||
failover is required. Factors such as network connectivity issues could mean that
|
||||
even though the standby node is isolated, the replication cluster as a whole
|
||||
is functioning correctly, and promoting the standby without further verification
|
||||
could result in a "split-brain" situation.
|
||||
|
||||
In the event that `repmgrd` is unable to connect to the master node, it will attempt
|
||||
to reconnect to the master server several times (as defined by the `reconnect_attempts`
|
||||
parameter in `repmgr.conf`), with reconnection attempts occurring at the interval
|
||||
specified by `reconnect_interval`. This happens to verify that the master is definitively
|
||||
not accessible (e.g. that connection was not lost due to a brief network glitch).
|
||||
|
||||
Appropriate values for these settings will depend very much on the replication
|
||||
cluster environment. There will necessarily be a trade-off between the time it
|
||||
takes to assume the master is not reachable, and the reliability of that conclusion.
|
||||
A standby in a different physical location to the master will probably need a longer
|
||||
check interval to rule out possible network issues, whereas one located in the same
|
||||
rack with a direct connection between servers could perform the check very quickly.
|
||||
|
||||
Note that it's possible the master comes back online after this point is reached,
|
||||
but before a new master has been selected; in this case it will be noticed
|
||||
during the selection of a new master and no actual failover will take place.
|
||||
|
||||
Promotion candidate selection
|
||||
-----------------------------
|
||||
|
||||
Once `repmgrd` has decided the master is definitively unreachable, following checks
|
||||
will be carried out:
|
||||
|
||||
* attempts to connect to all other nodes in the cluster (including the witness
|
||||
node, if defined) to establish the state of the cluster, including their
|
||||
current LSN
|
||||
|
||||
* If less than half of the nodes are visible (from the viewpoint
|
||||
of this node), `repmgrd` will not take any further action. This is to ensure that
|
||||
e.g. if a replication cluster is spread over multiple data centres, a split-brain
|
||||
situation does not occur if there is a network failure between datacentres. Note
|
||||
that if nodes are split evenly between data centres, a witness server can be
|
||||
used to establish the "majority" daat centre.
|
||||
|
||||
* `repmgrd` polls all visible servers and waits for each node to return a valid LSN;
|
||||
it updates the LSN previously stored for this node if it has increased since
|
||||
the initial check
|
||||
|
||||
* once all LSNs have been retrieved, `repmgrd` will check for the highest LSN; if
|
||||
its own node has the highest LSN, it will attempt to promote itself (using the
|
||||
command defined in `promote_command` in `repmgr.conf`). Note that if using
|
||||
`repmgr standby promote` as the promotion command, and the original master becomes available
|
||||
before the promotion takes effect, `repmgr` will return an error and no promotion
|
||||
will take place, and `repmgrd` will resume monitoring as usual.
|
||||
|
||||
* if the node is not the promotion candidate, `repmgrd` will execute the
|
||||
`follow_command` defined in `repmgr.conf`. If using `repmgr standby follow` here,
|
||||
`repmgr` will attempt to detect the new master node and attach to that.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
Fencing a failed master node with repmgrd and pgbouncer
|
||||
=======================================================
|
||||
|
||||
With automatic failover, it's essential to ensure that a failed master
|
||||
remains inaccessible to your application, even if it comes back online
|
||||
again, to avoid a split-brain situation.
|
||||
|
||||
By using `pgbouncer` together with `repmgrd`, it's possible to combine
|
||||
automatic failover with a process to isolate the failed master from
|
||||
your application and ensure that all connections which should go to
|
||||
the master are directed there smoothly without having to reconfigure
|
||||
your application. (Note that as a connection pooler, `pgbouncer` can
|
||||
benefit your application in other ways, but those are beyond the scope
|
||||
of this document).
|
||||
|
||||
* * *
|
||||
|
||||
> *WARNING*: automatic failover is tricky to get right. This document
|
||||
> demonstrates one possible implementation method, however you should
|
||||
> carefully configure and test any setup to suit the needs of your own
|
||||
> replication cluster/application.
|
||||
|
||||
* * *
|
||||
|
||||
In a failover situation, `repmgrd` promotes a standby to master by
|
||||
executing the command defined in `promote_command`. Normally this
|
||||
would be something like:
|
||||
|
||||
repmgr standby promote -f /etc/repmgr.conf
|
||||
|
||||
By wrapping this in a custom script which adjusts the `pgbouncer`
|
||||
configuration on all nodes, it's possible to fence the failed master
|
||||
and redirect write connections to the new master.
|
||||
|
||||
The script consists of three sections:
|
||||
|
||||
* commands to pause `pgbouncer` on all nodes
|
||||
* the promotion command itself
|
||||
* commands to reconfigure and restart `pgbouncer` on all nodes
|
||||
|
||||
Note that it requires password-less SSH access between all nodes to be
|
||||
able to update the `pgbouncer` configuration files.
|
||||
|
||||
For the purposes of this demonstration, we'll assume there are 3 nodes
|
||||
(master and two standbys), with `pgbouncer` listening on port 6432
|
||||
handling connections to a database called `appdb`. The `postgres`
|
||||
system user must have write access to the `pgbouncer` configuration
|
||||
file on all nodes, assumed to be at `/etc/pgbouncer.ini`.
|
||||
|
||||
The script also requires a template file containing global `pgbouncer`
|
||||
configuration, which should look something like this (adjust
|
||||
settings appropriately for your environment):
|
||||
|
||||
`/var/lib/postgres/repmgr/pgbouncer.ini.template`
|
||||
|
||||
[pgbouncer]
|
||||
|
||||
logfile = /var/log/pgbouncer/pgbouncer.log
|
||||
pidfile = /var/run/pgbouncer/pgbouncer.pid
|
||||
|
||||
listen_addr = *
|
||||
listen_port = 6532
|
||||
unix_socket_dir = /tmp
|
||||
|
||||
auth_type = trust
|
||||
auth_file = /etc/pgbouncer.auth
|
||||
|
||||
admin_users = postgres
|
||||
stats_users = postgres
|
||||
|
||||
pool_mode = transaction
|
||||
|
||||
max_client_conn = 100
|
||||
default_pool_size = 20
|
||||
min_pool_size = 5
|
||||
reserve_pool_size = 5
|
||||
reserve_pool_timeout = 3
|
||||
|
||||
log_connections = 1
|
||||
log_disconnections = 1
|
||||
log_pooler_errors = 1
|
||||
|
||||
The actual script is as follows; adjust the configurable items as appropriate:
|
||||
|
||||
`/var/lib/postgres/repmgr/promote.sh`
|
||||
|
||||
|
||||
#!/usr/bin/env bash
|
||||
set -u
|
||||
set -e
|
||||
|
||||
# Configurable items
|
||||
PGBOUNCER_HOSTS="node1 node2 node3"
|
||||
REPMGR_DB="repmgr"
|
||||
REPMGR_USER="repmgr"
|
||||
REPMGR_SCHEMA="repmgr_test"
|
||||
PGBOUNCER_CONFIG="/etc/pgbouncer.ini"
|
||||
PGBOUNCER_INI_TEMPLATE="/var/lib/postgres/repmgr/pgbouncer.ini.template"
|
||||
PGBOUNCER_DATABASE="appdb"
|
||||
|
||||
# 1. Pause running pgbouncer instances
|
||||
for HOST in $PGBOUNCER_HOSTS
|
||||
do
|
||||
psql -t -c "pause" -h $HOST -p $PORT -U postgres pgbouncer
|
||||
done
|
||||
|
||||
|
||||
# 2. Promote this node from standby to master
|
||||
|
||||
repmgr standby promote -f /etc/repmgr.conf
|
||||
|
||||
|
||||
# 3. Reconfigure pgbouncer instances
|
||||
|
||||
PGBOUNCER_INI_NEW="/tmp/pgbouncer.ini.new"
|
||||
|
||||
for HOST in $PGBOUNCER_HOSTS
|
||||
do
|
||||
# Recreate the pgbouncer config file
|
||||
echo -e "[databases]\n" > $PGBOUNCER_INI_NEW
|
||||
|
||||
psql -d $REPMGR_DB -U $REPMGR_USER -t -A \
|
||||
-c "SELECT '$PGBOUNCER_DATABASE= ' || conninfo || ' application_name=pgbouncer_$HOST' \
|
||||
FROM $REPMGR_SCHEMA.repl_nodes \
|
||||
WHERE active = TRUE AND type='master'" >> $PGBOUNCER_INI_NEW
|
||||
|
||||
cat $PGBOUNCER_INI_TEMPLATE >> $PGBOUNCER_INI_NEW
|
||||
|
||||
rsync $PGBOUNCER_INI_NEW $HOST:$PGBOUNCER_CONFIG
|
||||
|
||||
psql -tc "reload" -h $HOST -U postgres pgbouncer
|
||||
psql -tc "resume" -h $HOST -U postgres pgbouncer
|
||||
|
||||
done
|
||||
|
||||
# Clean up generated file
|
||||
rm $PGBOUNCER_INI_NEW
|
||||
|
||||
echo "Reconfiguration of pgbouncer complete"
|
||||
|
||||
Script and template file should be installed on each node where
|
||||
`repmgrd` is running.
|
||||
|
||||
Finally, set `promote_command` in `repmgr.conf` on each node to
|
||||
point to the custom promote script:
|
||||
|
||||
promote_command=/var/lib/postgres/repmgr/promote.sh
|
||||
|
||||
and reload/restart any running `repmgrd` instances for the changes to take
|
||||
effect.
|
||||
@@ -38,8 +38,5 @@
|
||||
#define ERR_MONITORING_FAIL 16
|
||||
#define ERR_BAD_BACKUP_LABEL 17
|
||||
#define ERR_SWITCHOVER_FAIL 18
|
||||
#define ERR_BARMAN 19
|
||||
#define ERR_REGISTRATION_SYNC 20
|
||||
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
#
|
||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
#
|
||||
#conninfo='host=192.168.204.104 dbname=repmgr user=repmgr'
|
||||
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
#
|
||||
# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
|
||||
# conninfo string to determine the length of time which elapses before
|
||||
@@ -100,14 +100,11 @@
|
||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
||||
# (if not provided, defaults to system $PATH)
|
||||
#pg_bindir=/usr/bin/
|
||||
#
|
||||
# Debian/Ubuntu users: you will probably need to set this to the directory
|
||||
# where `pg_ctl` is located, e.g. /usr/lib/postgresql/9.5/bin/
|
||||
|
||||
# service control commands
|
||||
#
|
||||
# repmgr provides options to override the default pg_ctl commands
|
||||
# used to stop, start, restart, reload and promote the PostgreSQL cluster
|
||||
# repmgr provides options to override the default pg_ctl commands
|
||||
# used to stop, start and restart the PostgreSQL cluster
|
||||
#
|
||||
# NOTE: These commands must be runnable on remote nodes as well for switchover
|
||||
# to function correctly.
|
||||
@@ -123,11 +120,9 @@
|
||||
# /usr/bin/systemctl start postgresql-9.5, \
|
||||
# /usr/bin/systemctl restart postgresql-9.5
|
||||
#
|
||||
# service_start_command = systemctl start postgresql-9.5
|
||||
# service_stop_command = systemctl stop postgresql-9.5
|
||||
# service_restart_command = systemctl restart postgresql-9.5
|
||||
# service_reload_command = pg_ctlcluster 9.5 main reload
|
||||
# service_promote_command = pg_ctlcluster 9.5 main promote
|
||||
# start_command = systemctl start postgresql-9.5
|
||||
# stop_command = systemctl stop postgresql-9.5
|
||||
# restart_command = systemctl restart postgresql-9.5
|
||||
|
||||
# external command options
|
||||
|
||||
@@ -160,9 +155,6 @@
|
||||
# These settings are only applied when repmgrd is running. Values shown
|
||||
# are defaults.
|
||||
|
||||
# monitoring interval in seconds; default is 2
|
||||
#monitor_interval_secs=2
|
||||
|
||||
# Number of seconds to wait for a response from the primary server before
|
||||
# deciding it has failed.
|
||||
|
||||
@@ -190,6 +182,9 @@
|
||||
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
|
||||
# monitoring interval in seconds; default is 2
|
||||
#monitor_interval_secs=2
|
||||
|
||||
# change wait time for primary; before we bail out and exit when the primary
|
||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||
|
||||
177
repmgr.h
177
repmgr.h
@@ -23,7 +23,6 @@
|
||||
#include <libpq-fe.h>
|
||||
#include <postgres_fe.h>
|
||||
#include <getopt_long.h>
|
||||
#include "pqexpbuffer.h"
|
||||
|
||||
#include "strutil.h"
|
||||
#include "dbutils.h"
|
||||
@@ -36,7 +35,7 @@
|
||||
|
||||
#define ERRBUFF_SIZE 512
|
||||
|
||||
#define DEFAULT_WAL_KEEP_SEGMENTS "0"
|
||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||
#define DEFAULT_DEST_DIR "."
|
||||
#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
@@ -48,121 +47,64 @@
|
||||
#define NO_UPSTREAM_NODE -1
|
||||
#define UNKNOWN_NODE_ID -1
|
||||
|
||||
/* command line options without short versions */
|
||||
#define OPT_HELP 1
|
||||
#define OPT_CHECK_UPSTREAM_CONFIG 2
|
||||
#define OPT_RECOVERY_MIN_APPLY_DELAY 3
|
||||
#define OPT_COPY_EXTERNAL_CONFIG_FILES 4
|
||||
#define OPT_IGNORE_EXTERNAL_CONFIG_FILES 4
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 5
|
||||
#define OPT_PG_REWIND 6
|
||||
#define OPT_PWPROMPT 7
|
||||
#define OPT_CSV 8
|
||||
#define OPT_NODE 9
|
||||
#define OPT_WITHOUT_BARMAN 10
|
||||
#define OPT_NO_UPSTREAM_CONNECTION 11
|
||||
#define OPT_REGISTER_WAIT 12
|
||||
#define OPT_CLUSTER 13
|
||||
|
||||
/* deprecated command line options */
|
||||
#define OPT_INITDB_NO_PWPROMPT 998
|
||||
#define OPT_IGNORE_EXTERNAL_CONFIG_FILES 999
|
||||
|
||||
/* values for --copy-external-config-files */
|
||||
#define CONFIG_FILE_SAMEPATH 1
|
||||
#define CONFIG_FILE_PGDATA 2
|
||||
#define OPT_INITDB_NO_PWPROMPT 9
|
||||
|
||||
|
||||
/* Run time options type */
|
||||
typedef struct
|
||||
{
|
||||
/* general repmgr options */
|
||||
char config_file[MAXPGPATH];
|
||||
bool verbose;
|
||||
bool terse;
|
||||
bool force;
|
||||
|
||||
/* options which override setting in repmgr.conf */
|
||||
char loglevel[MAXLEN];
|
||||
char pg_bindir[MAXLEN];
|
||||
|
||||
/* connection parameters */
|
||||
char dbname[MAXLEN];
|
||||
char host[MAXLEN];
|
||||
char username[MAXLEN];
|
||||
char dest_dir[MAXPGPATH];
|
||||
char config_file[MAXPGPATH];
|
||||
char remote_user[MAXLEN];
|
||||
char superuser[MAXLEN];
|
||||
char masterport[MAXLEN];
|
||||
bool conninfo_provided;
|
||||
bool connection_param_provided;
|
||||
bool host_param_provided;
|
||||
|
||||
/* standby clone parameters */
|
||||
bool wal_keep_segments_used;
|
||||
char wal_keep_segments[MAXLEN];
|
||||
bool verbose;
|
||||
bool terse;
|
||||
bool force;
|
||||
bool wait_for_master;
|
||||
bool ignore_rsync_warn;
|
||||
bool witness_pwprompt;
|
||||
bool rsync_only;
|
||||
bool fast_checkpoint;
|
||||
bool without_barman;
|
||||
bool no_upstream_connection;
|
||||
bool copy_external_config_files;
|
||||
int copy_external_config_files_destination;
|
||||
bool wait_register_sync;
|
||||
int wait_register_sync_seconds;
|
||||
bool ignore_external_config_files;
|
||||
bool csv_mode;
|
||||
char masterport[MAXLEN];
|
||||
/*
|
||||
* configuration file parameters which can be overridden on the
|
||||
* command line
|
||||
*/
|
||||
char loglevel[MAXLEN];
|
||||
|
||||
/* parameter used by STANDBY SWITCHOVER */
|
||||
char remote_config_file[MAXLEN];
|
||||
char pg_rewind[MAXPGPATH];
|
||||
char pg_ctl_mode[MAXLEN];
|
||||
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
||||
char config_archive_dir[MAXLEN];
|
||||
/* parameter used by CLUSTER CLEANUP */
|
||||
int keep_history;
|
||||
|
||||
char pg_bindir[MAXLEN];
|
||||
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
|
||||
/* witness create parameters */
|
||||
bool witness_pwprompt;
|
||||
|
||||
/* standby follow parameters */
|
||||
bool wait_for_master;
|
||||
|
||||
/* cluster {show|matrix|crosscheck} parameters */
|
||||
bool csv_mode;
|
||||
|
||||
/* cluster cleanup parameters */
|
||||
int keep_history;
|
||||
|
||||
/* standby switchover parameters */
|
||||
char remote_config_file[MAXLEN];
|
||||
bool pg_rewind_supplied;
|
||||
char pg_rewind[MAXPGPATH];
|
||||
char pg_ctl_mode[MAXLEN];
|
||||
|
||||
/* standby {archive_config | restore_config} parameters */
|
||||
char config_archive_dir[MAXLEN];
|
||||
|
||||
/* {standby|witness} unregister parameters */
|
||||
int node;
|
||||
|
||||
/* deprecated command line options */
|
||||
char localport[MAXLEN];
|
||||
} t_runtime_options;
|
||||
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { \
|
||||
/* general repmgr options */ \
|
||||
"", false, false, false, \
|
||||
/* options which override setting in repmgr.conf */ \
|
||||
"", "", \
|
||||
/* connection parameters */ \
|
||||
"", "", "", "", "", "", "", \
|
||||
false, false, false, \
|
||||
/* standby clone parameters */ \
|
||||
false, DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, \
|
||||
CONFIG_FILE_SAMEPATH, false, 0, "", \
|
||||
/* witness create parameters */ \
|
||||
false, \
|
||||
/* standby follow parameters */ \
|
||||
false, \
|
||||
/* cluster {show|matrix|crosscheck} parameters */ \
|
||||
false, \
|
||||
/* cluster cleanup parameters */ \
|
||||
0, \
|
||||
/* standby switchover parameters */ \
|
||||
"", false, "", "fast", \
|
||||
/* standby {archive_config | restore_config} parameters */ \
|
||||
"", \
|
||||
/* {standby|witness} unregister parameters */ \
|
||||
UNKNOWN_NODE_ID }
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", ""}
|
||||
|
||||
struct BackupLabel
|
||||
{
|
||||
@@ -176,60 +118,7 @@ struct BackupLabel
|
||||
XLogRecPtr min_failover_slot_lsn;
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char slot[MAXLEN];
|
||||
char xlog_method[MAXLEN];
|
||||
} t_basebackup_options;
|
||||
|
||||
#define T_BASEBACKUP_OPTIONS_INITIALIZER { "", "" }
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int size;
|
||||
char **keywords;
|
||||
char **values;
|
||||
} t_conninfo_param_list;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char filepath[MAXPGPATH];
|
||||
char filename[MAXPGPATH];
|
||||
bool in_data_directory;
|
||||
} t_configfile_info;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int size;
|
||||
int entries;
|
||||
t_configfile_info **files;
|
||||
} t_configfile_list;
|
||||
|
||||
#define T_CONFIGFILE_LIST_INITIALIZER { 0, 0, NULL }
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int node_id;
|
||||
int node_status;
|
||||
} t_node_status_rec;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int node_id;
|
||||
char node_name[MAXLEN];
|
||||
t_node_status_rec **node_status_list;
|
||||
} t_node_matrix_rec;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int node_id;
|
||||
char node_name[MAXLEN];
|
||||
t_node_matrix_rec **matrix_list_rec;
|
||||
} t_node_status_cube;
|
||||
|
||||
|
||||
extern char repmgr_schema[MAXLEN];
|
||||
extern bool config_file_found;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -64,7 +64,7 @@ CREATE INDEX idx_repl_status_sort ON repl_monitor(last_monitor_time, standby_nod
|
||||
* This view shows the list of nodes with the information of which one is the upstream
|
||||
* in each case (when applicable)
|
||||
*/
|
||||
CREATE VIEW repl_show_nodes AS
|
||||
CREATE VIEW repl_show_nodes AS
|
||||
SELECT rn.id, rn.conninfo, rn.type, rn.name, rn.cluster,
|
||||
rn.priority, rn.active, sq.name AS upstream_node_name
|
||||
FROM repl_nodes as rn LEFT JOIN repl_nodes AS sq ON sq.id=rn.upstream_node_id;
|
||||
|
||||
224
repmgrd.c
224
repmgrd.c
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* repmgrd.c - Replication manager daemon
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This module connects to the nodes of a replication cluster and monitors
|
||||
@@ -111,15 +110,17 @@ static void check_and_create_pid_file(const char *pid_file);
|
||||
static void
|
||||
close_connections()
|
||||
{
|
||||
if (PQstatus(master_conn) == CONNECTION_OK && PQisBusy(master_conn) == 1)
|
||||
if (master_conn != NULL && PQisBusy(master_conn) == 1)
|
||||
cancel_query(master_conn, local_options.master_response_timeout);
|
||||
|
||||
|
||||
if (PQstatus(my_local_conn) == CONNECTION_OK)
|
||||
if (my_local_conn != NULL)
|
||||
PQfinish(my_local_conn);
|
||||
|
||||
if (PQstatus(master_conn) == CONNECTION_OK)
|
||||
if (master_conn != NULL && master_conn != my_local_conn)
|
||||
PQfinish(master_conn);
|
||||
|
||||
master_conn = NULL;
|
||||
my_local_conn = NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -311,46 +312,10 @@ main(int argc, char **argv)
|
||||
|
||||
log_debug("node id is %i, upstream is %i\n", node_info.node_id, node_info.upstream_node_id);
|
||||
|
||||
/*
|
||||
* Check if node record is active - if not, and `failover=automatic`, the node
|
||||
* won't be considered as a promotion candidate; this often happens when
|
||||
* a failed primary is recloned and the node was not re-registered, giving
|
||||
* the impression failover capability is there when it's not. In this case
|
||||
* abort with an error and a hint about registering.
|
||||
*
|
||||
* If `failover=manual`, repmgrd can continue to passively monitor the node, but
|
||||
* we should nevertheless issue a warning and the same hint.
|
||||
*/
|
||||
|
||||
if (node_info.active == false)
|
||||
{
|
||||
char *hint = "Check that 'repmgr (master|standby) register' was executed for this node";
|
||||
|
||||
switch (local_options.failover)
|
||||
{
|
||||
case AUTOMATIC_FAILOVER:
|
||||
log_err(_("This node is marked as inactive and cannot be used for failover\n"));
|
||||
log_hint(_("%s\n"), hint);
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
|
||||
case MANUAL_FAILOVER:
|
||||
log_warning(_("This node is marked as inactive and will be passively monitored only\n"));
|
||||
log_hint(_("%s\n"), hint);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* This should never happen */
|
||||
log_err(_("Unknown failover mode %i\n"), local_options.failover);
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* MAIN LOOP This loops cycles at startup and once per failover and
|
||||
* Requisites:
|
||||
* - my_local_conn must have an active connection to the monitored node
|
||||
* - master_conn must not be open
|
||||
* Requisites: - my_local_conn needs to be already setted with an active
|
||||
* connection - no master connection
|
||||
*/
|
||||
do
|
||||
{
|
||||
@@ -462,7 +427,7 @@ main(int argc, char **argv)
|
||||
local_options.cluster_name,
|
||||
&master_options.node, NULL);
|
||||
|
||||
if (PQstatus(master_conn) != CONNECTION_OK)
|
||||
if (master_conn == NULL)
|
||||
{
|
||||
PQExpBufferData errmsg;
|
||||
initPQExpBuffer(&errmsg);
|
||||
@@ -647,7 +612,7 @@ witness_monitor(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("new master found with node ID: %i\n"), master_options.node);
|
||||
log_debug(_("new master found with node ID: %i\n"), master_options.node);
|
||||
connection_ok = true;
|
||||
|
||||
/*
|
||||
@@ -751,14 +716,13 @@ static void
|
||||
standby_monitor(void)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
char monitor_standby_timestamp[MAXLEN];
|
||||
char last_wal_primary_location[MAXLEN];
|
||||
char last_xlog_receive_location[MAXLEN];
|
||||
char last_xlog_replay_location[MAXLEN];
|
||||
char last_xact_replay_timestamp[MAXLEN];
|
||||
bool receiving_streamed_wal = true;
|
||||
bool last_xlog_receive_location_gte_replayed;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
XLogRecPtr lsn_master_current_xlog_location;
|
||||
XLogRecPtr lsn_last_xlog_receive_location;
|
||||
@@ -779,6 +743,7 @@ standby_monitor(void)
|
||||
int active_master_id;
|
||||
const char *upstream_node_type = NULL;
|
||||
|
||||
bool receiving_streamed_wal = true;
|
||||
|
||||
|
||||
/*
|
||||
@@ -832,12 +797,13 @@ standby_monitor(void)
|
||||
/*
|
||||
* Check that the upstream node is still available
|
||||
* If not, initiate failover process
|
||||
*
|
||||
* This takes up to local_options.reconnect_attempts *
|
||||
* local_options.reconnect_interval seconds
|
||||
*/
|
||||
|
||||
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
|
||||
/*
|
||||
* This takes up to local_options.reconnect_attempts *
|
||||
* local_options.reconnect_interval seconds
|
||||
*/
|
||||
|
||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||
{
|
||||
@@ -1113,21 +1079,14 @@ standby_monitor(void)
|
||||
if (wait_connection_availability(master_conn, local_options.master_response_timeout) != 1)
|
||||
return;
|
||||
|
||||
/* Get local xlog info
|
||||
*
|
||||
* If receive_location is NULL, we're in archive recovery and not streaming WAL
|
||||
* If receive_location is less than replay location, we were streaming WAL but are
|
||||
* somehow disconnected and evidently in archive recovery
|
||||
*/
|
||||
/* Get local xlog info */
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" SELECT ts, "
|
||||
" CASE WHEN (receive_location IS NULL OR receive_location < replay_location) "
|
||||
" THEN replay_location "
|
||||
" ELSE receive_location"
|
||||
" END AS receive_location,"
|
||||
" receive_location, "
|
||||
" replay_location, "
|
||||
" replay_timestamp, "
|
||||
" COALESCE(receive_location, '0/0') >= replay_location AS receiving_streamed_wal "
|
||||
" receive_location >= replay_location "
|
||||
" FROM (SELECT CURRENT_TIMESTAMP AS ts, "
|
||||
" pg_catalog.pg_last_xlog_receive_location() AS receive_location, "
|
||||
" pg_catalog.pg_last_xlog_replay_location() AS replay_location, "
|
||||
@@ -1135,7 +1094,6 @@ standby_monitor(void)
|
||||
" ) q ");
|
||||
|
||||
|
||||
|
||||
res = PQexec(my_local_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
@@ -1150,17 +1108,40 @@ standby_monitor(void)
|
||||
strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN);
|
||||
strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||
|
||||
receiving_streamed_wal = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||
last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||
? true
|
||||
: false;
|
||||
|
||||
if (receiving_streamed_wal == false)
|
||||
/*
|
||||
* If pg_last_xlog_receive_location is NULL, this means we're in archive
|
||||
* recovery and will need to calculate lag based on pg_last_xlog_replay_location
|
||||
*/
|
||||
|
||||
/*
|
||||
* Replayed WAL is greater than received streamed WAL
|
||||
*/
|
||||
if (PQgetisnull(res, 0, 1))
|
||||
{
|
||||
log_verbose(LOG_DEBUG, _("standby %i not connected to streaming replication"), local_options.node);
|
||||
receiving_streamed_wal = false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
* In the unusual event of a standby becoming disconnected from the primary,
|
||||
* while this repmgrd remains connected to the primary, subtracting
|
||||
* "last_xlog_replay_location" from "lsn_last_xlog_receive_location" and coercing to
|
||||
* (long long unsigned int) will result in a meaningless, very large
|
||||
* value which will overflow a BIGINT column and spew error messages into the
|
||||
* PostgreSQL log. In the absence of a better strategy, skip attempting
|
||||
* to insert a monitoring record.
|
||||
*/
|
||||
if (receiving_streamed_wal == true && last_xlog_receive_location_gte_replayed == false)
|
||||
{
|
||||
log_verbose(LOG_WARNING,
|
||||
"Replayed WAL newer than received WAL - is this standby connected to its upstream?\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Get master xlog position
|
||||
*
|
||||
@@ -1182,9 +1163,25 @@ standby_monitor(void)
|
||||
|
||||
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL);
|
||||
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
||||
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
||||
|
||||
apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;
|
||||
/* Calculate apply lag */
|
||||
if (last_xlog_receive_location_gte_replayed == false)
|
||||
{
|
||||
/*
|
||||
* We're not receiving streaming WAL - in this case the receive location
|
||||
* equals the last replayed location
|
||||
*/
|
||||
apply_lag = 0;
|
||||
strncpy(last_xlog_receive_location, last_xlog_replay_location, MAXLEN);
|
||||
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
||||
|
||||
apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;
|
||||
}
|
||||
|
||||
|
||||
/* Calculate replication lag */
|
||||
if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location)
|
||||
@@ -1230,7 +1227,6 @@ standby_monitor(void)
|
||||
last_xlog_receive_location,
|
||||
replication_lag,
|
||||
apply_lag);
|
||||
|
||||
/*
|
||||
* Execute the query asynchronously, but don't check for a result. We will
|
||||
* check the result next time we pause for a monitor step.
|
||||
@@ -1255,7 +1251,7 @@ do_master_failover(void)
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
int total_active_nodes = 0;
|
||||
int total_nodes = 0;
|
||||
int visible_nodes = 0;
|
||||
int ready_nodes = 0;
|
||||
|
||||
@@ -1286,7 +1282,7 @@ do_master_failover(void)
|
||||
"SELECT id, conninfo, type, upstream_node_id "
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND active IS TRUE "
|
||||
" AND active IS TRUE "
|
||||
" AND priority > 0 "
|
||||
" ORDER BY priority DESC, id "
|
||||
" LIMIT %i ",
|
||||
@@ -1302,25 +1298,32 @@ do_master_failover(void)
|
||||
terminate(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
total_active_nodes = PQntuples(res);
|
||||
log_debug(_("%d active nodes registered\n"), total_active_nodes);
|
||||
/*
|
||||
* total nodes that are registered
|
||||
*/
|
||||
total_nodes = PQntuples(res);
|
||||
log_debug(_("%d active nodes registered\n"), total_nodes);
|
||||
|
||||
/*
|
||||
* Build an array with the nodes and indicate which ones are visible and
|
||||
* ready
|
||||
*/
|
||||
for (i = 0; i < total_active_nodes; i++)
|
||||
for (i = 0; i < total_nodes; i++)
|
||||
{
|
||||
char node_type[MAXLEN];
|
||||
|
||||
nodes[i] = (t_node_info) T_NODE_INFO_INITIALIZER;
|
||||
|
||||
nodes[i].node_id = atoi(PQgetvalue(res, i, 0));
|
||||
|
||||
strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXCONNINFO);
|
||||
strncpy(node_type, PQgetvalue(res, i, 2), MAXLEN);
|
||||
|
||||
nodes[i].type = parse_node_type(node_type);
|
||||
nodes[i].type = parse_node_type(PQgetvalue(res, i, 2));
|
||||
|
||||
/* Copy details of the failed node */
|
||||
/* XXX only node_id is actually used later */
|
||||
if (nodes[i].type == MASTER)
|
||||
{
|
||||
failed_master.node_id = nodes[i].node_id;
|
||||
failed_master.xlog_location = nodes[i].xlog_location;
|
||||
failed_master.is_ready = nodes[i].is_ready;
|
||||
}
|
||||
|
||||
nodes[i].upstream_node_id = atoi(PQgetvalue(res, i, 3));
|
||||
|
||||
@@ -1331,42 +1334,11 @@ do_master_failover(void)
|
||||
nodes[i].is_visible = false;
|
||||
nodes[i].is_ready = false;
|
||||
|
||||
log_debug(_("node=%i conninfo=\"%s\" type=%s\n"),
|
||||
nodes[i].node_id,
|
||||
nodes[i].conninfo_str,
|
||||
node_type);
|
||||
nodes[i].xlog_location = InvalidXLogRecPtr;
|
||||
|
||||
/* Copy details of the failed master node */
|
||||
if (nodes[i].type == MASTER)
|
||||
{
|
||||
/* XXX only node_id is currently used */
|
||||
failed_master.node_id = nodes[i].node_id;
|
||||
|
||||
/*
|
||||
* XXX experimental
|
||||
*
|
||||
* Currently an attempt is made to connect to the master,
|
||||
* which is very likely to be a waste of time at this point, as we'll
|
||||
* have spent the last however many seconds trying to do just that
|
||||
* in check_connection() before deciding it's gone away.
|
||||
*
|
||||
* If the master did come back at this point, the voting algorithm should decide
|
||||
* it's the "best candidate" anyway and no standby will promote itself or
|
||||
* attempt to follow* another server.
|
||||
*
|
||||
* If we don't try and connect to the master here (and the code generally
|
||||
* assumes it's failed anyway) but it does come back any time from here
|
||||
* onwards, promotion will fail and the promotion candidate will
|
||||
* notice the reappearance.
|
||||
*
|
||||
* TLDR version: by skipping the master connection attempt (and the chances
|
||||
* the master would reappear between the last attempt in check_connection()
|
||||
* and now are minimal) we can remove useless cycles during the failover process;
|
||||
* if the master does reappear it will be caught before later anyway.
|
||||
*/
|
||||
|
||||
continue;
|
||||
}
|
||||
log_debug(_("node=%d conninfo=\"%s\" type=%s\n"),
|
||||
nodes[i].node_id, nodes[i].conninfo_str,
|
||||
PQgetvalue(res, i, 2));
|
||||
|
||||
node_conn = establish_db_connection(nodes[i].conninfo_str, false);
|
||||
|
||||
@@ -1387,13 +1359,13 @@ do_master_failover(void)
|
||||
PQclear(res);
|
||||
|
||||
log_debug(_("total nodes counted: registered=%d, visible=%d\n"),
|
||||
total_active_nodes, visible_nodes);
|
||||
total_nodes, visible_nodes);
|
||||
|
||||
/*
|
||||
* Am I on the group that should keep alive? If I see less than half of
|
||||
* total_active_nodes then I should do nothing
|
||||
* total_nodes then I should do nothing
|
||||
*/
|
||||
if (visible_nodes < (total_active_nodes / 2.0))
|
||||
if (visible_nodes < (total_nodes / 2.0))
|
||||
{
|
||||
log_err(_("Unable to reach most of the nodes.\n"
|
||||
"Let the other standby servers decide which one will be the master.\n"
|
||||
@@ -1402,7 +1374,7 @@ do_master_failover(void)
|
||||
}
|
||||
|
||||
/* Query all available nodes to determine readiness and LSN */
|
||||
for (i = 0; i < total_active_nodes; i++)
|
||||
for (i = 0; i < total_nodes; i++)
|
||||
{
|
||||
log_debug("checking node %i...\n", nodes[i].node_id);
|
||||
|
||||
@@ -1480,7 +1452,7 @@ do_master_failover(void)
|
||||
PQclear(res);
|
||||
|
||||
/* Wait for each node to come up and report a valid LSN */
|
||||
for (i = 0; i < total_active_nodes; i++)
|
||||
for (i = 0; i < total_nodes; i++)
|
||||
{
|
||||
/*
|
||||
* ensure witness server is marked as ready, and skip
|
||||
@@ -1640,7 +1612,7 @@ do_master_failover(void)
|
||||
/*
|
||||
* determine which one is the best candidate to promote to master
|
||||
*/
|
||||
for (i = 0; i < total_active_nodes; i++)
|
||||
for (i = 0; i < total_nodes; i++)
|
||||
{
|
||||
/* witness server can never be a candidate */
|
||||
if (nodes[i].type == WITNESS)
|
||||
@@ -1729,8 +1701,6 @@ do_master_failover(void)
|
||||
{
|
||||
log_notice(_("Original master reappeared before this standby was promoted - no action taken\n"));
|
||||
|
||||
/* XXX log an event here? */
|
||||
|
||||
PQfinish(master_conn);
|
||||
master_conn = NULL;
|
||||
|
||||
@@ -1867,10 +1837,8 @@ do_master_failover(void)
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
|
||||
/*
|
||||
* setting "failover_done" to true will cause the node's monitoring loop
|
||||
* to restart in the appropriate mode for the node's (possibly new) role
|
||||
*/
|
||||
/* to force it to re-calculate mode and master node */
|
||||
// ^ ZZZ check that behaviour ^
|
||||
failover_done = true;
|
||||
}
|
||||
|
||||
@@ -2046,7 +2014,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
||||
{
|
||||
if (conninfo == NULL)
|
||||
{
|
||||
log_err("INTERNAL ERROR: *conn == NULL && conninfo == NULL\n");
|
||||
log_err("INTERNAL ERROR: *conn == NULL && conninfo == NULL");
|
||||
terminate(ERR_INTERNAL);
|
||||
}
|
||||
*conn = establish_db_connection(conninfo, false);
|
||||
|
||||
31
strutil.c
31
strutil.c
@@ -87,34 +87,3 @@ maxlen_snprintf(char *str, const char *format,...)
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Adapted from: src/fe_utils/string_utils.c
|
||||
*
|
||||
* Function not publicly available before PostgreSQL 9.6.
|
||||
*/
|
||||
void
|
||||
appendShellString(PQExpBuffer buf, const char *str)
|
||||
{
|
||||
const char *p;
|
||||
|
||||
appendPQExpBufferChar(buf, '\'');
|
||||
for (p = str; *p; p++)
|
||||
{
|
||||
if (*p == '\n' || *p == '\r')
|
||||
{
|
||||
fprintf(stderr,
|
||||
_("shell command argument contains a newline or carriage return: \"%s\"\n"),
|
||||
str);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*p == '\'')
|
||||
appendPQExpBufferStr(buf, "'\"'\"'");
|
||||
else
|
||||
appendPQExpBufferChar(buf, *p);
|
||||
}
|
||||
|
||||
appendPQExpBufferChar(buf, '\'');
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
#define _STRUTIL_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "pqexpbuffer.h"
|
||||
#include "errcode.h"
|
||||
|
||||
|
||||
@@ -49,6 +48,4 @@ extern int
|
||||
maxlen_snprintf(char *str, const char *format,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
|
||||
extern void
|
||||
appendShellString(PQExpBuffer buf, const char *str);
|
||||
#endif /* _STRUTIL_H_ */
|
||||
|
||||
Reference in New Issue
Block a user