mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
Compare commits
70 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2615cffecc | ||
|
|
1f838f99c2 | ||
|
|
d3f119005b | ||
|
|
db6d4d8820 | ||
|
|
7a8a50e229 | ||
|
|
e188044593 | ||
|
|
636f4b03c6 | ||
|
|
bf96b383a3 | ||
|
|
3a2e40f381 | ||
|
|
c608bb28ee | ||
|
|
ca9c2e1143 | ||
|
|
3a6d6b8899 | ||
|
|
4091cb7f18 | ||
|
|
870b0a53b6 | ||
|
|
6184cc57be | ||
|
|
e1254b6773 | ||
|
|
1c9121c2d8 | ||
|
|
6da03a6157 | ||
|
|
9bb6befa25 | ||
|
|
a8e5c68d03 | ||
|
|
b83e18c503 | ||
|
|
d4b845d213 | ||
|
|
75aad9a85e | ||
|
|
e115825cd6 | ||
|
|
6cf5ab2e53 | ||
|
|
f8119d20ea | ||
|
|
0caddf2d2c | ||
|
|
a4abbc6f0c | ||
|
|
d7e489ea0a | ||
|
|
2bcacff3b3 | ||
|
|
45eb0ea5d3 | ||
|
|
c3bd02b83d | ||
|
|
8e7d110a22 | ||
|
|
43874d5576 | ||
|
|
87ff9d09ba | ||
|
|
c429b0b186 | ||
|
|
03b88178c1 | ||
|
|
5f33f4286f | ||
|
|
932f84910b | ||
|
|
1ef7f1368d | ||
|
|
640abed18f | ||
|
|
ef6b24551a | ||
|
|
42847e44d2 | ||
|
|
dd7cfce3d3 | ||
|
|
30fd111cba | ||
|
|
65e63b062e | ||
|
|
053f672caa | ||
|
|
f6d02b85d8 | ||
|
|
6ebf3a7319 | ||
|
|
7345ddcf00 | ||
|
|
eb0af7ca23 | ||
|
|
ae47e5f413 | ||
|
|
46100a9549 | ||
|
|
9bd95cabdf | ||
|
|
f1584469bf | ||
|
|
a7f46d24de | ||
|
|
462d446477 | ||
|
|
23a72f489c | ||
|
|
f3f56b0cd6 | ||
|
|
00146b7fbd | ||
|
|
faf72a2514 | ||
|
|
7010b636e0 | ||
|
|
00deff9069 | ||
|
|
5240a5723a | ||
|
|
45e29c5b28 | ||
|
|
5def293ed6 | ||
|
|
ff7b4d3f02 | ||
|
|
a54478a045 | ||
|
|
7ad9a2c28a | ||
|
|
0037e66034 |
29
CONTRIBUTING.md
Normal file
29
CONTRIBUTING.md
Normal file
@@ -0,0 +1,29 @@
|
||||
License and Contributions
|
||||
=========================
|
||||
|
||||
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
||||
Copyright 2010-2015, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||
details.
|
||||
|
||||
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
||||
|
||||
Additional work has been sponsored by the 4CaaST project for cloud computing,
|
||||
which has received funding from the European Union's Seventh Framework Programme
|
||||
(FP7/2007-2013) under grant agreement 258862.
|
||||
|
||||
Contributions to `repmgr` are welcome, and will be listed in the file `CREDITS`.
|
||||
2ndQuadrant Limited requires that any contributions provide a copyright
|
||||
assignment and a disclaimer of any work-for-hire ownership claims from the
|
||||
employer of the developer. This lets us make sure that all of the repmgr
|
||||
distribution remains free code. Please contact info@2ndQuadrant.com for a
|
||||
copy of the relevant Copyright Assignment Form.
|
||||
|
||||
Code style
|
||||
----------
|
||||
|
||||
Code in repmgr is formatted to a consistent style using the following command:
|
||||
|
||||
astyle --style=ansi --indent=tab --suffix=none *.c *.h
|
||||
|
||||
Contributors should reformat their code similarly before submitting code to
|
||||
the project, in order to minimize merge conflicts with other work.
|
||||
@@ -203,6 +203,12 @@ repmgr will also ask for the superuser password on the witness database so
|
||||
it can reconnect when needed (the command line option --initdb-no-pwprompt
|
||||
will set up a password-less superuser).
|
||||
|
||||
By default the witness server will listen on port 5499; this value can be
|
||||
overridden by explicitly providing the port number in the conninfo string
|
||||
in repmgr.conf. (Note that it is also possible to specify the port number
|
||||
with the -l/--local-port option, however this option is now deprecated and
|
||||
will be overridden by a port setting in the conninfo string).
|
||||
|
||||
Start the repmgrd daemons
|
||||
-------------------------
|
||||
|
||||
|
||||
16
HISTORY
16
HISTORY
@@ -1,3 +1,19 @@
|
||||
3.0.2 2015-09-
|
||||
Improve handling of --help/--version options; and improve help output (Ian)
|
||||
Improve handling of situation where logfile can't be opened (Ian)
|
||||
Always pass -D/--pgdata option to pg_basebackup (Ian)
|
||||
Bugfix: standby clone --force does not empty pg_xlog (Gianni)
|
||||
Bugfix: autofailover with reconnect_attempts > 1 (Gianni)
|
||||
Bugfix: ignore comments after values (soxwellfb)
|
||||
Bugfix: handle string values in 'node' parameter correctly (Gregory Duchatelet)
|
||||
Allow repmgr to be compiled with a newer libpq (Marco)
|
||||
Bugfix: call update_node_record_set_upstream() for STANDBY FOLLOW (Tomas)
|
||||
Update `repmgr --help` output (per Github report from renard)
|
||||
Update tablespace remapping in --rsync-only mode for 9.5 and later (Ian)
|
||||
Deprecate `-l/--local-port` option - the port can be extracted
|
||||
from the conninfo string in repmgr.conf (Ian)
|
||||
Add STANDBY UNREGISTER (Vik Fearing)
|
||||
|
||||
3.0.1 2015-04-16
|
||||
Prevent repmgrd from looping infinitely if node was not registered (Ian)
|
||||
When promoting a standby, have repmgr (not repmgrd) handle metadata updates (Ian)
|
||||
|
||||
38
PACKAGES.md
38
PACKAGES.md
@@ -4,10 +4,10 @@ Packaging
|
||||
Notes on RedHat Linux, Fedora, and CentOS Builds
|
||||
------------------------------------------------
|
||||
|
||||
The RPM packages of PostgreSQL put ``pg_config`` into the ``postgresql-devel``
|
||||
The RPM packages of PostgreSQL put `pg_config` into the `postgresql-devel`
|
||||
package, not the main server one. And if you have a RPM install of PostgreSQL
|
||||
9.0, the entire PostgreSQL binary directory will not be in your PATH by default
|
||||
either. Individual utilities are made available via the ``alternatives``
|
||||
either. Individual utilities are made available via the `alternatives`
|
||||
mechanism, but not all commands will be wrapped that way. The files installed
|
||||
by repmgr will certainly not be in the default PATH for the postgres user
|
||||
on such a system. They will instead be in /usr/pgsql-9.0/bin/ on this
|
||||
@@ -15,31 +15,33 @@ type of system.
|
||||
|
||||
When building repmgr against a RPM packaged build, you may discover that some
|
||||
development packages are needed as well. The following build errors can
|
||||
occur::
|
||||
occur:
|
||||
|
||||
/usr/bin/ld: cannot find -lxslt
|
||||
/usr/bin/ld: cannot find -lpam
|
||||
|
||||
Install the following packages to correct those::
|
||||
Install the following packages to correct those:
|
||||
|
||||
|
||||
yum install libxslt-devel
|
||||
yum install pam-devel
|
||||
|
||||
If building repmgr as a regular user, then doing the install into the system
|
||||
directories using sudo, the syntax is hard. ``pg_config`` won't be in root's
|
||||
path either. The following recipe should work::
|
||||
directories using sudo, the syntax is hard. `pg_config` won't be in root's
|
||||
path either. The following recipe should work:
|
||||
|
||||
sudo PATH="/usr/pgsql-9.0/bin:$PATH" make USE_PGXS=1 install
|
||||
|
||||
|
||||
Issues with 32 and 64 bit RPMs
|
||||
------------------------------
|
||||
|
||||
If when building, you receive a series of errors of this form::
|
||||
If when building, you receive a series of errors of this form:
|
||||
|
||||
/usr/bin/ld: skipping incompatible /usr/pgsql-9.0/lib/libpq.so when searching for -lpq
|
||||
|
||||
This is likely because you have both the 32 and 64 bit versions of the
|
||||
``postgresql90-devel`` package installed. You can check that like this::
|
||||
`postgresql90-devel` package installed. You can check that like this:
|
||||
|
||||
rpm -qa --queryformat '%{NAME}\t%{ARCH}\n' | grep postgresql90-devel
|
||||
|
||||
@@ -47,7 +49,8 @@ And if two packages appear, one for i386 and one for x86_64, that's not supposed
|
||||
to be allowed.
|
||||
|
||||
This can happen when using the PGDG repo to install that package;
|
||||
here is an example sessions demonstrating the problem case appearing::
|
||||
here is an example session demonstrating the problem case appearing:
|
||||
|
||||
|
||||
# yum install postgresql-devel
|
||||
..
|
||||
@@ -67,20 +70,21 @@ here is an example sessions demonstrating the problem case appearing::
|
||||
postgresql90-devel i386 9.0.2-2PGDG.rhel5 pgdg90 1.5 M
|
||||
postgresql90-devel x86_64 9.0.2-2PGDG.rhel5 pgdg90 1.6 M
|
||||
|
||||
|
||||
Note how both the i386 and x86_64 platform architectures are selected for
|
||||
installation. Your main PostgreSQL package will only be compatible with one of
|
||||
those, and if the repmgr build finds the wrong postgresql90-devel these
|
||||
"skipping incompatible" messages appear.
|
||||
|
||||
In this case, you can temporarily remove both packages, then just install the
|
||||
correct one for your architecture. Example::
|
||||
correct one for your architecture. Example:
|
||||
|
||||
rpm -e postgresql90-devel --allmatches
|
||||
yum install postgresql90-devel-9.0.2-2PGDG.rhel5.x86_64
|
||||
|
||||
Just deleting the package for the wrong platform instead might not leave behind
|
||||
the correct files, due to the way in which these accidentally happen to interact.
|
||||
If you already tried to build repmgr before doing this, you'll need to do::
|
||||
If you already tried to build repmgr before doing this, you'll need to do:
|
||||
|
||||
make USE_PGXS=1 clean
|
||||
|
||||
@@ -89,17 +93,17 @@ to get rid of leftover files from the wrong architecture.
|
||||
Notes on Ubuntu, Debian or other Debian-based Builds
|
||||
----------------------------------------------------
|
||||
|
||||
The Debian packages of PostgreSQL put ``pg_config`` into the development package
|
||||
called ``postgresql-server-dev-$version``.
|
||||
The Debian packages of PostgreSQL put `pg_config` into the development package
|
||||
called `postgresql-server-dev-$version`.
|
||||
|
||||
When building repmgr against a Debian packages build, you may discover that some
|
||||
development packages are needed as well. You will need the following development
|
||||
packages installed::
|
||||
packages installed:
|
||||
|
||||
sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev
|
||||
|
||||
If you're using Debian packages for PostgreSQL and are building repmgr with the
|
||||
USE_PGXS option you also need to install the corresponding development package::
|
||||
USE_PGXS option you also need to install the corresponding development package:
|
||||
|
||||
sudo apt-get install postgresql-server-dev-9.0
|
||||
|
||||
@@ -110,12 +114,12 @@ multiple installed versions of PostgreSQL on the same system through a wrapper
|
||||
called pg_wrapper and repmgr is not (yet) known to this wrapper.
|
||||
|
||||
You can solve this in many different ways, the most Debian like is to make an
|
||||
alternate for repmgr and repmgrd::
|
||||
alternate for repmgr and repmgrd:
|
||||
|
||||
sudo update-alternatives --install /usr/bin/repmgr repmgr /usr/lib/postgresql/9.0/bin/repmgr 10
|
||||
sudo update-alternatives --install /usr/bin/repmgrd repmgrd /usr/lib/postgresql/9.0/bin/repmgrd 10
|
||||
|
||||
You can also make a deb package of repmgr using::
|
||||
You can also make a deb package of repmgr using:
|
||||
|
||||
make USE_PGXS=1 deb
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ Master setup
|
||||
CREATE DATABASE repmgr_db OWNER repmgr_usr;
|
||||
```
|
||||
|
||||
- configure `postgresql.conf` for replication (see above)
|
||||
- configure `postgresql.conf` for replication (see README.md for sample
|
||||
settings)
|
||||
|
||||
- update `pg_hba.conf`, e.g.:
|
||||
|
||||
@@ -111,7 +112,7 @@ created in the `repl_nodes` table should look something like this:
|
||||
|
||||
repmgr_db=# SELECT * from repmgr_test.repl_nodes;
|
||||
id | type | upstream_node_id | cluster | name | conninfo | slot_name | priority | active
|
||||
----+---------+------------------+---------+-------+-------------------------------------------------+-----------+----------+--------
|
||||
1 | primary | | test | node1 | host=localhost user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
2 | standby | 1 | test | node2 | host=localhost user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
----+---------+------------------+---------+-------+----------------------------------------------------+-----------+----------+--------
|
||||
1 | primary | | test | node1 | host=repmgr_node1 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
2 | standby | 1 | test | node2 | host=repmgr_node2 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
(2 rows)
|
||||
|
||||
45
README.md
45
README.md
@@ -7,7 +7,7 @@ hot-standby capabilities with tools to set up standby servers, monitor
|
||||
replication, and perform administrative tasks such as failover or manual
|
||||
switchover operations.
|
||||
|
||||
This document covers `repmgr 3`, which supports PostgreSQL 9.4 and 9.3.
|
||||
This document covers `repmgr 3`, which supports PostgreSQL 9.3 and later.
|
||||
This version can use `pg_basebackup` to clone standby servers, supports
|
||||
replication slots and cascading replication, doesn't require a restart
|
||||
after promotion, and has many usability improvements.
|
||||
@@ -53,7 +53,7 @@ on any UNIX-like system which PostgreSQL itself supports.
|
||||
|
||||
All nodes must be running the same major version of PostgreSQL, and we
|
||||
recommend that they also run the same minor version. This version of
|
||||
`repmgr` (v3) supports PostgreSQL 9.3 and 9.4.
|
||||
`repmgr` (v3) supports PostgreSQL 9.3 and later.
|
||||
|
||||
Earlier versions of `repmgr` needed password-less SSH access between
|
||||
nodes in order to clone standby servers using `rsync`. `repmgr 3` can
|
||||
@@ -98,8 +98,8 @@ for details.
|
||||
|
||||
### PostgreSQL configuration
|
||||
|
||||
The primary server needs to be configured for replication with the
|
||||
following settings in `postgresql.conf`:
|
||||
The primary server needs to be configured for replication with settings
|
||||
like the following in `postgresql.conf`:
|
||||
|
||||
# Allow read-only queries on standby servers. The number of WAL
|
||||
# senders should be larger than the number of standby servers.
|
||||
@@ -121,13 +121,18 @@ following settings in `postgresql.conf`:
|
||||
archive_mode = on
|
||||
archive_command = 'cd .'
|
||||
|
||||
# You can also set additional replication parameters here, such as
|
||||
# hot_standby_feedback or synchronous_standby_names.
|
||||
# If you plan to use repmgrd, ensure that shared_preload_libraries
|
||||
# is configured to load 'repmgr_funcs'
|
||||
|
||||
shared_preload_libraries = 'repmgr_funcs'
|
||||
|
||||
PostgreSQL 9.4 makes it possible to use replication slots, which means
|
||||
the value of wal_keep_segments need no longer be set. With 9.3, `repmgr`
|
||||
expects it to be set to at least 5000 (= 80GB of WAL) by default, though
|
||||
this can be overriden with the `-w N` argument.
|
||||
the value of `wal_keep_segments` need no longer be set. See section
|
||||
"Replication slots" below for more details.
|
||||
|
||||
With PostgreSQL 9.3, `repmgr` expects `wal_keep_segments` to be set to
|
||||
at least 5000 (= 80GB of WAL) by default, though this can be overridden
|
||||
with the `-w N` argument.
|
||||
|
||||
A dedicated PostgreSQL superuser account and a database in which to
|
||||
store monitoring and replication data are required. Create them by
|
||||
@@ -223,7 +228,7 @@ The node can then be restarted.
|
||||
The node will then need to be re-registered with `repmgr`; again
|
||||
the `--force` option is required to update the existing record:
|
||||
|
||||
repmgr -f /etc/repmgr/repmgr.conf
|
||||
repmgr -f /etc/repmgr/repmgr.conf \
|
||||
--force \
|
||||
standby register
|
||||
|
||||
@@ -345,6 +350,7 @@ Following event types currently exist:
|
||||
|
||||
master_register
|
||||
standby_register
|
||||
standby_unregister
|
||||
standby_clone
|
||||
standby_promote
|
||||
witness_create
|
||||
@@ -398,6 +404,18 @@ stored in the `repl_nodes` table.
|
||||
Note that `repmgr` will fail with an error if this option is specified when
|
||||
working with PostgreSQL 9.3.
|
||||
|
||||
Be aware that when initially cloning a standby, you will need to ensure
|
||||
that all required WAL files remain available while the cloning is taking
|
||||
place. If using the default `pg_basebackup` method, we recommend setting
|
||||
`pg_basebackup`'s `--xlog-method` parameter to `stream` like this:
|
||||
|
||||
pg_basebackup_options='--xlog-method=stream'
|
||||
|
||||
See the `pg_basebackup` documentation [*] for details. Otherwise you'll need
|
||||
to set `wal_keep_segments` to an appropriately high value.
|
||||
|
||||
[*] http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||
|
||||
Further reading:
|
||||
* http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
||||
* http://blog.2ndquadrant.com/postgresql-9-4-slots/
|
||||
@@ -435,12 +453,19 @@ its port if is different from the default one.
|
||||
Registers a master in a cluster. This command needs to be executed before any
|
||||
standby nodes are registered.
|
||||
|
||||
`primary register` can be used as an alias for `master register`.
|
||||
|
||||
* `standby register`
|
||||
|
||||
Registers a standby with `repmgr`. This command needs to be executed to enable
|
||||
promote/follow operations and to allow `repmgrd` to work with the node.
|
||||
An existing standby can be registered using this command.
|
||||
|
||||
* `standby unregister`
|
||||
|
||||
Unregisters a standby with `repmgr`. This command does not affect the actual
|
||||
replication.
|
||||
|
||||
* `standby clone [node to be cloned]`
|
||||
|
||||
Clones a new standby node from the data directory of the master (or
|
||||
|
||||
@@ -12,7 +12,7 @@ REPMGRD_ENABLED=no
|
||||
#REPMGRD_USER=postgres
|
||||
|
||||
# repmgrd binary
|
||||
#REPMGRD_BIN=/usr/bin/repmgr
|
||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
||||
|
||||
# pid file
|
||||
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
||||
|
||||
13
SSH-RSYNC.md
13
SSH-RSYNC.md
@@ -1,12 +1,13 @@
|
||||
Set up trusted copy between postgres accounts
|
||||
---------------------------------------------
|
||||
|
||||
If you need to use rsync to clone standby servers, the postgres account
|
||||
on your master and standby servers must be each able to access the other
|
||||
If you need to use `rsync` to clone standby servers, the `postgres` account
|
||||
on your primary and standby servers must be each able to access the other
|
||||
using SSH without a password.
|
||||
|
||||
First generate a ssh key, using an empty passphrase, and copy the resulting
|
||||
keys and a maching authorization file to a privledged user on the other system::
|
||||
First generate an ssh key, using an empty passphrase, and copy the resulting
|
||||
keys and a matching authorization file to a privileged user account on the other
|
||||
system:
|
||||
|
||||
[postgres@node1]$ ssh-keygen -t rsa
|
||||
Generating public/private rsa key pair.
|
||||
@@ -22,8 +23,8 @@ keys and a maching authorization file to a privledged user on the other system::
|
||||
[postgres@node1]$ cd ~/.ssh
|
||||
[postgres@node1]$ scp id_rsa.pub id_rsa authorized_keys user@node2:
|
||||
|
||||
Login as a user on the other system, and install the files into the postgres
|
||||
user's account::
|
||||
Log in as a user on the other system, and install the files into the `postgres`
|
||||
user's account:
|
||||
|
||||
[user@node2 ~]$ sudo chown postgres.postgres authorized_keys id_rsa.pub id_rsa
|
||||
[user@node2 ~]$ sudo mkdir -p ~postgres/.ssh
|
||||
|
||||
23
TODO
23
TODO
@@ -10,6 +10,10 @@ Known issues in repmgr
|
||||
Planned feature improvements
|
||||
============================
|
||||
|
||||
* Use 'primary' instead of 'master' in documentation and log output
|
||||
for consistency with PostgreSQL documentation. See also commit
|
||||
870b0a53b627eeb9aca1fc14cbafe25b5beafe12.
|
||||
|
||||
* A better check which standby did receive most of the data
|
||||
|
||||
* Make the fact that a standby may be delayed a factor in the voting
|
||||
@@ -22,6 +26,19 @@ Planned feature improvements
|
||||
* Use pg_basebackup for the data directory, and ALSO rsync for the
|
||||
configuration files.
|
||||
|
||||
* Use pg_basebackup -X s
|
||||
NOTE: this can be used by including `-X s` in the configuration parameter
|
||||
`pg_basebackup_options`
|
||||
* If no configuration file supplied, search in sensible default locations
|
||||
(currently: current directory and `pg_config --sysconfdir`); if
|
||||
possible this should include the location provided by the package,
|
||||
if installed.
|
||||
|
||||
* repmgrd: if connection to the upstream node fails on startup, optionally
|
||||
retry for a certain period before giving up; this will cover cases when
|
||||
e.g. primary and standby are both starting up, and the standby comes up
|
||||
before the primary. See github issue #80.
|
||||
|
||||
* make old master node ID available for event notification commands
|
||||
(See github issue #80).
|
||||
|
||||
* Have pg_basebackup use replication slots, if and when support for
|
||||
this is added; see:
|
||||
http://www.postgresql.org/message-id/555DD2B2.7020000@gmx.net
|
||||
|
||||
53
check_dir.c
53
check_dir.c
@@ -23,14 +23,19 @@
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ftw.h>
|
||||
|
||||
/* NB: postgres_fe must be included BEFORE check_dir */
|
||||
#include "postgres_fe.h"
|
||||
#include "check_dir.h"
|
||||
#include <libpq-fe.h>
|
||||
#include <postgres_fe.h>
|
||||
|
||||
#include "check_dir.h"
|
||||
#include "strutil.h"
|
||||
#include "log.h"
|
||||
|
||||
static bool _create_pg_dir(char *dir, bool force, bool for_witness);
|
||||
static int unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf);
|
||||
|
||||
/*
|
||||
* make sure the directory either doesn't exist or is empty
|
||||
* we use this function to check the new data directory and
|
||||
@@ -243,6 +248,19 @@ is_pg_dir(char *dir)
|
||||
|
||||
bool
|
||||
create_pg_dir(char *dir, bool force)
|
||||
{
|
||||
return _create_pg_dir(dir, force, false);
|
||||
}
|
||||
|
||||
bool
|
||||
create_witness_pg_dir(char *dir, bool force)
|
||||
{
|
||||
return _create_pg_dir(dir, force, true);
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
_create_pg_dir(char *dir, bool force, bool for_witness)
|
||||
{
|
||||
bool pg_dir = false;
|
||||
|
||||
@@ -279,12 +297,24 @@ create_pg_dir(char *dir, bool force)
|
||||
|
||||
pg_dir = is_pg_dir(dir);
|
||||
|
||||
/*
|
||||
* we use force to reduce the time needed to restore a node which
|
||||
* turn async after a failover or anything else
|
||||
*/
|
||||
|
||||
if (pg_dir && force)
|
||||
{
|
||||
|
||||
/*
|
||||
* The witness server does not store any data other than a copy of the
|
||||
* repmgr metadata, so in --force mode we can simply overwrite the
|
||||
* directory.
|
||||
*
|
||||
* For non-witness servers, we'll leave the data in place, both to reduce
|
||||
* the risk of unintentional data loss and to make it possible for the
|
||||
* data directory to be brought up-to-date with rsync.
|
||||
*/
|
||||
if (for_witness)
|
||||
{
|
||||
log_notice(_("deleting existing data directory \"%s\"\n"), dir);
|
||||
nftw(dir, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||
}
|
||||
/* Let it continue */
|
||||
break;
|
||||
}
|
||||
@@ -306,3 +336,14 @@ create_pg_dir(char *dir, bool force)
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
|
||||
{
|
||||
int rv = remove(fpath);
|
||||
|
||||
if (rv)
|
||||
perror(fpath);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
@@ -26,5 +26,6 @@ bool create_dir(char *dir);
|
||||
bool set_dir_permissions(char *dir);
|
||||
bool is_pg_dir(char *dir);
|
||||
bool create_pg_dir(char *dir, bool force);
|
||||
bool create_witness_pg_dir(char *dir, bool force);
|
||||
|
||||
#endif
|
||||
|
||||
145
config.c
145
config.c
@@ -27,9 +27,11 @@
|
||||
static void parse_event_notifications_list(t_configuration_options *options, const char *arg);
|
||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||
|
||||
static char config_file_path[MAXPGPATH];
|
||||
static bool config_file_provided = false;
|
||||
|
||||
/*
|
||||
* parse_config()
|
||||
* load_config()
|
||||
*
|
||||
* Set default options and overwrite with values from provided configuration
|
||||
* file.
|
||||
@@ -40,30 +42,21 @@ static void tablespace_list_append(t_configuration_options *options, const char
|
||||
* reload_config()
|
||||
*/
|
||||
bool
|
||||
parse_config(const char *config_file, t_configuration_options *options)
|
||||
load_config(const char *config_file, t_configuration_options *options, char *argv0)
|
||||
{
|
||||
char *s,
|
||||
buff[MAXLINELENGTH];
|
||||
char config_file_buf[MAXLEN];
|
||||
char name[MAXLEN];
|
||||
char value[MAXLEN];
|
||||
bool config_file_provided = false;
|
||||
FILE *fp;
|
||||
|
||||
struct stat config;
|
||||
/* Sanity checks */
|
||||
|
||||
/*
|
||||
* If a configuration file was provided, check it exists, otherwise
|
||||
* emit an error
|
||||
* emit an error and terminate
|
||||
*/
|
||||
if (config_file[0])
|
||||
{
|
||||
struct stat config;
|
||||
strncpy(config_file_path, config_file, MAXPGPATH);
|
||||
canonicalize_path(config_file_path);
|
||||
|
||||
strncpy(config_file_buf, config_file, MAXLEN);
|
||||
canonicalize_path(config_file_buf);
|
||||
|
||||
if(stat(config_file_buf, &config) != 0)
|
||||
if (stat(config_file_path, &config) != 0)
|
||||
{
|
||||
log_err(_("provided configuration file '%s' not found: %s\n"),
|
||||
config_file,
|
||||
@@ -76,16 +69,53 @@ parse_config(const char *config_file, t_configuration_options *options)
|
||||
}
|
||||
|
||||
/*
|
||||
* If no configuration file was provided, set to a default file
|
||||
* which `parse_config()` will attempt to read if it exists
|
||||
* If no configuration file was provided, attempt to find a default file
|
||||
*/
|
||||
else
|
||||
if (config_file_provided == false)
|
||||
{
|
||||
strncpy(config_file_buf, DEFAULT_CONFIG_FILE, MAXLEN);
|
||||
char my_exec_path[MAXPGPATH];
|
||||
char etc_path[MAXPGPATH];
|
||||
|
||||
/* First check if one is in the default sysconfdir */
|
||||
if (find_my_exec(argv0, my_exec_path) < 0)
|
||||
{
|
||||
fprintf(stderr, _("%s: could not find own program executable\n"), argv0);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
get_etc_path(my_exec_path, etc_path);
|
||||
|
||||
snprintf(config_file_path, MAXPGPATH, "%s/repmgr.conf", etc_path);
|
||||
|
||||
log_debug(_("Looking for configuration file in %s\n"), etc_path);
|
||||
|
||||
if (stat(config_file_path, &config) != 0)
|
||||
{
|
||||
/* Not found - default to ./repmgr.conf */
|
||||
strncpy(config_file_path, DEFAULT_CONFIG_FILE, MAXPGPATH);
|
||||
canonicalize_path(config_file_path);
|
||||
log_debug(_("Looking for configuration file in %s\n"), config_file_path);
|
||||
}
|
||||
}
|
||||
|
||||
return parse_config(options);
|
||||
}
|
||||
|
||||
|
||||
fp = fopen(config_file_buf, "r");
|
||||
bool
|
||||
parse_config(t_configuration_options *options)
|
||||
{
|
||||
FILE *fp;
|
||||
char *s,
|
||||
buff[MAXLINELENGTH];
|
||||
char name[MAXLEN];
|
||||
char value[MAXLEN];
|
||||
|
||||
/* For sanity-checking provided conninfo string */
|
||||
PQconninfoOption *conninfo_options;
|
||||
char *conninfo_errmsg = NULL;
|
||||
|
||||
fp = fopen(config_file_path, "r");
|
||||
|
||||
/*
|
||||
* Since some commands don't require a config file at all, not having one
|
||||
@@ -101,7 +131,7 @@ parse_config(const char *config_file, t_configuration_options *options)
|
||||
{
|
||||
if (config_file_provided)
|
||||
{
|
||||
log_err(_("unable to open provided configuration file '%s'; terminating\n"), config_file_buf);
|
||||
log_err(_("unable to open provided configuration file '%s'; terminating\n"), config_file_path);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -149,13 +179,17 @@ parse_config(const char *config_file, t_configuration_options *options)
|
||||
{
|
||||
bool known_parameter = true;
|
||||
|
||||
/* Skip blank lines and comments */
|
||||
if (buff[0] == '\n' || buff[0] == '#')
|
||||
continue;
|
||||
|
||||
/* Parse name/value pair from line */
|
||||
parse_line(buff, name, value);
|
||||
|
||||
/* Skip blank lines */
|
||||
if (!strlen(name))
|
||||
continue;
|
||||
|
||||
/* Skip comments */
|
||||
if (name[0] == '#')
|
||||
continue;
|
||||
|
||||
/* Copy into correct entry in parameters struct */
|
||||
if (strcmp(name, "cluster") == 0)
|
||||
strncpy(options->cluster_name, value, MAXLEN);
|
||||
@@ -262,6 +296,12 @@ parse_config(const char *config_file, t_configuration_options *options)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (options->node == 0)
|
||||
{
|
||||
log_err(_("'node' must be an integer greater than zero\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*options->node_name == '\0')
|
||||
{
|
||||
log_err(_("required parameter 'node_name' was not found\n"));
|
||||
@@ -274,6 +314,19 @@ parse_config(const char *config_file, t_configuration_options *options)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Sanity check the provided conninfo string
|
||||
*
|
||||
* NOTE: this verifies the string format and checks for valid options
|
||||
* but does not sanity check values
|
||||
*/
|
||||
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
|
||||
if (conninfo_options == NULL)
|
||||
{
|
||||
log_err(_("Parameter 'conninfo' is invalid: %s"), conninfo_errmsg);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
PQconninfoFree(conninfo_options);
|
||||
|
||||
/* The following checks are for valid parameter values */
|
||||
if (options->master_response_timeout <= 0)
|
||||
{
|
||||
@@ -331,24 +384,50 @@ parse_line(char *buff, char *name, char *value)
|
||||
int j = 0;
|
||||
|
||||
/*
|
||||
* first we find the name of the parameter
|
||||
* Extract parameter name, if present
|
||||
*/
|
||||
for (; i < MAXLEN; ++i)
|
||||
{
|
||||
if (buff[i] != '=')
|
||||
name[j++] = buff[i];
|
||||
else
|
||||
|
||||
if (buff[i] == '=')
|
||||
break;
|
||||
|
||||
switch(buff[i])
|
||||
{
|
||||
/* Ignore whitespace */
|
||||
case ' ':
|
||||
case '\n':
|
||||
case '\r':
|
||||
case '\t':
|
||||
continue;
|
||||
default:
|
||||
name[j++] = buff[i];
|
||||
}
|
||||
}
|
||||
name[j] = '\0';
|
||||
|
||||
/*
|
||||
* Now the value
|
||||
* Ignore any whitespace following the '=' sign
|
||||
*/
|
||||
for (; i < MAXLEN; ++i)
|
||||
{
|
||||
if (buff[i+1] == ' ')
|
||||
continue;
|
||||
if (buff[i+1] == '\t')
|
||||
continue;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract parameter value
|
||||
*/
|
||||
j = 0;
|
||||
for (++i; i < MAXLEN; ++i)
|
||||
if (buff[i] == '\'')
|
||||
continue;
|
||||
else if (buff[i] == '#')
|
||||
break;
|
||||
else if (buff[i] != '\n')
|
||||
value[j++] = buff[i];
|
||||
else
|
||||
@@ -358,7 +437,7 @@ parse_line(char *buff, char *name, char *value)
|
||||
}
|
||||
|
||||
bool
|
||||
reload_config(char *config_file, t_configuration_options * orig_options)
|
||||
reload_config(t_configuration_options *orig_options)
|
||||
{
|
||||
PGconn *conn;
|
||||
t_configuration_options new_options;
|
||||
@@ -369,7 +448,7 @@ reload_config(char *config_file, t_configuration_options * orig_options)
|
||||
*/
|
||||
log_info(_("reloading configuration file and updating repmgr tables\n"));
|
||||
|
||||
parse_config(config_file, &new_options);
|
||||
parse_config(&new_options);
|
||||
if (new_options.node == -1)
|
||||
{
|
||||
log_warning(_("unable to parse new configuration, retaining current configuration\n"));
|
||||
|
||||
5
config.h
5
config.h
@@ -83,9 +83,10 @@ typedef struct
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
|
||||
|
||||
bool parse_config(const char *config_file, t_configuration_options *options);
|
||||
bool load_config(const char *config_file, t_configuration_options *options, char *argv0);
|
||||
bool reload_config(t_configuration_options *orig_options);
|
||||
bool parse_config(t_configuration_options *options);
|
||||
void parse_line(char *buff, char *name, char *value);
|
||||
char *trim(char *s);
|
||||
bool reload_config(char *config_file, t_configuration_options *orig_options);
|
||||
|
||||
#endif
|
||||
|
||||
125
dbutils.c
125
dbutils.c
@@ -82,6 +82,72 @@ establish_db_connection_by_params(const char *keywords[], const char *values[],
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
begin_transaction(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
|
||||
res = PQexec(conn, "BEGIN");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to begin transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
commit_transaction(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
|
||||
res = PQexec(conn, "COMMIT");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to commit transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
rollback_transaction(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
|
||||
res = PQexec(conn, "ROLLBACK");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to rollback transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
check_cluster_schema(PGconn *conn)
|
||||
{
|
||||
@@ -1080,8 +1146,8 @@ delete_node_record(PGconn *conn, int node, char *action)
|
||||
*
|
||||
* Note this function may be called with `conn` set to NULL in cases where
|
||||
* the master node is not available and it's therefore not possible to write
|
||||
* an event record. In this case, if `event_notification_command` is set a user-
|
||||
* defined notification to be generated; if not, this function will have
|
||||
* an event record. In this case, if `event_notification_command` is set, a
|
||||
* user-defined notification will be generated; if not, this function will have
|
||||
* no effect.
|
||||
*/
|
||||
|
||||
@@ -1094,7 +1160,7 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
||||
bool success = true;
|
||||
struct tm ts;
|
||||
|
||||
/* Only attempt to write a record if a connection handle was provided/
|
||||
/* Only attempt to write a record if a connection handle was provided.
|
||||
Also check that the repmgr schema has been properly initialised - if
|
||||
not it means no configuration file was provided, which can happen with
|
||||
e.g. `repmgr standby clone`, and we won't know which schema to write to.
|
||||
@@ -1283,3 +1349,56 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
bool
|
||||
update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
log_debug(_("update_node_record_set_upstream(): Updating node %i's upstream node to %i\n"), this_node_id, new_upstream_node_id);
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
" SET upstream_node_id = %i "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
new_upstream_node_id,
|
||||
cluster_name,
|
||||
this_node_id);
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set new upstream node id: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
PGresult *
|
||||
get_node_record(PGconn *conn, char *cluster, int node_id)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
sprintf(sqlquery,
|
||||
"SELECT id, upstream_node_id, conninfo, type, slot_name, active "
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
cluster,
|
||||
node_id);
|
||||
|
||||
log_debug("get_node_record(): %s\n", sqlquery);
|
||||
|
||||
return PQexec(conn, sqlquery);
|
||||
}
|
||||
|
||||
@@ -30,6 +30,9 @@ PGconn *establish_db_connection(const char *conninfo,
|
||||
PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||
const char *values[],
|
||||
const bool exit_on_error);
|
||||
bool begin_transaction(PGconn *conn);
|
||||
bool commit_transaction(PGconn *conn);
|
||||
bool rollback_transaction(PGconn *conn);
|
||||
bool check_cluster_schema(PGconn *conn);
|
||||
int is_standby(PGconn *conn);
|
||||
bool is_pgup(PGconn *conn, int timeout);
|
||||
@@ -63,6 +66,7 @@ bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
|
||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||
PGresult * get_node_record(PGconn *conn, char *cluster, int node_id);
|
||||
|
||||
#endif
|
||||
|
||||
4
debian/repmgr.repmgrd.default
vendored
4
debian/repmgr.repmgrd.default
vendored
@@ -12,7 +12,7 @@ REPMGRD_ENABLED=no
|
||||
#REPMGRD_USER=postgres
|
||||
|
||||
# repmgrd binary
|
||||
#REPMGR_BIN=/usr/bin/repmgr
|
||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
||||
|
||||
# pid file
|
||||
#REPMGR_PIDFILE=/var/run/repmgrd.pid
|
||||
#REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
||||
|
||||
@@ -35,5 +35,6 @@
|
||||
#define ERR_BAD_SSH 12
|
||||
#define ERR_SYS_FAILURE 13
|
||||
#define ERR_BAD_BASEBACKUP 14
|
||||
#define ERR_INTERNAL 15
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
26
log.c
26
log.c
@@ -144,12 +144,32 @@ logger_init(t_configuration_options * opts, const char *ident, const char *level
|
||||
{
|
||||
FILE *fd;
|
||||
|
||||
fd = freopen(opts->logfile, "a", stderr);
|
||||
/* Check if we can write to the specified file before redirecting
|
||||
* stderr - if freopen() fails, stderr output will vanish into
|
||||
* the ether and the user won't know what's going on.
|
||||
*/
|
||||
|
||||
fd = fopen(opts->logfile, "a");
|
||||
if (fd == NULL)
|
||||
{
|
||||
fprintf(stderr, "error reopening stderr to '%s': %s",
|
||||
opts->logfile, strerror(errno));
|
||||
stderr_log_err(_("Unable to open specified logfile '%s' for writing: %s\n"), opts->logfile, strerror(errno));
|
||||
stderr_log_err(_("Terminating\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
fclose(fd);
|
||||
|
||||
stderr_log_notice(_("Redirecting logging output to '%s'\n"), opts->logfile);
|
||||
fd = freopen(opts->logfile, "a", stderr);
|
||||
|
||||
/* It's possible freopen() may still fail due to e.g. a race condition;
|
||||
as it's not feasible to restore stderr after a failed freopen(),
|
||||
we'll write to stdout as a last resort.
|
||||
*/
|
||||
if (fd == NULL)
|
||||
{
|
||||
printf(_("Unable to open specified logfile %s for writing: %s\n"), opts->logfile, strerror(errno));
|
||||
printf(_("Terminating\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
495
repmgr.c
495
repmgr.c
@@ -7,18 +7,19 @@
|
||||
*
|
||||
* Commands implemented are:
|
||||
*
|
||||
* MASTER REGISTER
|
||||
* [ MASTER | PRIMARY ] REGISTER
|
||||
*
|
||||
* STANDBY REGISTER
|
||||
* STANDBY UNREGISTER
|
||||
* STANDBY CLONE
|
||||
* STANDBY FOLLOW
|
||||
* STANDBY PROMOTE
|
||||
*
|
||||
* WITNESS CREATE
|
||||
*
|
||||
* CLUSTER SHOW
|
||||
* CLUSTER CLEANUP
|
||||
*
|
||||
* WITNESS CREATE
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
@@ -53,15 +54,23 @@
|
||||
|
||||
#define RECOVERY_FILE "recovery.conf"
|
||||
|
||||
#ifndef TABLESPACE_MAP
|
||||
#define TABLESPACE_MAP "tablespace_map"
|
||||
#endif
|
||||
|
||||
#define WITNESS_DEFAULT_PORT "5499" /* If this value is ever changed, remember
|
||||
* to update comments and documentation */
|
||||
|
||||
#define NO_ACTION 0 /* Dummy default action */
|
||||
#define MASTER_REGISTER 1
|
||||
#define STANDBY_REGISTER 2
|
||||
#define STANDBY_CLONE 3
|
||||
#define STANDBY_PROMOTE 4
|
||||
#define STANDBY_FOLLOW 5
|
||||
#define WITNESS_CREATE 6
|
||||
#define CLUSTER_SHOW 7
|
||||
#define CLUSTER_CLEANUP 8
|
||||
#define STANDBY_UNREGISTER 3
|
||||
#define STANDBY_CLONE 4
|
||||
#define STANDBY_PROMOTE 5
|
||||
#define STANDBY_FOLLOW 6
|
||||
#define WITNESS_CREATE 7
|
||||
#define CLUSTER_SHOW 8
|
||||
#define CLUSTER_CLEANUP 9
|
||||
|
||||
|
||||
|
||||
@@ -69,7 +78,7 @@ static bool create_recovery_file(const char *data_dir);
|
||||
static int test_ssh_connection(char *host, char *remote_user);
|
||||
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
char *local_path, bool is_directory, int server_version_num);
|
||||
static int run_basebackup(void);
|
||||
static int run_basebackup(const char *data_dir);
|
||||
static void check_parameters_for_action(const int action);
|
||||
static bool create_schema(PGconn *conn);
|
||||
static void write_primary_conninfo(char *line);
|
||||
@@ -83,6 +92,7 @@ static char *make_pg_path(char *file);
|
||||
|
||||
static void do_master_register(void);
|
||||
static void do_standby_register(void);
|
||||
static void do_standby_unregister(void);
|
||||
static void do_standby_clone(void);
|
||||
static void do_standby_promote(void);
|
||||
static void do_standby_follow(void);
|
||||
@@ -129,7 +139,7 @@ main(int argc, char **argv)
|
||||
{"port", required_argument, NULL, 'p'},
|
||||
{"username", required_argument, NULL, 'U'},
|
||||
{"superuser", required_argument, NULL, 'S'},
|
||||
{"dest-dir", required_argument, NULL, 'D'},
|
||||
{"data-dir", required_argument, NULL, 'D'},
|
||||
{"local-port", required_argument, NULL, 'l'},
|
||||
{"config-file", required_argument, NULL, 'f'},
|
||||
{"remote-user", required_argument, NULL, 'R'},
|
||||
@@ -145,6 +155,8 @@ main(int argc, char **argv)
|
||||
{"check-upstream-config", no_argument, NULL, 2},
|
||||
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
||||
{"ignore-external-config-files", no_argument, NULL, 4},
|
||||
{"help", no_argument, NULL, '?'},
|
||||
{"version", no_argument, NULL, 'V'},
|
||||
{NULL, 0, NULL, 0}
|
||||
};
|
||||
|
||||
@@ -158,28 +170,20 @@ main(int argc, char **argv)
|
||||
|
||||
progname = get_progname(argv[0]);
|
||||
|
||||
if (argc > 1)
|
||||
{
|
||||
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
|
||||
{
|
||||
help(progname);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
||||
{
|
||||
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
/* Prevent getopt_long() from printing an error message */
|
||||
opterr = 0;
|
||||
|
||||
while ((c = getopt_long(argc, argv, "d:h:p:U:S:D:l:f:R:w:k:FWIvb:r:c", long_options,
|
||||
while ((c = getopt_long(argc, argv, "?Vd:h:p:U:S:D:l:f:R:w:k:FWIvb:r:c", long_options,
|
||||
&optindex)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '?':
|
||||
help(progname);
|
||||
exit(SUCCESS);
|
||||
case 'V':
|
||||
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
||||
exit(SUCCESS);
|
||||
case 'd':
|
||||
strncpy(runtime_options.dbname, optarg, MAXLEN);
|
||||
break;
|
||||
@@ -299,8 +303,10 @@ main(int argc, char **argv)
|
||||
|
||||
/*
|
||||
* Now we need to obtain the action, this comes in one of these forms:
|
||||
* MASTER REGISTER | STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW
|
||||
* [node]} | WITNESS CREATE CLUSTER {SHOW | CLEANUP}
|
||||
* MASTER REGISTER |
|
||||
* STANDBY {REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} |
|
||||
* WITNESS CREATE |
|
||||
* CLUSTER {SHOW | CLEANUP}
|
||||
*
|
||||
* the node part is optional, if we receive it then we shouldn't have
|
||||
* received a -h option
|
||||
@@ -310,6 +316,8 @@ main(int argc, char **argv)
|
||||
server_mode = argv[optind++];
|
||||
if (strcasecmp(server_mode, "STANDBY") != 0 &&
|
||||
strcasecmp(server_mode, "MASTER") != 0 &&
|
||||
/* allow PRIMARY as synonym for MASTER */
|
||||
strcasecmp(server_mode, "PRIMARY") != 0 &&
|
||||
strcasecmp(server_mode, "WITNESS") != 0 &&
|
||||
strcasecmp(server_mode, "CLUSTER") != 0)
|
||||
{
|
||||
@@ -324,7 +332,7 @@ main(int argc, char **argv)
|
||||
{
|
||||
server_cmd = argv[optind++];
|
||||
/* check possibilities for all server modes */
|
||||
if (strcasecmp(server_mode, "MASTER") == 0)
|
||||
if (strcasecmp(server_mode, "MASTER") == 0 || strcasecmp(server_mode, "PRIMARY") == 0 )
|
||||
{
|
||||
if (strcasecmp(server_cmd, "REGISTER") == 0)
|
||||
action = MASTER_REGISTER;
|
||||
@@ -333,6 +341,8 @@ main(int argc, char **argv)
|
||||
{
|
||||
if (strcasecmp(server_cmd, "REGISTER") == 0)
|
||||
action = STANDBY_REGISTER;
|
||||
if (strcasecmp(server_cmd, "UNREGISTER") == 0)
|
||||
action = STANDBY_UNREGISTER;
|
||||
else if (strcasecmp(server_cmd, "CLONE") == 0)
|
||||
action = STANDBY_CLONE;
|
||||
else if (strcasecmp(server_cmd, "PROMOTE") == 0)
|
||||
@@ -425,7 +435,6 @@ main(int argc, char **argv)
|
||||
|
||||
if (runtime_options.verbose && runtime_options.config_file[0])
|
||||
{
|
||||
|
||||
log_notice(_("opening configuration file: %s\n"),
|
||||
runtime_options.config_file);
|
||||
}
|
||||
@@ -435,7 +444,7 @@ main(int argc, char **argv)
|
||||
* however if available we'll parse it anyway for options like 'log_level',
|
||||
* 'use_replication_slots' etc.
|
||||
*/
|
||||
config_file_parsed = parse_config(runtime_options.config_file, &options);
|
||||
config_file_parsed = load_config(runtime_options.config_file, &options, argv[0]);
|
||||
|
||||
/*
|
||||
* Initialise pg_bindir - command line parameter will override
|
||||
@@ -542,6 +551,9 @@ main(int argc, char **argv)
|
||||
case STANDBY_REGISTER:
|
||||
do_standby_register();
|
||||
break;
|
||||
case STANDBY_UNREGISTER:
|
||||
do_standby_unregister();
|
||||
break;
|
||||
case STANDBY_CLONE:
|
||||
do_standby_clone();
|
||||
break;
|
||||
@@ -691,6 +703,7 @@ static void
|
||||
do_master_register(void)
|
||||
{
|
||||
PGconn *conn;
|
||||
PGconn *master_conn;
|
||||
|
||||
bool schema_exists = false;
|
||||
int ret;
|
||||
@@ -716,56 +729,74 @@ do_master_register(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Check if there is a schema for this cluster */
|
||||
/* Create schema and associated database objects, if it does not exist */
|
||||
schema_exists = check_cluster_schema(conn);
|
||||
|
||||
/* If schema exists and force option not selected, raise an error */
|
||||
if(schema_exists && !runtime_options.force)
|
||||
{
|
||||
log_notice(_("schema '%s' already exists.\n"), get_repmgr_schema());
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (!schema_exists)
|
||||
{
|
||||
log_info(_("master register: creating database objects inside the %s schema\n"),
|
||||
get_repmgr_schema());
|
||||
|
||||
/* ok, create the schema */
|
||||
begin_transaction(conn);
|
||||
|
||||
if (!create_schema(conn))
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
PGconn *master_conn;
|
||||
log_err(_("Unable to create repmgr schema - see preceding error message(s); aborting\n"));
|
||||
rollback_transaction(conn);
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
commit_transaction(conn);
|
||||
}
|
||||
|
||||
/* Ensure there isn't any other master already registered */
|
||||
master_conn = get_master_connection(conn,
|
||||
options.cluster_name, NULL, NULL);
|
||||
if (master_conn != NULL)
|
||||
|
||||
if (master_conn != NULL && !runtime_options.force)
|
||||
{
|
||||
PQfinish(master_conn);
|
||||
log_warning(_("there is a master already in cluster %s\n"),
|
||||
log_err(_("there is a master already in cluster %s\n"),
|
||||
options.cluster_name);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
PQfinish(master_conn);
|
||||
|
||||
/* XXX we should check if a node with a different ID is registered as
|
||||
master, otherwise it would be possible to insert a duplicate record
|
||||
with --force, which would result in an unwelcome "multi-master" situation
|
||||
*/
|
||||
|
||||
/* Delete any existing record for this node if --force set */
|
||||
if (runtime_options.force)
|
||||
{
|
||||
bool node_record_deleted = delete_node_record(conn,
|
||||
PGresult *res;
|
||||
bool node_record_deleted;
|
||||
|
||||
begin_transaction(conn);
|
||||
|
||||
res = get_node_record(conn, options.cluster_name, options.node);
|
||||
if (PQntuples(res))
|
||||
{
|
||||
log_notice(_("deleting existing master record with id %i\n"), options.node);
|
||||
|
||||
node_record_deleted = delete_node_record(conn,
|
||||
options.node,
|
||||
"master register");
|
||||
|
||||
if (node_record_deleted == false)
|
||||
{
|
||||
PQfinish(master_conn);
|
||||
rollback_transaction(conn);
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
commit_transaction(conn);
|
||||
}
|
||||
|
||||
|
||||
/* Now register the master */
|
||||
record_created = create_node_record(conn,
|
||||
"master register",
|
||||
@@ -908,6 +939,85 @@ do_standby_register(void)
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
do_standby_unregister(void)
|
||||
{
|
||||
PGconn *conn;
|
||||
PGconn *master_conn;
|
||||
int ret;
|
||||
|
||||
bool node_record_deleted;
|
||||
|
||||
log_info(_("connecting to standby database\n"));
|
||||
conn = establish_db_connection(options.conninfo, true);
|
||||
|
||||
/* Check we are a standby */
|
||||
ret = is_standby(conn);
|
||||
if (ret == 0 || ret == -1)
|
||||
{
|
||||
log_err(_(ret == 0 ? "this node should be a standby (%s)\n" :
|
||||
"connection to node (%s) lost\n"), options.conninfo);
|
||||
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Check if there is a schema for this cluster */
|
||||
if (check_cluster_schema(conn) == false)
|
||||
{
|
||||
/* schema doesn't exist */
|
||||
log_err(_("schema '%s' doesn't exist.\n"), get_repmgr_schema());
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* check if there is a master in this cluster */
|
||||
log_info(_("connecting to master database\n"));
|
||||
master_conn = get_master_connection(conn, options.cluster_name,
|
||||
NULL, NULL);
|
||||
if (!master_conn)
|
||||
{
|
||||
log_err(_("a master must be defined before unregistering a slave\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that standby and master are supported and compatible server
|
||||
* versions
|
||||
*/
|
||||
check_master_standby_version_match(conn, master_conn);
|
||||
|
||||
/* Now unregister the standby */
|
||||
log_info(_("unregistering the standby\n"));
|
||||
node_record_deleted = delete_node_record(master_conn,
|
||||
options.node,
|
||||
"standby unregister");
|
||||
|
||||
if (node_record_deleted == false)
|
||||
{
|
||||
PQfinish(master_conn);
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Log the event */
|
||||
create_event_record(master_conn,
|
||||
&options,
|
||||
options.node,
|
||||
"standby_unregister",
|
||||
true,
|
||||
NULL);
|
||||
|
||||
PQfinish(master_conn);
|
||||
PQfinish(conn);
|
||||
|
||||
log_info(_("standby unregistration complete\n"));
|
||||
log_notice(_("standby node correctly unregistered for cluster %s with id %d (conninfo: %s)\n"),
|
||||
options.cluster_name, options.node, options.conninfo);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
do_standby_clone(void)
|
||||
{
|
||||
@@ -1009,14 +1119,14 @@ do_standby_clone(void)
|
||||
*
|
||||
* -T/--tablespace-mapping is not available as a pg_basebackup option for
|
||||
* PostgreSQL 9.3 - we can only handle that with rsync, so if `--rsync-only`
|
||||
# not set, fail with an error
|
||||
* not set, fail with an error
|
||||
*/
|
||||
|
||||
if (options.tablespace_mapping.head != NULL)
|
||||
{
|
||||
TablespaceListCell *cell;
|
||||
|
||||
if(get_server_version(upstream_conn, NULL) < 90400)
|
||||
if (get_server_version(upstream_conn, NULL) < 90400 && !runtime_options.rsync_only)
|
||||
{
|
||||
log_err(_("in PostgreSQL 9.3, tablespace mapping can only be used in conjunction with --rsync-only\n"));
|
||||
PQfinish(upstream_conn);
|
||||
@@ -1184,6 +1294,15 @@ do_standby_clone(void)
|
||||
|
||||
if (runtime_options.rsync_only)
|
||||
{
|
||||
PQExpBufferData tablespace_map;
|
||||
bool tablespace_map_rewrite = false;
|
||||
|
||||
/* For 9.5 and greater, create our own tablespace_map file */
|
||||
if (server_version_num >= 90500)
|
||||
{
|
||||
initPQExpBuffer(&tablespace_map);
|
||||
}
|
||||
|
||||
/*
|
||||
* From pg 9.1 default is to wait for a sync standby to ack, avoid that by
|
||||
* turning off sync rep for this session
|
||||
@@ -1258,6 +1377,7 @@ do_standby_clone(void)
|
||||
initPQExpBuffer(&tblspc_dir_dst);
|
||||
initPQExpBuffer(&tblspc_oid);
|
||||
|
||||
|
||||
appendPQExpBuffer(&tblspc_oid, "%s", PQgetvalue(res, i, 0));
|
||||
appendPQExpBuffer(&tblspc_dir_src, "%s", PQgetvalue(res, i, 1));
|
||||
|
||||
@@ -1292,8 +1412,21 @@ do_standby_clone(void)
|
||||
tblspc_dir_src.data, tblspc_dir_dst.data,
|
||||
true, server_version_num);
|
||||
|
||||
/* Update symlink in pg_tblspc */
|
||||
|
||||
/* Update symlinks in pg_tblspc */
|
||||
if (mapping_found == true)
|
||||
{
|
||||
/* 9.5 and later - create a tablespace_map file */
|
||||
if (server_version_num >= 90500)
|
||||
{
|
||||
tablespace_map_rewrite = true;
|
||||
appendPQExpBuffer(&tablespace_map,
|
||||
"%s %s\n",
|
||||
tblspc_oid.data,
|
||||
tblspc_dir_dst.data);
|
||||
}
|
||||
/* Pre-9.5, we have to manipulate the symlinks in pg_tblspc/ ourselves */
|
||||
else
|
||||
{
|
||||
PQExpBufferData tblspc_symlink;
|
||||
|
||||
@@ -1315,13 +1448,49 @@ do_standby_clone(void)
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if(server_version_num >= 90500 && tablespace_map_rewrite == true)
|
||||
{
|
||||
PQExpBufferData tablespace_map_filename;
|
||||
FILE *tablespace_map_file;
|
||||
initPQExpBuffer(&tablespace_map_filename);
|
||||
appendPQExpBuffer(&tablespace_map_filename, "%s/%s",
|
||||
local_data_directory,
|
||||
TABLESPACE_MAP);
|
||||
|
||||
/* Unlink any existing file (it should be there, but we don't care if it isn't) */
|
||||
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
|
||||
{
|
||||
log_err(_("unable to remove tablespace_map file %s\n"), tablespace_map_filename.data);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
tablespace_map_file = fopen(tablespace_map_filename.data, "w");
|
||||
if (tablespace_map_file == NULL)
|
||||
{
|
||||
log_err(_("unable to create tablespace_map file '%s'\n"), tablespace_map_filename.data);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (fputs(tablespace_map.data, tablespace_map_file) == EOF)
|
||||
{
|
||||
log_err(_("unable to write to tablespace_map file '%s'\n"), tablespace_map_filename.data);
|
||||
fclose(tablespace_map_file);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
fclose(tablespace_map_file);
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
}
|
||||
else
|
||||
{
|
||||
r = run_basebackup();
|
||||
r = run_basebackup(local_data_directory);
|
||||
if (r != 0)
|
||||
{
|
||||
log_warning(_("standby clone: base backup failed\n"));
|
||||
@@ -1449,6 +1618,24 @@ stop_backup:
|
||||
exit(retval);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove existing WAL from the target directory, since
|
||||
* rsync's --exclude option doesn't do it.
|
||||
*/
|
||||
if (runtime_options.force)
|
||||
{
|
||||
char script[MAXLEN];
|
||||
maxlen_snprintf(script, "rm -rf %s/pg_xlog/*",
|
||||
local_data_directory);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"),
|
||||
local_data_directory);
|
||||
exit(ERR_BAD_RSYNC);
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally, write the recovery.conf file */
|
||||
create_recovery_file(local_data_directory);
|
||||
|
||||
@@ -1678,6 +1865,7 @@ do_standby_follow(void)
|
||||
char script[MAXLEN];
|
||||
char master_conninfo[MAXLEN];
|
||||
PGconn *master_conn;
|
||||
int master_id;
|
||||
|
||||
int r,
|
||||
retval;
|
||||
@@ -1716,7 +1904,7 @@ do_standby_follow(void)
|
||||
}
|
||||
|
||||
master_conn = get_master_connection(conn,
|
||||
options.cluster_name, NULL, (char *) &master_conninfo);
|
||||
options.cluster_name, &master_id, (char *) &master_conninfo);
|
||||
}
|
||||
while (master_conn == NULL && runtime_options.wait_for_master);
|
||||
|
||||
@@ -1753,7 +1941,6 @@ do_standby_follow(void)
|
||||
strncpy(runtime_options.host, PQhost(master_conn), MAXLEN);
|
||||
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
|
||||
strncpy(runtime_options.username, PQuser(master_conn), MAXLEN);
|
||||
PQfinish(master_conn);
|
||||
|
||||
log_info(_("changing standby's master\n"));
|
||||
|
||||
@@ -1785,6 +1972,16 @@ do_standby_follow(void)
|
||||
exit(ERR_NO_RESTART);
|
||||
}
|
||||
|
||||
if (update_node_record_set_upstream(master_conn, options.cluster_name,
|
||||
options.node, master_id) == false)
|
||||
{
|
||||
log_err(_("unable to update upstream node"));
|
||||
PQfinish(master_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
PQfinish(master_conn);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1808,6 +2005,9 @@ do_witness_create(void)
|
||||
bool success;
|
||||
bool record_created;
|
||||
|
||||
PQconninfoOption *conninfo_options;
|
||||
PQconninfoOption *conninfo_option;
|
||||
|
||||
/* Connection parameters for master only */
|
||||
keywords[0] = "host";
|
||||
values[0] = runtime_options.host;
|
||||
@@ -1818,7 +2018,7 @@ do_witness_create(void)
|
||||
masterconn = establish_db_connection_by_params(keywords, values, true);
|
||||
if (!masterconn)
|
||||
{
|
||||
/* No event logging possible as we can't connect to the master */
|
||||
/* No event logging possible here as we can't connect to the master */
|
||||
log_err(_("unable to connect to master\n"));
|
||||
exit(ERR_DB_CON);
|
||||
}
|
||||
@@ -1873,7 +2073,7 @@ do_witness_create(void)
|
||||
}
|
||||
|
||||
/* Check this directory could be used as a PGDATA dir */
|
||||
if (!create_pg_dir(runtime_options.dest_dir, runtime_options.force))
|
||||
if (!create_witness_pg_dir(runtime_options.dest_dir, runtime_options.force))
|
||||
{
|
||||
PQExpBufferData errmsg;
|
||||
initPQExpBuffer(&errmsg);
|
||||
@@ -1949,6 +2149,27 @@ do_witness_create(void)
|
||||
xsnprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname);
|
||||
fputs(buf, pg_conf);
|
||||
|
||||
|
||||
/* Attempt to extract a port number from the provided conninfo string
|
||||
* This will override any value provided with '-l/--local-port', as it's
|
||||
* what we'll later try and connect to anyway. '-l/--local-port' should
|
||||
* be deprecated.
|
||||
*/
|
||||
conninfo_options = PQconninfoParse(options.conninfo, NULL);
|
||||
|
||||
for (conninfo_option = conninfo_options; conninfo_option->keyword != NULL; conninfo_option++)
|
||||
{
|
||||
if (strcmp(conninfo_option->keyword, "port") == 0)
|
||||
{
|
||||
if (conninfo_option->val != NULL && conninfo_option->val[0] != '\0')
|
||||
{
|
||||
strncpy(runtime_options.localport, conninfo_option->val, MAXLEN);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
PQconninfoFree(conninfo_options);
|
||||
|
||||
/*
|
||||
* If not specified by the user, the default port for the witness server
|
||||
* is 5499; this is intended to support running the witness server as
|
||||
@@ -1956,7 +2177,7 @@ do_witness_create(void)
|
||||
* dedicated server.
|
||||
*/
|
||||
if (!runtime_options.localport[0])
|
||||
strncpy(runtime_options.localport, "5499", MAXLEN);
|
||||
strncpy(runtime_options.localport, WITNESS_DEFAULT_PORT, MAXLEN);
|
||||
|
||||
xsnprintf(buf, sizeof(buf), "port = %s\n", runtime_options.localport);
|
||||
fputs(buf, pg_conf);
|
||||
@@ -2105,6 +2326,19 @@ do_witness_create(void)
|
||||
|
||||
/* register ourselves in the master */
|
||||
|
||||
if (runtime_options.force)
|
||||
{
|
||||
bool node_record_deleted = delete_node_record(masterconn,
|
||||
options.node,
|
||||
"witness create");
|
||||
|
||||
if (node_record_deleted == false)
|
||||
{
|
||||
PQfinish(masterconn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
record_created = create_node_record(masterconn,
|
||||
"witness create",
|
||||
options.node,
|
||||
@@ -2134,8 +2368,12 @@ do_witness_create(void)
|
||||
|
||||
log_info(_("starting copy of configuration from master...\n"));
|
||||
|
||||
begin_transaction(witnessconn);
|
||||
|
||||
|
||||
if (!create_schema(witnessconn))
|
||||
{
|
||||
rollback_transaction(witnessconn);
|
||||
create_event_record(masterconn,
|
||||
&options,
|
||||
options.node,
|
||||
@@ -2147,6 +2385,8 @@ do_witness_create(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
commit_transaction(witnessconn);
|
||||
|
||||
/* copy configuration from master, only repl_nodes is needed */
|
||||
if (!copy_configuration(masterconn, witnessconn, options.cluster_name))
|
||||
{
|
||||
@@ -2199,16 +2439,18 @@ do_witness_create(void)
|
||||
static void
|
||||
help(const char *progname)
|
||||
{
|
||||
printf(_("\n%s: Replicator manager \n"), progname);
|
||||
printf(_("%s: replication management tool for PostgreSQL\n"), progname);
|
||||
printf(_("\n"));
|
||||
printf(_("Usage:\n"));
|
||||
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
||||
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
||||
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow}\n"),
|
||||
progname);
|
||||
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
|
||||
printf(_("\nGeneral options:\n"));
|
||||
printf(_(" --help show this help, then exit\n"));
|
||||
printf(_(" --version output version information, then exit\n"));
|
||||
printf(_(" --verbose output verbose activity information\n"));
|
||||
printf(_("\n"));
|
||||
printf(_("General options:\n"));
|
||||
printf(_(" -?, --help show this help, then exit\n"));
|
||||
printf(_(" -V, --version output version information, then exit\n"));
|
||||
printf(_(" -v, --verbose output verbose activity information\n"));
|
||||
printf(_("\nConnection options:\n"));
|
||||
printf(_(" -d, --dbname=DBNAME database to connect to\n"));
|
||||
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
|
||||
@@ -2218,11 +2460,12 @@ help(const char *progname)
|
||||
printf(_(" -b, --pg_bindir=PATH path to PostgreSQL binaries (optional)\n"));
|
||||
printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
|
||||
" copied to\n"));
|
||||
printf(_(" -l, --local-port=PORT standby or witness server local port\n"));
|
||||
printf(_(" -f, --config-file=PATH path to the configuration file\n"));
|
||||
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
||||
printf(_(" -S, --superuser=USERNAME superuser username for witness database\n" \
|
||||
" (default: postgres)\n"));
|
||||
/* remove this line in the next significant release */
|
||||
printf(_(" -l, --local-port=PORT (DEPRECATED) witness server local port (default: %s)\n"), WITNESS_DEFAULT_PORT);
|
||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC\n" \
|
||||
" wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS);
|
||||
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
|
||||
@@ -2241,8 +2484,10 @@ help(const char *progname)
|
||||
printf(_(" master register - registers the master in a cluster\n"));
|
||||
printf(_(" standby clone [node] - creates a new standby\n"));
|
||||
printf(_(" standby register - registers a standby in a cluster\n"));
|
||||
printf(_(" standby unregister - unregisters a standby in a cluster\n"));
|
||||
printf(_(" standby promote - promotes a specific standby to master\n"));
|
||||
printf(_(" standby follow - makes standby follow a new master\n"));
|
||||
printf(_(" witness create - creates a new witness server\n"));
|
||||
printf(_(" cluster show - displays information about cluster nodes\n"));
|
||||
printf(_(" cluster cleanup - prunes or truncates monitoring history\n" \
|
||||
" (monitoring history creation requires repmgrd\n" \
|
||||
@@ -2471,17 +2716,19 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
|
||||
|
||||
static int
|
||||
run_basebackup()
|
||||
run_basebackup(const char *data_dir)
|
||||
{
|
||||
char script[MAXLEN];
|
||||
int r = 0;
|
||||
PQExpBufferData params;
|
||||
TablespaceListCell *cell;
|
||||
|
||||
/* Creare pg_basebackup command line options */
|
||||
/* Create pg_basebackup command line options */
|
||||
|
||||
initPQExpBuffer(¶ms);
|
||||
|
||||
appendPQExpBuffer(¶ms, " -D %s", data_dir);
|
||||
|
||||
if (strlen(runtime_options.host))
|
||||
{
|
||||
appendPQExpBuffer(¶ms, " -h %s", runtime_options.host);
|
||||
@@ -2497,11 +2744,6 @@ run_basebackup()
|
||||
appendPQExpBuffer(¶ms, " -U %s", runtime_options.username);
|
||||
}
|
||||
|
||||
if(strlen(runtime_options.dest_dir))
|
||||
{
|
||||
appendPQExpBuffer(¶ms, " -D %s", runtime_options.dest_dir);
|
||||
}
|
||||
|
||||
if (runtime_options.fast_checkpoint) {
|
||||
appendPQExpBuffer(¶ms, " -c fast");
|
||||
}
|
||||
@@ -2577,6 +2819,23 @@ check_parameters_for_action(const int action)
|
||||
error_list_append(_("destination directory not required when executing STANDBY REGISTER"));
|
||||
}
|
||||
break;
|
||||
case STANDBY_UNREGISTER:
|
||||
|
||||
/*
|
||||
* To unregister a standby we only need the repmgr.conf we don't
|
||||
* need connection parameters to the master because we can detect
|
||||
* the master in repl_nodes
|
||||
*/
|
||||
if (runtime_options.host[0] || runtime_options.masterport[0] ||
|
||||
runtime_options.username[0] || runtime_options.dbname[0])
|
||||
{
|
||||
error_list_append(_("master connection parameters not required when executing STANDBY UNREGISTER"));
|
||||
}
|
||||
if (runtime_options.dest_dir[0])
|
||||
{
|
||||
error_list_append(_("destination directory not required when executing STANDBY UNREGISTER"));
|
||||
}
|
||||
break;
|
||||
case STANDBY_PROMOTE:
|
||||
|
||||
/*
|
||||
@@ -2676,6 +2935,7 @@ check_parameters_for_action(const int action)
|
||||
}
|
||||
|
||||
|
||||
/* The caller should wrap this function in a transaction */
|
||||
static bool
|
||||
create_schema(PGconn *conn)
|
||||
{
|
||||
@@ -2690,8 +2950,11 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create the schema %s: %s\n"),
|
||||
get_repmgr_schema(), PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
@@ -2714,6 +2977,10 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create the function repmgr_update_last_updated: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
@@ -2731,6 +2998,10 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create the function repmgr_get_last_updated: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
@@ -2759,8 +3030,11 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create table '%s.repl_nodes': %s\n"),
|
||||
get_repmgr_schema_quoted(conn), PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
@@ -2783,8 +3057,11 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create table '%s.repl_monitor': %s\n"),
|
||||
get_repmgr_schema_quoted(conn), PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
@@ -2806,8 +3083,11 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create table '%s.repl_events': %s\n"),
|
||||
get_repmgr_schema_quoted(conn), PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
@@ -2840,8 +3120,11 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create view %s.repl_status: %s\n"),
|
||||
get_repmgr_schema_quoted(conn), PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
@@ -2857,8 +3140,11 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
log_err(_("unable to create index 'idx_repl_status_sort' on '%s.repl_monitor': %s\n"),
|
||||
get_repmgr_schema_quoted(conn), PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
@@ -2878,6 +3164,8 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
fprintf(stderr, "Cannot create the function repmgr_update_standby_location: %s\n",
|
||||
PQerrorMessage(conn));
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
@@ -2896,7 +3184,10 @@ create_schema(PGconn *conn)
|
||||
{
|
||||
fprintf(stderr, "Cannot create the function repmgr_get_last_standby_location: %s\n",
|
||||
PQerrorMessage(conn));
|
||||
|
||||
if (res != NULL)
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
@@ -2945,7 +3236,7 @@ write_primary_conninfo(char *line)
|
||||
}
|
||||
|
||||
maxlen_snprintf(conn_buf, "port=%s%s%s%s%s",
|
||||
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432",
|
||||
(runtime_options.masterport[0]) ? runtime_options.masterport : DEF_PGPORT_STR,
|
||||
host_buf, user_buf, password_buf,
|
||||
appname_buf);
|
||||
|
||||
@@ -3259,18 +3550,7 @@ update_node_record_set_master(PGconn *conn, int this_node_id)
|
||||
|
||||
log_debug(_("Setting %i as master and marking existing master as failed\n"), this_node_id);
|
||||
|
||||
res = PQexec(conn, "BEGIN");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to begin transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
begin_transaction(conn);
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
@@ -3289,7 +3569,7 @@ update_node_record_set_master(PGconn *conn, int this_node_id)
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
PQexec(conn, "ROLLBACK");
|
||||
rollback_transaction(conn);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -3320,20 +3600,7 @@ update_node_record_set_master(PGconn *conn, int this_node_id)
|
||||
|
||||
PQclear(res);
|
||||
|
||||
res = PQexec(conn, "COMMIT");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set commit transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
return commit_transaction(conn);
|
||||
}
|
||||
|
||||
|
||||
@@ -3344,7 +3611,7 @@ do_check_upstream_config(void)
|
||||
bool config_ok;
|
||||
int server_version_num;
|
||||
|
||||
parse_config(runtime_options.config_file, &options);
|
||||
parse_config(&options);
|
||||
|
||||
/* Connection parameters for upstream server only */
|
||||
keywords[0] = "host";
|
||||
|
||||
8
repmgr.h
8
repmgr.h
@@ -20,11 +20,9 @@
|
||||
#ifndef _REPMGR_H_
|
||||
#define _REPMGR_H_
|
||||
|
||||
#include "postgres_fe.h"
|
||||
#include "libpq-fe.h"
|
||||
|
||||
|
||||
#include "getopt_long.h"
|
||||
#include <libpq-fe.h>
|
||||
#include <postgres_fe.h>
|
||||
#include <getopt_long.h>
|
||||
|
||||
#include "strutil.h"
|
||||
#include "dbutils.h"
|
||||
|
||||
152
repmgrd.c
152
repmgrd.c
@@ -88,11 +88,9 @@ static void check_node_configuration(void);
|
||||
|
||||
static void standby_monitor(void);
|
||||
static void witness_monitor(void);
|
||||
static bool check_connection(PGconn *conn, const char *type);
|
||||
static bool check_connection(PGconn **conn, const char *type, const char *conninfo);
|
||||
static bool set_local_node_failed(void);
|
||||
|
||||
static bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
||||
|
||||
static void update_shared_memory(char *last_wal_standby_applied);
|
||||
static void update_registration(void);
|
||||
static void do_master_failover(void);
|
||||
@@ -147,6 +145,8 @@ main(int argc, char **argv)
|
||||
{"monitoring-history", no_argument, NULL, 'm'},
|
||||
{"daemonize", no_argument, NULL, 'd'},
|
||||
{"pid-file", required_argument, NULL, 'p'},
|
||||
{"help", no_argument, NULL, '?'},
|
||||
{"version", no_argument, NULL, 'V'},
|
||||
{NULL, 0, NULL, 0}
|
||||
};
|
||||
|
||||
@@ -160,21 +160,7 @@ main(int argc, char **argv)
|
||||
int server_version_num = 0;
|
||||
progname = get_progname(argv[0]);
|
||||
|
||||
if (argc > 1)
|
||||
{
|
||||
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
|
||||
{
|
||||
help(progname);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
||||
{
|
||||
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
while ((c = getopt_long(argc, argv, "f:v:mdp:", long_options, &optindex)) != -1)
|
||||
while ((c = getopt_long(argc, argv, "?Vf:v:mdp:", long_options, &optindex)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
@@ -193,6 +179,12 @@ main(int argc, char **argv)
|
||||
case 'p':
|
||||
pid_file = optarg;
|
||||
break;
|
||||
case '?':
|
||||
help(progname);
|
||||
exit(SUCCESS);
|
||||
case 'V':
|
||||
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
||||
exit(SUCCESS);
|
||||
default:
|
||||
usage();
|
||||
exit(ERR_BAD_CONFIG);
|
||||
@@ -208,7 +200,7 @@ main(int argc, char **argv)
|
||||
* which case we'll need to refactor parse_config() not to abort,
|
||||
* and return the error message.
|
||||
*/
|
||||
parse_config(config_file, &local_options);
|
||||
load_config(config_file, &local_options, argv[0]);
|
||||
|
||||
if (daemonize)
|
||||
{
|
||||
@@ -319,7 +311,7 @@ main(int argc, char **argv)
|
||||
check_cluster_configuration(my_local_conn);
|
||||
check_node_configuration();
|
||||
|
||||
if (reload_config(config_file, &local_options))
|
||||
if (reload_config(&local_options))
|
||||
{
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
@@ -353,7 +345,7 @@ main(int argc, char **argv)
|
||||
*/
|
||||
do
|
||||
{
|
||||
if (check_connection(master_conn, "master"))
|
||||
if (check_connection(&master_conn, "master", NULL))
|
||||
{
|
||||
sleep(local_options.monitor_interval_secs);
|
||||
}
|
||||
@@ -368,10 +360,10 @@ main(int argc, char **argv)
|
||||
if (got_SIGHUP)
|
||||
{
|
||||
/*
|
||||
* if we can reload, then could need to change
|
||||
* if we can reload the configuration file, then could need to change
|
||||
* my_local_conn
|
||||
*/
|
||||
if (reload_config(config_file, &local_options))
|
||||
if (reload_config(&local_options))
|
||||
{
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
@@ -432,7 +424,7 @@ main(int argc, char **argv)
|
||||
check_cluster_configuration(my_local_conn);
|
||||
check_node_configuration();
|
||||
|
||||
if (reload_config(config_file, &local_options))
|
||||
if (reload_config(&local_options))
|
||||
{
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
@@ -483,7 +475,7 @@ main(int argc, char **argv)
|
||||
* if we can reload, then could need to change
|
||||
* my_local_conn
|
||||
*/
|
||||
if (reload_config(config_file, &local_options))
|
||||
if (reload_config(&local_options))
|
||||
{
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
@@ -536,7 +528,7 @@ witness_monitor(void)
|
||||
* of a missing master and promotion of a standby by that standby's
|
||||
* repmgrd, so we'll loop for a while before giving up.
|
||||
*/
|
||||
connection_ok = check_connection(master_conn, "master");
|
||||
connection_ok = check_connection(&master_conn, "master", NULL);
|
||||
|
||||
if (connection_ok == false)
|
||||
{
|
||||
@@ -693,6 +685,7 @@ standby_monitor(void)
|
||||
bool did_retry = false;
|
||||
|
||||
PGconn *upstream_conn;
|
||||
char upstream_conninfo[MAXCONNINFO];
|
||||
int upstream_node_id;
|
||||
t_node_info upstream_node;
|
||||
|
||||
@@ -704,7 +697,7 @@ standby_monitor(void)
|
||||
* no point in doing much else anyway
|
||||
*/
|
||||
|
||||
if (!check_connection(my_local_conn, "standby"))
|
||||
if (!check_connection(&my_local_conn, "standby", NULL))
|
||||
{
|
||||
PQExpBufferData errmsg;
|
||||
|
||||
@@ -730,7 +723,7 @@ standby_monitor(void)
|
||||
upstream_conn = get_upstream_connection(my_local_conn,
|
||||
local_options.cluster_name,
|
||||
local_options.node,
|
||||
&upstream_node_id, NULL);
|
||||
&upstream_node_id, upstream_conninfo);
|
||||
|
||||
type = upstream_node_id == master_options.node
|
||||
? "master"
|
||||
@@ -742,12 +735,12 @@ standby_monitor(void)
|
||||
* we cannot reconnect, try to get a new upstream node.
|
||||
*/
|
||||
|
||||
check_connection(upstream_conn, type); /* this takes up to
|
||||
* local_options.reconnect_attempts
|
||||
check_connection(&upstream_conn, type, upstream_conninfo);
|
||||
/*
|
||||
* This takes up to local_options.reconnect_attempts *
|
||||
* local_options.reconnect_intvl seconds
|
||||
*/
|
||||
|
||||
|
||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||
{
|
||||
PQfinish(upstream_conn);
|
||||
@@ -879,7 +872,7 @@ standby_monitor(void)
|
||||
log_err(_("standby node has disappeared, trying to reconnect...\n"));
|
||||
did_retry = true;
|
||||
|
||||
if (!check_connection(my_local_conn, "standby"))
|
||||
if (!check_connection(&my_local_conn, "standby", NULL))
|
||||
{
|
||||
set_local_node_failed();
|
||||
terminate(0);
|
||||
@@ -944,8 +937,9 @@ standby_monitor(void)
|
||||
master_conn = get_master_connection(my_local_conn,
|
||||
local_options.cluster_name,
|
||||
&master_options.node, NULL);
|
||||
|
||||
}
|
||||
if (PQstatus(master_conn) != CONNECTION_OK)
|
||||
PQreset(master_conn);
|
||||
|
||||
/*
|
||||
* Cancel any query that is still being executed, so i can insert the
|
||||
@@ -1152,8 +1146,8 @@ do_master_failover(void)
|
||||
total_nodes, visible_nodes);
|
||||
|
||||
/*
|
||||
* am i on the group that should keep alive? if i see less than half of
|
||||
* total_nodes then i should do nothing
|
||||
* Am I on the group that should keep alive? If I see less than half of
|
||||
* total_nodes then I should do nothing
|
||||
*/
|
||||
if (visible_nodes < (total_nodes / 2.0))
|
||||
{
|
||||
@@ -1524,7 +1518,7 @@ do_master_failover(void)
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
|
||||
/* update node information to reflect new status */
|
||||
if(update_node_record_set_upstream(new_master_conn, node_info.node_id, best_candidate.node_id) == false)
|
||||
if (update_node_record_set_upstream(new_master_conn, local_options.cluster_name, node_info.node_id, best_candidate.node_id) == false)
|
||||
{
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("Unable to update node record for node %i (following new upstream node %i)"),
|
||||
@@ -1592,7 +1586,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
||||
* Verify that we can still talk to the cluster master even though
|
||||
* node upstream is not available
|
||||
*/
|
||||
if (!check_connection(master_conn, "master"))
|
||||
if (!check_connection(&master_conn, "master", NULL))
|
||||
{
|
||||
log_err(_("do_upstream_standby_failover(): Unable to connect to last known master node\n"));
|
||||
return false;
|
||||
@@ -1668,7 +1662,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if(update_node_record_set_upstream(master_conn, node_info.node_id, upstream_node_id) == false)
|
||||
if (update_node_record_set_upstream(master_conn, local_options.cluster_name, node_info.node_id, upstream_node_id) == false)
|
||||
{
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -1681,7 +1675,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
||||
|
||||
|
||||
static bool
|
||||
check_connection(PGconn *conn, const char *type)
|
||||
check_connection(PGconn **conn, const char *type, const char *conninfo)
|
||||
{
|
||||
int connection_retries;
|
||||
|
||||
@@ -1692,7 +1686,16 @@ check_connection(PGconn *conn, const char *type)
|
||||
*/
|
||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||
{
|
||||
if (!is_pgup(conn, local_options.master_response_timeout))
|
||||
if (*conn == NULL)
|
||||
{
|
||||
if (conninfo == NULL)
|
||||
{
|
||||
log_err("INTERNAL ERROR: *conn == NULL && conninfo == NULL");
|
||||
terminate(ERR_INTERNAL);
|
||||
}
|
||||
*conn = establish_db_connection(conninfo, false);
|
||||
}
|
||||
if (!is_pgup(*conn, local_options.master_response_timeout))
|
||||
{
|
||||
log_warning(_("connection to %s has been lost, trying to recover... %i seconds before failover decision\n"),
|
||||
type,
|
||||
@@ -1710,9 +1713,9 @@ check_connection(PGconn *conn, const char *type)
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_pgup(conn, local_options.master_response_timeout))
|
||||
if (!is_pgup(*conn, local_options.master_response_timeout))
|
||||
{
|
||||
log_err(_("unable to reconnect to %s after %i seconds...\n"),
|
||||
log_err(_("unable to reconnect to %s (timeout %i seconds)...\n"),
|
||||
type,
|
||||
local_options.master_response_timeout
|
||||
);
|
||||
@@ -1740,7 +1743,7 @@ set_local_node_failed(void)
|
||||
int active_master_node_id = NODE_NOT_FOUND;
|
||||
char master_conninfo[MAXLEN];
|
||||
|
||||
if (!check_connection(master_conn, "master"))
|
||||
if (!check_connection(&master_conn, "master", NULL))
|
||||
{
|
||||
log_err(_("set_local_node_failed(): Unable to connect to last known master node\n"));
|
||||
return false;
|
||||
@@ -1966,17 +1969,21 @@ usage(void)
|
||||
void
|
||||
help(const char *progname)
|
||||
{
|
||||
printf(_("Usage: %s [OPTIONS]\n"), progname);
|
||||
printf(_("Replicator manager daemon for PostgreSQL.\n"));
|
||||
printf(_("\nOptions:\n"));
|
||||
printf(_(" --help show this help, then exit\n"));
|
||||
printf(_(" --version output version information, then exit\n"));
|
||||
printf(_("%s: replication management daemon for PostgreSQL\n"), progname);
|
||||
printf(_("\n"));
|
||||
printf(_("Usage:\n"));
|
||||
printf(_(" %s [OPTIONS]\n"), progname);
|
||||
printf(_("\n"));
|
||||
printf(_("Options:\n"));
|
||||
printf(_(" -?, --help show this help, then exit\n"));
|
||||
printf(_(" -V, --version output version information, then exit\n"));
|
||||
printf(_(" -v, --verbose output verbose activity information\n"));
|
||||
printf(_(" -m, --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n"));
|
||||
printf(_(" -f, --config-file=PATH path to the configuration file\n"));
|
||||
printf(_(" -d, --daemonize detach process from foreground\n"));
|
||||
printf(_(" -p, --pid-file=PATH write a PID file\n"));
|
||||
printf(_("\n%s monitors a cluster of servers.\n"), progname);
|
||||
printf(_("\n"));
|
||||
printf(_("%s monitors a cluster of servers and optionally performs failover.\n"), progname);
|
||||
}
|
||||
|
||||
|
||||
@@ -2219,23 +2226,12 @@ check_and_create_pid_file(const char *pid_file)
|
||||
t_node_info
|
||||
get_node_info(PGconn *conn, char *cluster, int node_id)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
|
||||
t_node_info node_info = { NODE_NOT_FOUND, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||
|
||||
sprintf(sqlquery,
|
||||
"SELECT id, upstream_node_id, conninfo, type, slot_name, active "
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
local_options.cluster_name,
|
||||
node_id);
|
||||
res = get_node_record(conn, cluster, node_id);
|
||||
|
||||
log_debug("get_node_info(): %s\n", sqlquery);
|
||||
|
||||
res = PQexec(my_local_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
PQExpBufferData errmsg;
|
||||
@@ -2299,37 +2295,3 @@ parse_node_type(const char *type)
|
||||
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
log_debug(_("update_node_record_set_upstream(): Updating node %i's upstream node to %i\n"), this_node_id, new_upstream_node_id);
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
" SET upstream_node_id = %i "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
new_upstream_node_id,
|
||||
local_options.cluster_name,
|
||||
this_node_id);
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set new upstream node id: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user