mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 08:06:29 +00:00
Merge branch 'master' of github.com:2ndQuadrant/repmgr into REL3_1_STABLE
This commit is contained in:
2
FAQ.md
2
FAQ.md
@@ -38,7 +38,7 @@ General
|
|||||||
|
|
||||||
No. Hash indexes and replication do not mix well and their use is
|
No. Hash indexes and replication do not mix well and their use is
|
||||||
explicitly discouraged; see:
|
explicitly discouraged; see:
|
||||||
http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
https://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
||||||
|
|
||||||
`repmgr`
|
`repmgr`
|
||||||
--------
|
--------
|
||||||
|
|||||||
10
HISTORY
10
HISTORY
@@ -1,3 +1,13 @@
|
|||||||
|
3.1.4 2016-07-
|
||||||
|
repmgr: new configuration option for setting "restore_command"
|
||||||
|
in the recovery.conf file generated by repmgr (Martín)
|
||||||
|
repmgr: add --csv option to "repmgr cluster show" (Gianni)
|
||||||
|
repmgr: enable provision of a conninfo string as the -d/--dbname
|
||||||
|
parameter, similar to other PostgreSQL utilities (Ian)
|
||||||
|
repmgr: during switchover operations improve detection of
|
||||||
|
demotion candidate shutdown (Ian)
|
||||||
|
various bugfixes and documentation updates (Ian, Martín)
|
||||||
|
|
||||||
3.1.3 2016-05-17
|
3.1.3 2016-05-17
|
||||||
repmgrd: enable monitoring when a standby is catching up by
|
repmgrd: enable monitoring when a standby is catching up by
|
||||||
replaying archived WAL (Ian)
|
replaying archived WAL (Ian)
|
||||||
|
|||||||
2
Makefile
2
Makefile
@@ -5,7 +5,7 @@
|
|||||||
HEADERS = $(wildcard *.h)
|
HEADERS = $(wildcard *.h)
|
||||||
|
|
||||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o
|
||||||
|
|
||||||
DATA = repmgr.sql uninstall_repmgr.sql
|
DATA = repmgr.sql uninstall_repmgr.sql
|
||||||
|
|
||||||
|
|||||||
79
README.md
79
README.md
@@ -48,7 +48,7 @@ This guide assumes that you are familiar with PostgreSQL administration and
|
|||||||
streaming replication concepts. For further details on streaming
|
streaming replication concepts. For further details on streaming
|
||||||
replication, see this link:
|
replication, see this link:
|
||||||
|
|
||||||
http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION
|
https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION
|
||||||
|
|
||||||
The following terms are used throughout the `repmgr` documentation.
|
The following terms are used throughout the `repmgr` documentation.
|
||||||
|
|
||||||
@@ -237,15 +237,19 @@ both servers.
|
|||||||
On the master server, a PostgreSQL instance must be initialised and running.
|
On the master server, a PostgreSQL instance must be initialised and running.
|
||||||
The following replication settings must be included in `postgresql.conf`:
|
The following replication settings must be included in `postgresql.conf`:
|
||||||
|
|
||||||
|
|
||||||
|
# Enable replication connections; set this figure to at least one more
|
||||||
|
# than the number of standbys which will connect to this server
|
||||||
|
# (note that repmgr will execute `pg_basebackup` in WAL streaming mode,
|
||||||
|
# which requires two free WAL senders)
|
||||||
|
|
||||||
|
max_wal_senders = 10
|
||||||
|
|
||||||
# Ensure WAL files contain enough information to enable read-only queries
|
# Ensure WAL files contain enough information to enable read-only queries
|
||||||
# on the standby
|
# on the standby
|
||||||
|
|
||||||
wal_level = 'hot_standby'
|
wal_level = 'hot_standby'
|
||||||
|
|
||||||
# Enable up to 10 replication connections
|
|
||||||
|
|
||||||
max_wal_senders = 10
|
|
||||||
|
|
||||||
# How much WAL to retain on the master to allow a temporarily
|
# How much WAL to retain on the master to allow a temporarily
|
||||||
# disconnected standby to catch up again. The larger this is, the
|
# disconnected standby to catch up again. The larger this is, the
|
||||||
# longer the standby can be disconnected. This is needed only in
|
# longer the standby can be disconnected. This is needed only in
|
||||||
@@ -259,6 +263,14 @@ The following replication settings must be included in `postgresql.conf`:
|
|||||||
|
|
||||||
hot_standby = on
|
hot_standby = on
|
||||||
|
|
||||||
|
# Enable WAL file archiving
|
||||||
|
archive_mode = on
|
||||||
|
|
||||||
|
# Set archive command to a script or application that will safely store
|
||||||
|
# you WALs in a secure place. /bin/true is an example of a command that
|
||||||
|
# ignores archiving. Use something more sensible.
|
||||||
|
archive_command = '/bin/true'
|
||||||
|
|
||||||
|
|
||||||
* * *
|
* * *
|
||||||
|
|
||||||
@@ -458,7 +470,11 @@ so should be used with care.
|
|||||||
Further options can be passed to the `pg_basebackup` utility via
|
Further options can be passed to the `pg_basebackup` utility via
|
||||||
the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
||||||
documentation for more details of available options:
|
documentation for more details of available options:
|
||||||
|
<<<<<<< HEAD
|
||||||
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||||
|
=======
|
||||||
|
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||||
|
>>>>>>> 72f9b0145afab1060dd1202c8f8937653c8b2e39
|
||||||
|
|
||||||
### Using rsync to clone a standby
|
### Using rsync to clone a standby
|
||||||
|
|
||||||
@@ -594,13 +610,13 @@ place. If using the default `pg_basebackup` method, we recommend setting
|
|||||||
pg_basebackup_options='--xlog-method=stream'
|
pg_basebackup_options='--xlog-method=stream'
|
||||||
|
|
||||||
See the `pg_basebackup` documentation for details:
|
See the `pg_basebackup` documentation for details:
|
||||||
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||||
|
|
||||||
Otherwise it's necessary to set `wal_keep_segments` to an appropriately high
|
Otherwise it's necessary to set `wal_keep_segments` to an appropriately high
|
||||||
value.
|
value.
|
||||||
|
|
||||||
Further information on replication slots in the PostgreSQL documentation:
|
Further information on replication slots in the PostgreSQL documentation:
|
||||||
http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
||||||
|
|
||||||
|
|
||||||
Promoting a standby server with repmgr
|
Promoting a standby server with repmgr
|
||||||
@@ -699,8 +715,9 @@ updated to reflect this:
|
|||||||
|
|
||||||
|
|
||||||
Note that with cascading replication, `repmgr standby follow` can also be
|
Note that with cascading replication, `repmgr standby follow` can also be
|
||||||
used to detach a standby from its current upstream server and follow another
|
used to detach a standby from its current upstream server and follow the
|
||||||
upstream server, including the master.
|
master. However it's currently not possible to have it follow another standby;
|
||||||
|
we hope to improve this in a future release.
|
||||||
|
|
||||||
|
|
||||||
Performing a switchover with repmgr
|
Performing a switchover with repmgr
|
||||||
@@ -727,7 +744,7 @@ both passwordless SSH access and the path of `repmgr.conf` on that server.
|
|||||||
> careful preparation and with adequate attention. In particular you should
|
> careful preparation and with adequate attention. In particular you should
|
||||||
> be confident that your network environment is stable and reliable.
|
> be confident that your network environment is stable and reliable.
|
||||||
>
|
>
|
||||||
> We recommend running `repmgr standby switchover` at the most verbose
|
> We recommend running `repmgr standby switchover` at the most verbose
|
||||||
> logging level (`--log-level DEBUG --verbose`) and capturing all output
|
> logging level (`--log-level DEBUG --verbose`) and capturing all output
|
||||||
> to assist troubleshooting any problems.
|
> to assist troubleshooting any problems.
|
||||||
>
|
>
|
||||||
@@ -793,7 +810,7 @@ should have been updated to reflect this:
|
|||||||
|
|
||||||
### Caveats
|
### Caveats
|
||||||
|
|
||||||
- the functionality provided `repmgr standby switchover` is primarily aimed
|
- The functionality provided `repmgr standby switchover` is primarily aimed
|
||||||
at a two-server master/standby replication cluster and currently does
|
at a two-server master/standby replication cluster and currently does
|
||||||
not support additional standbys.
|
not support additional standbys.
|
||||||
- `repmgr standby switchover` is designed to use the `pg_rewind` utility,
|
- `repmgr standby switchover` is designed to use the `pg_rewind` utility,
|
||||||
@@ -802,11 +819,16 @@ should have been updated to reflect this:
|
|||||||
- `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that
|
- `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that
|
||||||
data checksums were enabled when the cluster was initialized. See the
|
data checksums were enabled when the cluster was initialized. See the
|
||||||
`pg_rewind` documentation for details:
|
`pg_rewind` documentation for details:
|
||||||
http://www.postgresql.org/docs/current/static/app-pgrewind.html
|
https://www.postgresql.org/docs/current/static/app-pgrewind.html
|
||||||
- `repmgrd` should not be running when a switchover is carried out, otherwise
|
- `repmgrd` should not be running when a switchover is carried out, otherwise
|
||||||
the `repmgrd` may try and promote a standby by itself.
|
the `repmgrd` may try and promote a standby by itself.
|
||||||
- Any other standbys attached to the old master will need to be manually
|
- Any other standbys attached to the old master will need to be manually
|
||||||
instructed to point to the new master (e.g. with `repmgr standby follow`).
|
instructed to point to the new master (e.g. with `repmgr standby follow`).
|
||||||
|
- You must ensure that following a server start using `pg_ctl`, log output
|
||||||
|
is not send to STDERR (the default behaviour). If logging is not configured,
|
||||||
|
We recommend setting `logging_collector=on` in `postgresql.conf` and
|
||||||
|
providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options`
|
||||||
|
parameter.
|
||||||
|
|
||||||
We hope to remove some of these restrictions in future versions of `repmgr`.
|
We hope to remove some of these restrictions in future versions of `repmgr`.
|
||||||
|
|
||||||
@@ -860,8 +882,8 @@ Adjust schema and node ID accordingly. A future `repmgr` release
|
|||||||
will make it possible to unregister failed standbys.
|
will make it possible to unregister failed standbys.
|
||||||
|
|
||||||
|
|
||||||
Automatic failover with repmgrd
|
Automatic failover with `repmgrd`
|
||||||
-------------------------------
|
---------------------------------
|
||||||
|
|
||||||
`repmgrd` is a management and monitoring daemon which runs on standby nodes
|
`repmgrd` is a management and monitoring daemon which runs on standby nodes
|
||||||
and which can automate actions such as failover and updating standbys to
|
and which can automate actions such as failover and updating standbys to
|
||||||
@@ -981,8 +1003,8 @@ during the failover:
|
|||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
|
|
||||||
repmgrd log rotation
|
`repmgrd` log rotation
|
||||||
--------------------
|
----------------------
|
||||||
|
|
||||||
Note that currently `repmgrd` does not provide logfile rotation. To ensure
|
Note that currently `repmgrd` does not provide logfile rotation. To ensure
|
||||||
the current logfile does not grow indefinitely, configure your system's `logrotate`
|
the current logfile does not grow indefinitely, configure your system's `logrotate`
|
||||||
@@ -998,8 +1020,29 @@ for up to 52 weeks and rotation forced if a file grows beyond 100Mb:
|
|||||||
create 0600 postgres postgres
|
create 0600 postgres postgres
|
||||||
}
|
}
|
||||||
|
|
||||||
Monitoring
|
|
||||||
----------
|
`repmgrd` and PostgreSQL connection settings
|
||||||
|
--------------------------------------------
|
||||||
|
|
||||||
|
In addition to the `repmgr` configuration settings, parameters in the
|
||||||
|
`conninfo` string influence how `repmgr` makes a network connection to
|
||||||
|
PostgreSQL. In particular, if another server in the replication cluster
|
||||||
|
is unreachable at network level, system network settings will influence
|
||||||
|
the length of time it takes to determine that the connection is not possible.
|
||||||
|
|
||||||
|
In particular explicitly setting a parameter for `connect_timeout` should
|
||||||
|
be considered; the effective minimum value of `2` (seconds) will ensure
|
||||||
|
that a connection failure at network level is reported as soon as possible,
|
||||||
|
otherwise dependeing on the system settings (e.g. `tcp_syn_retries` in Linux)
|
||||||
|
a delay of a minute or more is possible.
|
||||||
|
|
||||||
|
For further details on `conninfo` network connection parameters, see:
|
||||||
|
|
||||||
|
https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
|
||||||
|
|
||||||
|
|
||||||
|
Monitoring with `repmgrd`
|
||||||
|
-------------------------
|
||||||
|
|
||||||
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
||||||
constantly write standby node status information to the `repl_monitor` table,
|
constantly write standby node status information to the `repl_monitor` table,
|
||||||
|
|||||||
@@ -1,61 +0,0 @@
|
|||||||
Summary: repmgr
|
|
||||||
Name: repmgr
|
|
||||||
Version: 3.0
|
|
||||||
Release: 1
|
|
||||||
License: GPLv3
|
|
||||||
Group: System Environment/Daemons
|
|
||||||
URL: http://repmgr.org
|
|
||||||
Packager: Ian Barwick <ian@2ndquadrant.com>
|
|
||||||
Vendor: 2ndQuadrant Limited
|
|
||||||
Distribution: centos
|
|
||||||
Source0: %{name}-%{version}.tar.gz
|
|
||||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
|
||||||
|
|
||||||
%description
|
|
||||||
repmgr is a utility suite which greatly simplifies
|
|
||||||
the process of setting up and managing replication
|
|
||||||
using streaming replication within a cluster of
|
|
||||||
PostgreSQL servers.
|
|
||||||
|
|
||||||
%prep
|
|
||||||
%setup
|
|
||||||
|
|
||||||
%build
|
|
||||||
export PATH=$PATH:/usr/pgsql-9.3/bin/
|
|
||||||
%{__make} USE_PGXS=1
|
|
||||||
|
|
||||||
%install
|
|
||||||
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
|
|
||||||
|
|
||||||
export PATH=$PATH:/usr/pgsql-9.3/bin/
|
|
||||||
%{__make} USE_PGXS=1 install DESTDIR=%{buildroot} INSTALL="install -p"
|
|
||||||
%{__make} USE_PGXS=1 install_prog DESTDIR=%{buildroot} INSTALL="install -p"
|
|
||||||
%{__make} USE_PGXS=1 install_rhel DESTDIR=%{buildroot} INSTALL="install -p"
|
|
||||||
|
|
||||||
|
|
||||||
%clean
|
|
||||||
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
|
|
||||||
|
|
||||||
|
|
||||||
%files
|
|
||||||
%defattr(-,root,root)
|
|
||||||
/usr/bin/repmgr
|
|
||||||
/usr/bin/repmgrd
|
|
||||||
/usr/pgsql-9.3/bin/repmgr
|
|
||||||
/usr/pgsql-9.3/bin/repmgrd
|
|
||||||
/usr/pgsql-9.3/lib/repmgr_funcs.so
|
|
||||||
/usr/pgsql-9.3/share/contrib/repmgr.sql
|
|
||||||
/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql
|
|
||||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql
|
|
||||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql
|
|
||||||
%attr(0755,root,root)/etc/init.d/repmgrd
|
|
||||||
%attr(0644,root,root)/etc/sysconfig/repmgrd
|
|
||||||
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
|
|
||||||
|
|
||||||
%changelog
|
|
||||||
* Tue Mar 10 2015 Ian Barwick ian@2ndquadrant.com>
|
|
||||||
- build for repmgr 3.0
|
|
||||||
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
|
|
||||||
- fix witness creation to create db and user if needed
|
|
||||||
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
|
|
||||||
- initial build for RHEL6
|
|
||||||
@@ -1,133 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# chkconfig: - 75 16
|
|
||||||
# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
|
||||||
# processname: repmgrd
|
|
||||||
# pidfile="/var/run/${NAME}.pid"
|
|
||||||
|
|
||||||
# Source function library.
|
|
||||||
INITD=/etc/rc.d/init.d
|
|
||||||
. $INITD/functions
|
|
||||||
|
|
||||||
# Get function listing for cross-distribution logic.
|
|
||||||
TYPESET=`typeset -f|grep "declare"`
|
|
||||||
|
|
||||||
# Get network config.
|
|
||||||
. /etc/sysconfig/network
|
|
||||||
|
|
||||||
DESC="PostgreSQL replication management and monitoring daemon"
|
|
||||||
NAME=repmgrd
|
|
||||||
|
|
||||||
REPMGRD_ENABLED=no
|
|
||||||
REPMGRD_OPTS=
|
|
||||||
REPMGRD_USER=postgres
|
|
||||||
REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
|
|
||||||
REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
|
||||||
REPMGRD_LOCK=/var/lock/subsys/${NAME}
|
|
||||||
REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
|
|
||||||
|
|
||||||
# Read configuration variable file if it is present
|
|
||||||
[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
|
|
||||||
|
|
||||||
# For SELinux we need to use 'runuser' not 'su'
|
|
||||||
if [ -x /sbin/runuser ]
|
|
||||||
then
|
|
||||||
SU=runuser
|
|
||||||
else
|
|
||||||
SU=su
|
|
||||||
fi
|
|
||||||
|
|
||||||
test -x $REPMGRD_BIN || exit 0
|
|
||||||
|
|
||||||
case "$REPMGRD_ENABLED" in
|
|
||||||
[Yy]*)
|
|
||||||
break
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
|
|
||||||
if [ -z "${REPMGRD_OPTS}" ]
|
|
||||||
then
|
|
||||||
echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
start()
|
|
||||||
{
|
|
||||||
REPMGRD_START=$"Starting ${NAME} service: "
|
|
||||||
|
|
||||||
# Make sure startup-time log file is valid
|
|
||||||
if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
|
|
||||||
then
|
|
||||||
touch "${REPMGRD_LOG}" || exit 1
|
|
||||||
chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
|
|
||||||
chmod go-rwx "${REPMGRD_LOG}"
|
|
||||||
[ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo -n "${REPMGRD_START}"
|
|
||||||
$SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
|
|
||||||
sleep 2
|
|
||||||
pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
|
|
||||||
if [ "x${pid}" != "x" ]
|
|
||||||
then
|
|
||||||
success "${REPMGRD_START}"
|
|
||||||
touch "${REPMGRD_LOCK}"
|
|
||||||
echo $pid > "${REPMGRD_PIDFILE}"
|
|
||||||
echo
|
|
||||||
else
|
|
||||||
failure "${REPMGRD_START}"
|
|
||||||
echo
|
|
||||||
script_result=1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
stop()
|
|
||||||
{
|
|
||||||
echo -n $"Stopping ${NAME} service: "
|
|
||||||
if [ -e "${REPMGRD_LOCK}" ]
|
|
||||||
then
|
|
||||||
killproc ${NAME}
|
|
||||||
ret=$?
|
|
||||||
if [ $ret -eq 0 ]
|
|
||||||
then
|
|
||||||
echo_success
|
|
||||||
rm -f "${REPMGRD_PIDFILE}"
|
|
||||||
rm -f "${REPMGRD_LOCK}"
|
|
||||||
else
|
|
||||||
echo_failure
|
|
||||||
script_result=1
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
# not running; per LSB standards this is "ok"
|
|
||||||
echo_success
|
|
||||||
fi
|
|
||||||
echo
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# See how we were called.
|
|
||||||
case "$1" in
|
|
||||||
start)
|
|
||||||
start
|
|
||||||
;;
|
|
||||||
stop)
|
|
||||||
stop
|
|
||||||
;;
|
|
||||||
status)
|
|
||||||
status -p $REPMGRD_PIDFILE $NAME
|
|
||||||
script_result=$?
|
|
||||||
;;
|
|
||||||
restart)
|
|
||||||
stop
|
|
||||||
start
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo $"Usage: $0 {start|stop|status|restart}"
|
|
||||||
exit 2
|
|
||||||
esac
|
|
||||||
|
|
||||||
exit $script_result
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
# default settings for repmgrd. This file is source by /bin/sh from
|
|
||||||
# /etc/init.d/repmgrd
|
|
||||||
|
|
||||||
# disable repmgrd by default so it won't get started upon installation
|
|
||||||
# valid values: yes/no
|
|
||||||
REPMGRD_ENABLED=no
|
|
||||||
|
|
||||||
# Options for repmgrd (required)
|
|
||||||
#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
|
|
||||||
|
|
||||||
# User to run repmgrd as
|
|
||||||
#REPMGRD_USER=postgres
|
|
||||||
|
|
||||||
# repmgrd binary
|
|
||||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
|
||||||
|
|
||||||
# pid file
|
|
||||||
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
|
||||||
|
|
||||||
# log file
|
|
||||||
#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log
|
|
||||||
5
TODO
5
TODO
@@ -53,8 +53,9 @@ Planned feature improvements
|
|||||||
requested, activate the replication slot using pg_receivexlog to negate the
|
requested, activate the replication slot using pg_receivexlog to negate the
|
||||||
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
|
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
|
||||||
|
|
||||||
* Take into account the fact that a standby can obtain WAL from an archive,
|
* repmgr: enable "standby follow" to point a standby at another standby, not
|
||||||
so even if direct streaming replication is interrupted, it may be up-to-date
|
just the replication cluster master (see GitHub #130)
|
||||||
|
|
||||||
|
|
||||||
Usability improvements
|
Usability improvements
|
||||||
======================
|
======================
|
||||||
|
|||||||
10
config.c
10
config.c
@@ -28,7 +28,7 @@ static void parse_event_notifications_list(t_configuration_options *options, con
|
|||||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||||
static void exit_with_errors(ErrorList *config_errors);
|
static void exit_with_errors(ErrorList *config_errors);
|
||||||
|
|
||||||
const static char *_progname = '\0';
|
const static char *_progname = NULL;
|
||||||
static char config_file_path[MAXPGPATH];
|
static char config_file_path[MAXPGPATH];
|
||||||
static bool config_file_provided = false;
|
static bool config_file_provided = false;
|
||||||
bool config_file_found = false;
|
bool config_file_found = false;
|
||||||
@@ -224,6 +224,7 @@ parse_config(t_configuration_options *options)
|
|||||||
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||||
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
||||||
memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options));
|
memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options));
|
||||||
|
memset(options->restore_command, 0, sizeof(options->restore_command));
|
||||||
|
|
||||||
/* default master_response_timeout is 60 seconds */
|
/* default master_response_timeout is 60 seconds */
|
||||||
options->master_response_timeout = 60;
|
options->master_response_timeout = 60;
|
||||||
@@ -239,6 +240,8 @@ parse_config(t_configuration_options *options)
|
|||||||
options->witness_repl_nodes_sync_interval_secs = 30;
|
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||||
|
|
||||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||||
|
options->event_notifications.head = NULL;
|
||||||
|
options->event_notifications.tail = NULL;
|
||||||
|
|
||||||
options->tablespace_mapping.head = NULL;
|
options->tablespace_mapping.head = NULL;
|
||||||
options->tablespace_mapping.tail = NULL;
|
options->tablespace_mapping.tail = NULL;
|
||||||
@@ -340,7 +343,8 @@ parse_config(t_configuration_options *options)
|
|||||||
strncpy(options->follow_command, value, MAXLEN);
|
strncpy(options->follow_command, value, MAXLEN);
|
||||||
else if (strcmp(name, "master_response_timeout") == 0)
|
else if (strcmp(name, "master_response_timeout") == 0)
|
||||||
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
||||||
/* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
/*
|
||||||
|
* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
||||||
* we'll switch terminology in a future release (3.1?)
|
* we'll switch terminology in a future release (3.1?)
|
||||||
*/
|
*/
|
||||||
else if (strcmp(name, "primary_response_timeout") == 0)
|
else if (strcmp(name, "primary_response_timeout") == 0)
|
||||||
@@ -372,6 +376,8 @@ parse_config(t_configuration_options *options)
|
|||||||
parse_event_notifications_list(options, value);
|
parse_event_notifications_list(options, value);
|
||||||
else if (strcmp(name, "tablespace_mapping") == 0)
|
else if (strcmp(name, "tablespace_mapping") == 0)
|
||||||
tablespace_list_append(options, value);
|
tablespace_list_append(options, value);
|
||||||
|
else if (strcmp(name, "restore_command") == 0)
|
||||||
|
strncpy(options->restore_command, value, MAXLEN);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
known_parameter = false;
|
known_parameter = false;
|
||||||
|
|||||||
7
config.h
7
config.h
@@ -72,6 +72,7 @@ typedef struct
|
|||||||
char pg_bindir[MAXLEN];
|
char pg_bindir[MAXLEN];
|
||||||
char pg_ctl_options[MAXLEN];
|
char pg_ctl_options[MAXLEN];
|
||||||
char pg_basebackup_options[MAXLEN];
|
char pg_basebackup_options[MAXLEN];
|
||||||
|
char restore_command[MAXLEN];
|
||||||
char logfile[MAXLEN];
|
char logfile[MAXLEN];
|
||||||
int monitor_interval_secs;
|
int monitor_interval_secs;
|
||||||
int retry_promote_interval_secs;
|
int retry_promote_interval_secs;
|
||||||
@@ -82,7 +83,11 @@ typedef struct
|
|||||||
TablespaceList tablespace_mapping;
|
TablespaceList tablespace_mapping;
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
/*
|
||||||
|
* The following will initialize the structure with a minimal set of options;
|
||||||
|
* actual defaults are set in parse_config() before parsing the configuration file
|
||||||
|
*/
|
||||||
|
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }
|
||||||
|
|
||||||
typedef struct ErrorListCell
|
typedef struct ErrorListCell
|
||||||
{
|
{
|
||||||
|
|||||||
55
dbutils.c
55
dbutils.c
@@ -31,6 +31,7 @@
|
|||||||
char repmgr_schema[MAXLEN] = "";
|
char repmgr_schema[MAXLEN] = "";
|
||||||
char repmgr_schema_quoted[MAXLEN] = "";
|
char repmgr_schema_quoted[MAXLEN] = "";
|
||||||
|
|
||||||
|
static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);
|
||||||
|
|
||||||
PGconn *
|
PGconn *
|
||||||
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice)
|
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice)
|
||||||
@@ -538,7 +539,7 @@ get_conninfo_value(const char *conninfo, const char *keyword, char *output)
|
|||||||
|
|
||||||
conninfo_options = PQconninfoParse(conninfo, NULL);
|
conninfo_options = PQconninfoParse(conninfo, NULL);
|
||||||
|
|
||||||
if (conninfo_options == false)
|
if (conninfo_options == NULL)
|
||||||
{
|
{
|
||||||
log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo);
|
log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo);
|
||||||
return false;
|
return false;
|
||||||
@@ -1681,8 +1682,7 @@ int
|
|||||||
get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info)
|
get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info)
|
||||||
{
|
{
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
PGresult *res;
|
int result;
|
||||||
int ntuples;
|
|
||||||
|
|
||||||
sqlquery_snprintf(
|
sqlquery_snprintf(
|
||||||
sqlquery,
|
sqlquery,
|
||||||
@@ -1696,6 +1696,51 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
|
|||||||
|
|
||||||
log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery);
|
||||||
|
|
||||||
|
result = _get_node_record(conn, cluster, sqlquery, node_info);
|
||||||
|
|
||||||
|
if (result == 0)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info)
|
||||||
|
{
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
int result;
|
||||||
|
|
||||||
|
sqlquery_snprintf(
|
||||||
|
sqlquery,
|
||||||
|
"SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active"
|
||||||
|
" FROM %s.repl_nodes "
|
||||||
|
" WHERE cluster = '%s' "
|
||||||
|
" AND name = '%s'",
|
||||||
|
get_repmgr_schema_quoted(conn),
|
||||||
|
cluster,
|
||||||
|
node_name);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "get_node_record_by_name():\n%s\n", sqlquery);
|
||||||
|
|
||||||
|
result = _get_node_record(conn, cluster, sqlquery, node_info);
|
||||||
|
|
||||||
|
if (result == 0)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %s\n", node_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
_get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info)
|
||||||
|
{
|
||||||
|
int ntuples;
|
||||||
|
PGresult *res;
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
@@ -1706,7 +1751,6 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
|
|||||||
|
|
||||||
if (ntuples == 0)
|
if (ntuples == 0)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1727,6 +1771,9 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
get_node_replication_state(PGconn *conn, char *node_name, char *output)
|
get_node_replication_state(PGconn *conn, char *node_name, char *output)
|
||||||
{
|
{
|
||||||
|
|||||||
26
dbutils.h
26
dbutils.h
@@ -52,18 +52,6 @@ typedef struct s_node_info
|
|||||||
} t_node_info;
|
} t_node_info;
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Struct to store replication slot information
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct s_replication_slot
|
|
||||||
{
|
|
||||||
char slot_name[MAXLEN];
|
|
||||||
char slot_type[MAXLEN];
|
|
||||||
bool active;
|
|
||||||
} t_replication_slot;
|
|
||||||
|
|
||||||
|
|
||||||
#define T_NODE_INFO_INITIALIZER { \
|
#define T_NODE_INFO_INITIALIZER { \
|
||||||
NODE_NOT_FOUND, \
|
NODE_NOT_FOUND, \
|
||||||
NO_UPSTREAM_NODE, \
|
NO_UPSTREAM_NODE, \
|
||||||
@@ -78,6 +66,19 @@ typedef struct s_replication_slot
|
|||||||
InvalidXLogRecPtr \
|
InvalidXLogRecPtr \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Struct to store replication slot information
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct s_replication_slot
|
||||||
|
{
|
||||||
|
char slot_name[MAXLEN];
|
||||||
|
char slot_type[MAXLEN];
|
||||||
|
bool active;
|
||||||
|
} t_replication_slot;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
PGconn *_establish_db_connection(const char *conninfo,
|
PGconn *_establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error,
|
const bool exit_on_error,
|
||||||
const bool log_notice);
|
const bool log_notice);
|
||||||
@@ -125,6 +126,7 @@ bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *c
|
|||||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||||
|
int get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info);
|
||||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||||
|
|||||||
4
debian/DEBIAN/control
vendored
4
debian/DEBIAN/control
vendored
@@ -1,9 +1,9 @@
|
|||||||
Package: repmgr-auto
|
Package: repmgr-auto
|
||||||
Version: 3.0.1
|
Version: 3.1.3
|
||||||
Section: database
|
Section: database
|
||||||
Priority: optional
|
Priority: optional
|
||||||
Architecture: all
|
Architecture: all
|
||||||
Depends: rsync, postgresql-9.3 | postgresql-9.4
|
Depends: rsync, postgresql-9.3 | postgresql-9.4 | postgresql-9.5
|
||||||
Maintainer: Self built package <user@localhost>
|
Maintainer: Self built package <user@localhost>
|
||||||
Description: PostgreSQL replication setup, magament and monitoring
|
Description: PostgreSQL replication setup, magament and monitoring
|
||||||
has two main executables
|
has two main executables
|
||||||
|
|||||||
194
dirmod.c
Normal file
194
dirmod.c
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
/*
|
||||||
|
*
|
||||||
|
* dirmod.c
|
||||||
|
* directory handling functions
|
||||||
|
*
|
||||||
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "postgres_fe.h"
|
||||||
|
|
||||||
|
/* Don't modify declarations in system headers */
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <dirent.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pgfnames
|
||||||
|
*
|
||||||
|
* return a list of the names of objects in the argument directory. Caller
|
||||||
|
* must call pgfnames_cleanup later to free the memory allocated by this
|
||||||
|
* function.
|
||||||
|
*/
|
||||||
|
char **
|
||||||
|
pgfnames(const char *path)
|
||||||
|
{
|
||||||
|
DIR *dir;
|
||||||
|
struct dirent *file;
|
||||||
|
char **filenames;
|
||||||
|
int numnames = 0;
|
||||||
|
int fnsize = 200; /* enough for many small dbs */
|
||||||
|
|
||||||
|
dir = opendir(path);
|
||||||
|
if (dir == NULL)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
filenames = (char **) palloc(fnsize * sizeof(char *));
|
||||||
|
|
||||||
|
while (errno = 0, (file = readdir(dir)) != NULL)
|
||||||
|
{
|
||||||
|
if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0)
|
||||||
|
{
|
||||||
|
if (numnames + 1 >= fnsize)
|
||||||
|
{
|
||||||
|
fnsize *= 2;
|
||||||
|
filenames = (char **) repalloc(filenames,
|
||||||
|
fnsize * sizeof(char *));
|
||||||
|
}
|
||||||
|
filenames[numnames++] = pstrdup(file->d_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errno)
|
||||||
|
{
|
||||||
|
fprintf(stderr, _("could not read directory \"%s\": %s\n"),
|
||||||
|
path, strerror(errno));
|
||||||
|
}
|
||||||
|
|
||||||
|
filenames[numnames] = NULL;
|
||||||
|
|
||||||
|
if (closedir(dir))
|
||||||
|
{
|
||||||
|
fprintf(stderr, _("could not close directory \"%s\": %s\n"),
|
||||||
|
path, strerror(errno));
|
||||||
|
}
|
||||||
|
|
||||||
|
return filenames;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pgfnames_cleanup
|
||||||
|
*
|
||||||
|
* deallocate memory used for filenames
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
pgfnames_cleanup(char **filenames)
|
||||||
|
{
|
||||||
|
char **fn;
|
||||||
|
|
||||||
|
for (fn = filenames; *fn; fn++)
|
||||||
|
pfree(*fn);
|
||||||
|
|
||||||
|
pfree(filenames);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* rmtree
|
||||||
|
*
|
||||||
|
* Delete a directory tree recursively.
|
||||||
|
* Assumes path points to a valid directory.
|
||||||
|
* Deletes everything under path.
|
||||||
|
* If rmtopdir is true deletes the directory too.
|
||||||
|
* Returns true if successful, false if there was any problem.
|
||||||
|
* (The details of the problem are reported already, so caller
|
||||||
|
* doesn't really have to say anything more, but most do.)
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
rmtree(const char *path, bool rmtopdir)
|
||||||
|
{
|
||||||
|
bool result = true;
|
||||||
|
char pathbuf[MAXPGPATH];
|
||||||
|
char **filenames;
|
||||||
|
char **filename;
|
||||||
|
struct stat statbuf;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* we copy all the names out of the directory before we start modifying
|
||||||
|
* it.
|
||||||
|
*/
|
||||||
|
filenames = pgfnames(path);
|
||||||
|
|
||||||
|
if (filenames == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* now we have the names we can start removing things */
|
||||||
|
for (filename = filenames; *filename; filename++)
|
||||||
|
{
|
||||||
|
snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's ok if the file is not there anymore; we were just about to
|
||||||
|
* delete it anyway.
|
||||||
|
*
|
||||||
|
* This is not an academic possibility. One scenario where this
|
||||||
|
* happens is when bgwriter has a pending unlink request for a file in
|
||||||
|
* a database that's being dropped. In dropdb(), we call
|
||||||
|
* ForgetDatabaseFsyncRequests() to flush out any such pending unlink
|
||||||
|
* requests, but because that's asynchronous, it's not guaranteed that
|
||||||
|
* the bgwriter receives the message in time.
|
||||||
|
*/
|
||||||
|
if (lstat(pathbuf, &statbuf) != 0)
|
||||||
|
{
|
||||||
|
if (errno != ENOENT)
|
||||||
|
{
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (S_ISDIR(statbuf.st_mode))
|
||||||
|
{
|
||||||
|
/* call ourselves recursively for a directory */
|
||||||
|
if (!rmtree(pathbuf, true))
|
||||||
|
{
|
||||||
|
/* we already reported the error */
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (unlink(pathbuf) != 0)
|
||||||
|
{
|
||||||
|
if (errno != ENOENT)
|
||||||
|
{
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rmtopdir)
|
||||||
|
{
|
||||||
|
if (rmdir(path) != 0)
|
||||||
|
{
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pgfnames_cleanup(filenames);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
23
dirmod.h
Normal file
23
dirmod.h
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
* dirmod.h
|
||||||
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _DIRMOD_H_
|
||||||
|
#define _DIRMOD_H_
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -38,5 +38,6 @@
|
|||||||
#define ERR_INTERNAL 15
|
#define ERR_INTERNAL 15
|
||||||
#define ERR_MONITORING_FAIL 16
|
#define ERR_MONITORING_FAIL 16
|
||||||
#define ERR_BAD_BACKUP_LABEL 17
|
#define ERR_BAD_BACKUP_LABEL 17
|
||||||
|
#define ERR_SWITCHOVER_FAIL 18
|
||||||
|
|
||||||
#endif /* _ERRCODE_H_ */
|
#endif /* _ERRCODE_H_ */
|
||||||
|
|||||||
345
repmgr.c
345
repmgr.c
@@ -87,7 +87,7 @@ static bool create_recovery_file(const char *data_dir);
|
|||||||
static int test_ssh_connection(char *host, char *remote_user);
|
static int test_ssh_connection(char *host, char *remote_user);
|
||||||
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
|
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||||
char *local_path, bool is_directory, int server_version_num);
|
char *local_path, bool is_directory, int server_version_num);
|
||||||
static int run_basebackup(const char *data_dir);
|
static int run_basebackup(const char *data_dir, int server_version);
|
||||||
static void check_parameters_for_action(const int action);
|
static void check_parameters_for_action(const int action);
|
||||||
static bool create_schema(PGconn *conn);
|
static bool create_schema(PGconn *conn);
|
||||||
static void write_primary_conninfo(char *line);
|
static void write_primary_conninfo(char *line);
|
||||||
@@ -121,7 +121,7 @@ static bool remote_command(const char *host, const char *user, const char *comma
|
|||||||
static void format_db_cli_params(const char *conninfo, char *output);
|
static void format_db_cli_params(const char *conninfo, char *output);
|
||||||
static bool copy_file(const char *old_filename, const char *new_filename);
|
static bool copy_file(const char *old_filename, const char *new_filename);
|
||||||
|
|
||||||
static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label);
|
static bool read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label);
|
||||||
|
|
||||||
/* Global variables */
|
/* Global variables */
|
||||||
static const char *keywords[6];
|
static const char *keywords[6];
|
||||||
@@ -187,6 +187,7 @@ main(int argc, char **argv)
|
|||||||
{"config-archive-dir", required_argument, NULL, 5},
|
{"config-archive-dir", required_argument, NULL, 5},
|
||||||
{"pg_rewind", optional_argument, NULL, 6},
|
{"pg_rewind", optional_argument, NULL, 6},
|
||||||
{"pwprompt", optional_argument, NULL, 7},
|
{"pwprompt", optional_argument, NULL, 7},
|
||||||
|
{"csv", no_argument, NULL, 8},
|
||||||
{"help", no_argument, NULL, '?'},
|
{"help", no_argument, NULL, '?'},
|
||||||
{"version", no_argument, NULL, 'V'},
|
{"version", no_argument, NULL, 'V'},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
@@ -427,6 +428,9 @@ main(int argc, char **argv)
|
|||||||
case 7:
|
case 7:
|
||||||
runtime_options.witness_pwprompt = true;
|
runtime_options.witness_pwprompt = true;
|
||||||
break;
|
break;
|
||||||
|
case 8:
|
||||||
|
runtime_options.csv_mode = true;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
@@ -764,9 +768,9 @@ do_cluster_show(void)
|
|||||||
conn = establish_db_connection(options.conninfo, true);
|
conn = establish_db_connection(options.conninfo, true);
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT conninfo, type, name, upstream_node_name"
|
"SELECT conninfo, type, name, upstream_node_name, id"
|
||||||
" FROM %s.repl_show_nodes",
|
" FROM %s.repl_show_nodes",
|
||||||
get_repmgr_schema_quoted(conn));
|
get_repmgr_schema_quoted(conn));
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery );
|
log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery );
|
||||||
|
|
||||||
@@ -813,21 +817,24 @@ do_cluster_show(void)
|
|||||||
upstream_length = upstream_length_cur;
|
upstream_length = upstream_length_cur;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("Role | %-*s | %-*s | Connection String\n", name_length, name_header, upstream_length, upstream_header);
|
if (! runtime_options.csv_mode)
|
||||||
printf("----------+-");
|
{
|
||||||
|
printf("Role | %-*s | %-*s | Connection String\n", name_length, name_header, upstream_length, upstream_header);
|
||||||
|
printf("----------+-");
|
||||||
|
|
||||||
for (i = 0; i < name_length; i++)
|
for (i = 0; i < name_length; i++)
|
||||||
printf("-");
|
printf("-");
|
||||||
|
|
||||||
printf("-|-");
|
printf("-|-");
|
||||||
for (i = 0; i < upstream_length; i++)
|
for (i = 0; i < upstream_length; i++)
|
||||||
printf("-");
|
printf("-");
|
||||||
|
|
||||||
printf("-|-");
|
printf("-|-");
|
||||||
for (i = 0; i < conninfo_length; i++)
|
for (i = 0; i < conninfo_length; i++)
|
||||||
printf("-");
|
printf("-");
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < PQntuples(res); i++)
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
{
|
{
|
||||||
@@ -841,11 +848,20 @@ do_cluster_show(void)
|
|||||||
else
|
else
|
||||||
strcpy(node_role, "* master");
|
strcpy(node_role, "* master");
|
||||||
|
|
||||||
printf("%-10s", node_role);
|
if (runtime_options.csv_mode)
|
||||||
printf("| %-*s ", name_length, PQgetvalue(res, i, 2));
|
{
|
||||||
printf("| %-*s ", upstream_length, PQgetvalue(res, i, 3));
|
int connection_status =
|
||||||
printf("| %s\n", PQgetvalue(res, i, 0));
|
(PQstatus(conn) == CONNECTION_OK) ?
|
||||||
|
(is_standby(conn) ? 1 : 0) : -1;
|
||||||
|
printf("%s,%d\n", PQgetvalue(res, i, 4), connection_status);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("%-10s", node_role);
|
||||||
|
printf("| %-*s ", name_length, PQgetvalue(res, i, 2));
|
||||||
|
printf("| %-*s ", upstream_length, PQgetvalue(res, i, 3));
|
||||||
|
printf("| %s\n", PQgetvalue(res, i, 0));
|
||||||
|
}
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1110,8 +1126,9 @@ do_standby_register(void)
|
|||||||
PGconn *master_conn;
|
PGconn *master_conn;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
|
||||||
bool record_created;
|
bool record_created;
|
||||||
|
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
int node_result;
|
||||||
|
|
||||||
log_info(_("connecting to standby database\n"));
|
log_info(_("connecting to standby database\n"));
|
||||||
conn = establish_db_connection(options.conninfo, true);
|
conn = establish_db_connection(options.conninfo, true);
|
||||||
@@ -1168,6 +1185,28 @@ do_standby_register(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check that an active node with the same node_name doesn't exist already
|
||||||
|
*/
|
||||||
|
|
||||||
|
node_result = get_node_record_by_name(master_conn,
|
||||||
|
options.cluster_name,
|
||||||
|
options.node_name,
|
||||||
|
&node_record);
|
||||||
|
|
||||||
|
if (node_result)
|
||||||
|
{
|
||||||
|
if (node_record.active == true)
|
||||||
|
{
|
||||||
|
log_err(_("Node %i exists already with node_name \"%s\"\n"),
|
||||||
|
node_record.node_id,
|
||||||
|
options.node_name);
|
||||||
|
PQfinish(master_conn);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
record_created = create_node_record(master_conn,
|
record_created = create_node_record(master_conn,
|
||||||
"standby register",
|
"standby register",
|
||||||
options.node,
|
options.node,
|
||||||
@@ -1393,7 +1432,7 @@ do_standby_clone(void)
|
|||||||
|
|
||||||
if (*runtime_options.recovery_min_apply_delay)
|
if (*runtime_options.recovery_min_apply_delay)
|
||||||
{
|
{
|
||||||
if (get_server_version(upstream_conn, NULL) < 90400)
|
if (server_version_num < 90400)
|
||||||
{
|
{
|
||||||
log_err(_("PostgreSQL 9.4 or greater required for --recovery-min-apply-delay\n"));
|
log_err(_("PostgreSQL 9.4 or greater required for --recovery-min-apply-delay\n"));
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
@@ -1417,7 +1456,7 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
TablespaceListCell *cell;
|
TablespaceListCell *cell;
|
||||||
|
|
||||||
if (get_server_version(upstream_conn, NULL) < 90400 && !runtime_options.rsync_only)
|
if (server_version_num < 90400 && !runtime_options.rsync_only)
|
||||||
{
|
{
|
||||||
log_err(_("in PostgreSQL 9.3, tablespace mapping can only be used in conjunction with --rsync-only\n"));
|
log_err(_("in PostgreSQL 9.3, tablespace mapping can only be used in conjunction with --rsync-only\n"));
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
@@ -1673,12 +1712,13 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Read backup label copied from primary */
|
/* Read backup label copied from primary */
|
||||||
/* XXX ensure this function does not exit on error as we'd need to stop the backup */
|
if (read_backup_label(local_data_directory, &backup_label) == false)
|
||||||
read_backup_label(local_data_directory, &backup_label);
|
{
|
||||||
|
r = retval = ERR_BAD_BACKUP_LABEL;
|
||||||
|
goto stop_backup;
|
||||||
|
}
|
||||||
|
|
||||||
printf("Label: %s; file: %s\n", backup_label.label, backup_label.start_wal_file);
|
/* Copy tablespaces and, if required, remap to a new location */
|
||||||
|
|
||||||
/* Handle tablespaces */
|
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
" SELECT oid, pg_tablespace_location(oid) AS spclocation "
|
" SELECT oid, pg_tablespace_location(oid) AS spclocation "
|
||||||
@@ -1715,7 +1755,6 @@ do_standby_clone(void)
|
|||||||
appendPQExpBuffer(&tblspc_dir_src, "%s", PQgetvalue(res, i, 1));
|
appendPQExpBuffer(&tblspc_dir_src, "%s", PQgetvalue(res, i, 1));
|
||||||
|
|
||||||
/* Check if tablespace path matches one of the provided tablespace mappings */
|
/* Check if tablespace path matches one of the provided tablespace mappings */
|
||||||
|
|
||||||
if (options.tablespace_mapping.head != NULL)
|
if (options.tablespace_mapping.head != NULL)
|
||||||
{
|
{
|
||||||
for (cell = options.tablespace_mapping.head; cell; cell = cell->next)
|
for (cell = options.tablespace_mapping.head; cell; cell = cell->next)
|
||||||
@@ -1758,10 +1797,15 @@ do_standby_clone(void)
|
|||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Update symlinks in pg_tblspc */
|
/*
|
||||||
|
* If a valid mapping was provide for this tablespace, arrange for it to
|
||||||
|
* be remapped
|
||||||
|
* (if no tablespace mappings was provided, the link will be copied as-is
|
||||||
|
* by pg_basebackup or rsync and no action is required)
|
||||||
|
*/
|
||||||
if (mapping_found == true)
|
if (mapping_found == true)
|
||||||
{
|
{
|
||||||
/* 9.5 and later - create a tablespace_map file */
|
/* 9.5 and later - append to the tablespace_map file */
|
||||||
if (server_version_num >= 90500)
|
if (server_version_num >= 90500)
|
||||||
{
|
{
|
||||||
tablespace_map_rewrite = true;
|
tablespace_map_rewrite = true;
|
||||||
@@ -1805,6 +1849,13 @@ do_standby_clone(void)
|
|||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For 9.5 and later, if tablespace remapping was requested, we'll need
|
||||||
|
* to rewrite the tablespace map file ourselves.
|
||||||
|
* The tablespace map file is read on startup and any links created by
|
||||||
|
* the backend; we could do this ourselves like for pre-9.5 servers, but
|
||||||
|
* it's better to rely on functionality the backend provides.
|
||||||
|
*/
|
||||||
if (server_version_num >= 90500 && tablespace_map_rewrite == true)
|
if (server_version_num >= 90500 && tablespace_map_rewrite == true)
|
||||||
{
|
{
|
||||||
PQExpBufferData tablespace_map_filename;
|
PQExpBufferData tablespace_map_filename;
|
||||||
@@ -1817,7 +1868,9 @@ do_standby_clone(void)
|
|||||||
/* Unlink any existing file (it should be there, but we don't care if it isn't) */
|
/* Unlink any existing file (it should be there, but we don't care if it isn't) */
|
||||||
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
|
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
|
||||||
{
|
{
|
||||||
log_err(_("unable to remove tablespace_map file %s\n"), tablespace_map_filename.data);
|
log_err(_("unable to remove tablespace_map file %s: %s\n"),
|
||||||
|
tablespace_map_filename.data,
|
||||||
|
strerror(errno));
|
||||||
|
|
||||||
r = retval = ERR_BAD_BASEBACKUP;
|
r = retval = ERR_BAD_BASEBACKUP;
|
||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
@@ -1845,7 +1898,7 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
r = run_basebackup(local_data_directory);
|
r = run_basebackup(local_data_directory, server_version_num);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("standby clone: base backup failed\n"));
|
log_warning(_("standby clone: base backup failed\n"));
|
||||||
@@ -1988,8 +2041,8 @@ stop_backup:
|
|||||||
*/
|
*/
|
||||||
if (runtime_options.rsync_only)
|
if (runtime_options.rsync_only)
|
||||||
{
|
{
|
||||||
char script[MAXLEN];
|
|
||||||
char label_path[MAXPGPATH];
|
char label_path[MAXPGPATH];
|
||||||
|
char dirpath[MAXLEN] = "";
|
||||||
|
|
||||||
if (runtime_options.force)
|
if (runtime_options.force)
|
||||||
{
|
{
|
||||||
@@ -1997,13 +2050,13 @@ stop_backup:
|
|||||||
* Remove any existing WAL from the target directory, since
|
* Remove any existing WAL from the target directory, since
|
||||||
* rsync's --exclude option doesn't do it.
|
* rsync's --exclude option doesn't do it.
|
||||||
*/
|
*/
|
||||||
maxlen_snprintf(script, "rm -rf %s/pg_xlog/*",
|
|
||||||
local_data_directory);
|
maxlen_snprintf(dirpath, "%s/pg_xlog/", local_data_directory);
|
||||||
r = system(script);
|
|
||||||
if (r != 0)
|
if (!rmtree(dirpath, false))
|
||||||
{
|
{
|
||||||
log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"),
|
log_err(_("unable to empty local WAL directory %s\n"),
|
||||||
local_data_directory);
|
dirpath);
|
||||||
exit(ERR_BAD_RSYNC);
|
exit(ERR_BAD_RSYNC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2024,17 +2077,17 @@ stop_backup:
|
|||||||
* functionality of replication slots
|
* functionality of replication slots
|
||||||
*/
|
*/
|
||||||
if (server_version_num >= 90400 &&
|
if (server_version_num >= 90400 &&
|
||||||
backup_label.min_failover_slot_lsn == InvalidXLogRecPtr)
|
backup_label.min_failover_slot_lsn == InvalidXLogRecPtr)
|
||||||
{
|
{
|
||||||
maxlen_snprintf(script, "rm -rf %s/pg_replslot/*",
|
maxlen_snprintf(dirpath, "%s/pg_replslot/*",
|
||||||
local_data_directory);
|
local_data_directory);
|
||||||
|
|
||||||
log_debug("deleting pg_replslot directory contents\n");
|
log_debug("deleting pg_replslot directory contents\n");
|
||||||
r = system(script);
|
|
||||||
if (r != 0)
|
if (!rmtree(dirpath, false))
|
||||||
{
|
{
|
||||||
log_err(_("unable to empty replication slot directory %s/pg_replslot/\n"),
|
log_err(_("unable to empty replication slot directory %s\n"),
|
||||||
local_data_directory);
|
dirpath);
|
||||||
exit(ERR_BAD_RSYNC);
|
exit(ERR_BAD_RSYNC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2151,8 +2204,7 @@ parse_label_lsn(const char *label_key, const char *label_value)
|
|||||||
{
|
{
|
||||||
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
||||||
label_key, label_value);
|
label_key, label_value);
|
||||||
|
return InvalidXLogRecPtr;
|
||||||
exit(ERR_BAD_BACKUP_LABEL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
@@ -2173,7 +2225,7 @@ parse_label_lsn(const char *label_key, const char *label_value)
|
|||||||
*
|
*
|
||||||
*======================================
|
*======================================
|
||||||
*/
|
*/
|
||||||
static void
|
static bool
|
||||||
read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label)
|
read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label)
|
||||||
{
|
{
|
||||||
char label_path[MAXPGPATH];
|
char label_path[MAXPGPATH];
|
||||||
@@ -2198,7 +2250,7 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
|
|||||||
{
|
{
|
||||||
log_err(_("read_backup_label: could not open backup label file %s: %s"),
|
log_err(_("read_backup_label: could not open backup label file %s: %s"),
|
||||||
label_path, strerror(errno));
|
label_path, strerror(errno));
|
||||||
exit(ERR_BAD_BACKUP_LABEL);
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_info(_("read_backup_label: parsing backup label file '%s'\n"),
|
log_info(_("read_backup_label: parsing backup label file '%s'\n"),
|
||||||
@@ -2220,7 +2272,7 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
|
|||||||
{
|
{
|
||||||
log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""),
|
log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""),
|
||||||
label_key, label_value);
|
label_key, label_value);
|
||||||
exit(ERR_BAD_BACKUP_LABEL);
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("standby clone: got backup label entry \"%s: %s\"\n",
|
log_debug("standby clone: got backup label entry \"%s: %s\"\n",
|
||||||
@@ -2232,14 +2284,19 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
|
|||||||
char wal_filename[MAXLEN];
|
char wal_filename[MAXLEN];
|
||||||
|
|
||||||
nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename);
|
nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename);
|
||||||
|
|
||||||
if (nmatches != 2)
|
if (nmatches != 2)
|
||||||
{
|
{
|
||||||
log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n"));
|
log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n"));
|
||||||
exit(ERR_BAD_BACKUP_LABEL);
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
out_backup_label->start_wal_location =
|
out_backup_label->start_wal_location =
|
||||||
parse_label_lsn(&label_key[0], start_wal_location);
|
parse_label_lsn(&label_key[0], start_wal_location);
|
||||||
|
|
||||||
|
if (out_backup_label->start_wal_location == InvalidXLogRecPtr)
|
||||||
|
return false;
|
||||||
|
|
||||||
(void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN);
|
(void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN);
|
||||||
out_backup_label->start_wal_file[MAXLEN-1] = '\0';
|
out_backup_label->start_wal_file[MAXLEN-1] = '\0';
|
||||||
}
|
}
|
||||||
@@ -2247,6 +2304,9 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
|
|||||||
{
|
{
|
||||||
out_backup_label->checkpoint_location =
|
out_backup_label->checkpoint_location =
|
||||||
parse_label_lsn(&label_key[0], &label_value[0]);
|
parse_label_lsn(&label_key[0], &label_value[0]);
|
||||||
|
|
||||||
|
if (out_backup_label->checkpoint_location == InvalidXLogRecPtr)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
else if (strcmp(label_key, "BACKUP METHOD") == 0)
|
else if (strcmp(label_key, "BACKUP METHOD") == 0)
|
||||||
{
|
{
|
||||||
@@ -2272,6 +2332,9 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
|
|||||||
{
|
{
|
||||||
out_backup_label->min_failover_slot_lsn =
|
out_backup_label->min_failover_slot_lsn =
|
||||||
parse_label_lsn(&label_key[0], &label_value[0]);
|
parse_label_lsn(&label_key[0], &label_value[0]);
|
||||||
|
|
||||||
|
if (out_backup_label->min_failover_slot_lsn == InvalidXLogRecPtr)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -2281,6 +2344,10 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
|
|||||||
}
|
}
|
||||||
|
|
||||||
(void) fclose(label_file);
|
(void) fclose(label_file);
|
||||||
|
|
||||||
|
log_debug(_("read_backup_label: label is %s; start wal file is %s\n"), out_backup_label->label, out_backup_label->start_wal_file);
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -2663,6 +2730,8 @@ do_standby_follow(void)
|
|||||||
*
|
*
|
||||||
* TODO:
|
* TODO:
|
||||||
* - make connection test timeouts/intervals configurable (see below)
|
* - make connection test timeouts/intervals configurable (see below)
|
||||||
|
* - add command line option --remote_pg_bindir or similar to
|
||||||
|
* optionally handle cases where the remote pg_bindir is different
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -2673,7 +2742,7 @@ do_standby_switchover(void)
|
|||||||
int server_version_num;
|
int server_version_num;
|
||||||
bool use_pg_rewind;
|
bool use_pg_rewind;
|
||||||
|
|
||||||
/* the remote server is the primary which will be demoted */
|
/* the remote server is the primary to be demoted */
|
||||||
char remote_conninfo[MAXCONNINFO] = "";
|
char remote_conninfo[MAXCONNINFO] = "";
|
||||||
char remote_host[MAXLEN];
|
char remote_host[MAXLEN];
|
||||||
char remote_data_directory[MAXLEN];
|
char remote_data_directory[MAXLEN];
|
||||||
@@ -2689,9 +2758,9 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
char repmgr_db_cli_params[MAXLEN] = "";
|
char repmgr_db_cli_params[MAXLEN] = "";
|
||||||
int query_result;
|
int query_result;
|
||||||
t_node_info remote_node_record;
|
t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
bool connection_success;
|
bool connection_success,
|
||||||
|
shutdown_success;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* SANITY CHECKS
|
* SANITY CHECKS
|
||||||
@@ -2711,7 +2780,7 @@ do_standby_switchover(void)
|
|||||||
log_err(_("switchover must be executed from the standby node to be promoted\n"));
|
log_err(_("switchover must be executed from the standby node to be promoted\n"));
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
server_version_num = check_server_version(local_conn, "master", true, NULL);
|
server_version_num = check_server_version(local_conn, "master", true, NULL);
|
||||||
@@ -2821,8 +2890,8 @@ do_standby_switchover(void)
|
|||||||
/* 9.5 and later have pg_rewind built-in - always use that */
|
/* 9.5 and later have pg_rewind built-in - always use that */
|
||||||
use_pg_rewind = true;
|
use_pg_rewind = true;
|
||||||
maxlen_snprintf(remote_pg_rewind,
|
maxlen_snprintf(remote_pg_rewind,
|
||||||
"%s/pg_rewind",
|
"%s",
|
||||||
pg_bindir);
|
make_pg_path("pg_rewind"));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -2842,8 +2911,8 @@ do_standby_switchover(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
maxlen_snprintf(remote_pg_rewind,
|
maxlen_snprintf(remote_pg_rewind,
|
||||||
"%s/pg_rewind",
|
"%s",
|
||||||
pg_bindir);
|
make_pg_path("pg_rewind"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -3038,8 +3107,8 @@ do_standby_switchover(void)
|
|||||||
log_verbose(LOG_DEBUG, "remote_archive_config_dir: %s\n", remote_archive_config_dir);
|
log_verbose(LOG_DEBUG, "remote_archive_config_dir: %s\n", remote_archive_config_dir);
|
||||||
|
|
||||||
maxlen_snprintf(command,
|
maxlen_snprintf(command,
|
||||||
"%s/repmgr standby archive-config -f %s --config-archive-dir=%s",
|
"%s standby archive-config -f %s --config-archive-dir=%s",
|
||||||
pg_bindir,
|
make_pg_path("repmgr"),
|
||||||
runtime_options.remote_config_file,
|
runtime_options.remote_config_file,
|
||||||
remote_archive_config_dir);
|
remote_archive_config_dir);
|
||||||
|
|
||||||
@@ -3094,41 +3163,63 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
termPQExpBuffer(&command_output);
|
termPQExpBuffer(&command_output);
|
||||||
|
|
||||||
connection_success = false;
|
shutdown_success = false;
|
||||||
|
|
||||||
/* loop for timeout waiting for current primary to stop */
|
/* loop for timeout waiting for current primary to stop */
|
||||||
|
|
||||||
for(i = 0; i < options.reconnect_attempts; i++)
|
for (i = 0; i < options.reconnect_attempts; i++)
|
||||||
{
|
{
|
||||||
/* Check whether primary is available */
|
/* Check whether primary is available */
|
||||||
|
|
||||||
remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */
|
PGPing ping_res = PQping(remote_conninfo);
|
||||||
|
|
||||||
/* XXX failure to connect doesn't mean the server is necessarily
|
/* database server could not be contacted */
|
||||||
* completely stopped - we need to better detect the reason for
|
if (ping_res == PQPING_NO_RESPONSE)
|
||||||
* connection failure ("server not listening" vs "shutting down")
|
|
||||||
*
|
|
||||||
* -> check is_pgup()
|
|
||||||
*/
|
|
||||||
if (PQstatus(remote_conn) != CONNECTION_OK)
|
|
||||||
{
|
{
|
||||||
connection_success = true;
|
bool command_success;
|
||||||
|
|
||||||
log_notice(_("current master has been stopped\n"));
|
/*
|
||||||
break;
|
* directly access the server and check that the
|
||||||
|
* pidfile has gone away so we can be sure the server is actually
|
||||||
|
* shut down and the PQPING_NO_RESPONSE is not due to other issues
|
||||||
|
* such as coincidental network failure.
|
||||||
|
*/
|
||||||
|
initPQExpBuffer(&command_output);
|
||||||
|
|
||||||
|
maxlen_snprintf(command,
|
||||||
|
"ls %s/postmaster.pid >/dev/null 2>&1 && echo 1 || echo 0",
|
||||||
|
remote_data_directory);
|
||||||
|
|
||||||
|
command_success = remote_command(
|
||||||
|
remote_host,
|
||||||
|
runtime_options.remote_user,
|
||||||
|
command,
|
||||||
|
&command_output);
|
||||||
|
|
||||||
|
if (command_success == true && *command_output.data == '0')
|
||||||
|
{
|
||||||
|
shutdown_success = true;
|
||||||
|
|
||||||
|
log_notice(_("current master has been stopped\n"));
|
||||||
|
|
||||||
|
termPQExpBuffer(&command_output);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&command_output);
|
||||||
}
|
}
|
||||||
PQfinish(remote_conn);
|
|
||||||
|
|
||||||
// configurable?
|
/* XXX make configurable? */
|
||||||
sleep(options.reconnect_interval);
|
sleep(options.reconnect_interval);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (connection_success == false)
|
if (shutdown_success == false)
|
||||||
{
|
{
|
||||||
log_err(_("master server did not shut down\n"));
|
log_err(_("master server did not shut down\n"));
|
||||||
log_hint(_("check the master server status before performing any further actions"));
|
log_hint(_("check the master server status before performing any further actions"));
|
||||||
exit(ERR_FAILOVER_FAIL);
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* promote this standby */
|
/* promote this standby */
|
||||||
@@ -3151,8 +3242,8 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
/* Execute pg_rewind */
|
/* Execute pg_rewind */
|
||||||
maxlen_snprintf(command,
|
maxlen_snprintf(command,
|
||||||
"%s/pg_rewind -D %s --source-server=\\'%s\\'",
|
"%s -D %s --source-server=\\'%s\\'",
|
||||||
pg_bindir,
|
remote_pg_rewind,
|
||||||
remote_data_directory,
|
remote_data_directory,
|
||||||
options.conninfo);
|
options.conninfo);
|
||||||
|
|
||||||
@@ -3173,8 +3264,8 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
/* Restore any previously archived config files */
|
/* Restore any previously archived config files */
|
||||||
maxlen_snprintf(command,
|
maxlen_snprintf(command,
|
||||||
"%s/repmgr standby restore-config -D %s --config-archive-dir=%s",
|
"%s standby restore-config -D %s --config-archive-dir=%s",
|
||||||
pg_bindir,
|
make_pg_path("repmgr"),
|
||||||
remote_data_directory,
|
remote_data_directory,
|
||||||
remote_archive_config_dir);
|
remote_archive_config_dir);
|
||||||
|
|
||||||
@@ -3231,8 +3322,8 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
format_db_cli_params(options.conninfo, repmgr_db_cli_params);
|
format_db_cli_params(options.conninfo, repmgr_db_cli_params);
|
||||||
maxlen_snprintf(command,
|
maxlen_snprintf(command,
|
||||||
"%s/repmgr -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone",
|
"%s -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone",
|
||||||
pg_bindir,
|
make_pg_path("repmgr"),
|
||||||
remote_data_directory,
|
remote_data_directory,
|
||||||
runtime_options.remote_config_file,
|
runtime_options.remote_config_file,
|
||||||
repmgr_db_cli_params
|
repmgr_db_cli_params
|
||||||
@@ -3257,8 +3348,8 @@ do_standby_switchover(void)
|
|||||||
*/
|
*/
|
||||||
format_db_cli_params(options.conninfo, repmgr_db_cli_params);
|
format_db_cli_params(options.conninfo, repmgr_db_cli_params);
|
||||||
maxlen_snprintf(command,
|
maxlen_snprintf(command,
|
||||||
"%s/repmgr -D %s -f %s %s standby follow",
|
"%s -D %s -f %s %s standby follow",
|
||||||
pg_bindir,
|
make_pg_path("repmgr"),
|
||||||
remote_data_directory,
|
remote_data_directory,
|
||||||
runtime_options.remote_config_file,
|
runtime_options.remote_config_file,
|
||||||
repmgr_db_cli_params
|
repmgr_db_cli_params
|
||||||
@@ -3292,7 +3383,7 @@ do_standby_switchover(void)
|
|||||||
if (is_standby(remote_conn) == 0)
|
if (is_standby(remote_conn) == 0)
|
||||||
{
|
{
|
||||||
log_err(_("new standby (old master) is not a standby\n"));
|
log_err(_("new standby (old master) is not a standby\n"));
|
||||||
exit(ERR_FAILOVER_FAIL);
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
}
|
}
|
||||||
connection_success = true;
|
connection_success = true;
|
||||||
break;
|
break;
|
||||||
@@ -3306,7 +3397,7 @@ do_standby_switchover(void)
|
|||||||
if (connection_success == false)
|
if (connection_success == false)
|
||||||
{
|
{
|
||||||
log_err(_("unable to connect to new standby (old master)\n"));
|
log_err(_("unable to connect to new standby (old master)\n"));
|
||||||
exit(ERR_FAILOVER_FAIL);
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("new standby is in recovery\n");
|
log_debug("new standby is in recovery\n");
|
||||||
@@ -3315,15 +3406,14 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
local_conn = establish_db_connection(options.conninfo, true);
|
local_conn = establish_db_connection(options.conninfo, true);
|
||||||
|
|
||||||
|
|
||||||
query_result = get_node_replication_state(local_conn, remote_node_record.name, remote_node_replication_state);
|
query_result = get_node_replication_state(local_conn, remote_node_record.name, remote_node_replication_state);
|
||||||
|
|
||||||
if (query_result == -1)
|
if (query_result == -1)
|
||||||
{
|
{
|
||||||
log_err(_("unable to retrieve replication status for node %i\n"), remote_node_id);
|
log_err(_("unable to retrieve replication status for node %i\n"), remote_node_id);
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
|
|
||||||
// errcode?
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (query_result == 0)
|
if (query_result == 0)
|
||||||
@@ -3332,7 +3422,6 @@ do_standby_switchover(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* XXX other valid values? */
|
|
||||||
/* XXX we should poll for a while in case the node takes time to connect to the primary */
|
/* XXX we should poll for a while in case the node takes time to connect to the primary */
|
||||||
if (strcmp(remote_node_replication_state, "streaming") == 0 ||
|
if (strcmp(remote_node_replication_state, "streaming") == 0 ||
|
||||||
strcmp(remote_node_replication_state, "catchup") == 0)
|
strcmp(remote_node_replication_state, "catchup") == 0)
|
||||||
@@ -3341,9 +3430,16 @@ do_standby_switchover(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_err(_("node %i replication state is \"%s\"\n"), remote_node_id, remote_node_replication_state);
|
/*
|
||||||
|
* Other possible replication states are:
|
||||||
|
* - startup
|
||||||
|
* - backup
|
||||||
|
* - UNKNOWN
|
||||||
|
*/
|
||||||
|
log_err(_("node %i has unexpected replication state \"%s\"\n"),
|
||||||
|
remote_node_id, remote_node_replication_state);
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
exit(ERR_DB_QUERY);
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3355,7 +3451,7 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
if (options.use_replication_slots)
|
if (options.use_replication_slots)
|
||||||
{
|
{
|
||||||
t_node_info local_node_record;
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
query_result = get_node_record(local_conn, options.cluster_name, options.node, &local_node_record);
|
query_result = get_node_record(local_conn, options.cluster_name, options.node, &local_node_record);
|
||||||
|
|
||||||
@@ -4120,12 +4216,13 @@ do_help(void)
|
|||||||
printf(_(" -w, --wal-keep-segments=VALUE (standby clone) minimum value for the GUC\n" \
|
printf(_(" -w, --wal-keep-segments=VALUE (standby clone) minimum value for the GUC\n" \
|
||||||
" wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS);
|
" wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS);
|
||||||
printf(_(" -W, --wait (standby follow) wait for a master to appear\n"));
|
printf(_(" -W, --wait (standby follow) wait for a master to appear\n"));
|
||||||
printf(_(" -m, --mode (standby switchover) shutdown mode (smart|fast|immediate)\n"));
|
printf(_(" -m, --mode (standby switchover) shutdown mode (\"fast\" - default, \"smart\" or \"immediate\")\n"));
|
||||||
printf(_(" -C, --remote-config-file (standby switchover) path to the configuration file on\n" \
|
printf(_(" -C, --remote-config-file (standby switchover) path to the configuration file on\n" \
|
||||||
" the current master\n"));
|
" the current master\n"));
|
||||||
printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \
|
printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \
|
||||||
" optionally providing a path to the binary\n"));
|
" optionally providing a path to the binary\n"));
|
||||||
printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n"));
|
printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n"));
|
||||||
|
printf(_(" --csv (cluster show) output in CSV mode (0 = master, 1 = standby, -1 = down)\n"));
|
||||||
/* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/
|
/* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/
|
||||||
printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n"));
|
printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n"));
|
||||||
printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \
|
printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \
|
||||||
@@ -4216,6 +4313,15 @@ create_recovery_file(const char *data_dir)
|
|||||||
log_debug(_("recovery.conf: %s"), line);
|
log_debug(_("recovery.conf: %s"), line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If restore_command is set, we use it as restore_command in recovery.conf */
|
||||||
|
if (strcmp(options.restore_command, "") != 0)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(line, "restore_command = '%s'\n",
|
||||||
|
options.restore_command);
|
||||||
|
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
|
||||||
|
return false;
|
||||||
|
log_debug(_("recovery.conf: %s"), line);
|
||||||
|
}
|
||||||
fclose(recovery_file);
|
fclose(recovery_file);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -4371,12 +4477,12 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
|||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
run_basebackup(const char *data_dir)
|
run_basebackup(const char *data_dir, int server_version)
|
||||||
{
|
{
|
||||||
char script[MAXLEN];
|
char script[MAXLEN];
|
||||||
int r = 0;
|
int r = 0;
|
||||||
PQExpBufferData params;
|
PQExpBufferData params;
|
||||||
TablespaceListCell *cell;
|
TablespaceListCell *cell;
|
||||||
|
|
||||||
/* Create pg_basebackup command line options */
|
/* Create pg_basebackup command line options */
|
||||||
|
|
||||||
@@ -4411,6 +4517,28 @@ run_basebackup(const char *data_dir)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To ensure we have all the WALs needed during basebackup execution we stream
|
||||||
|
* them as the backup is taking place.
|
||||||
|
* Not necessary if on 9.6 if we have replication slots set in repmgr.conf
|
||||||
|
* (starting at 9.6 there is an option, which we use, to reserve the LSN at
|
||||||
|
* creation time)
|
||||||
|
*/
|
||||||
|
if (server_version < 90600 || !options.use_replication_slots)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We're going to check first if the user set the xlog method in the repmgr.conf
|
||||||
|
* file. We don't want to have conflicts with pg_basebackup due to specifying the
|
||||||
|
* method twice.
|
||||||
|
*/
|
||||||
|
const char xlog_short[4] = "-X ";
|
||||||
|
const char xlog_long[14] = "--xlog-method";
|
||||||
|
if (strstr(options.pg_basebackup_options, xlog_short) == NULL && strstr(options.pg_basebackup_options, xlog_long) == NULL )
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(¶ms, " -X stream");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
maxlen_snprintf(script,
|
maxlen_snprintf(script,
|
||||||
"%s -l \"repmgr base backup\" %s %s",
|
"%s -l \"repmgr base backup\" %s %s",
|
||||||
make_pg_path("pg_basebackup"),
|
make_pg_path("pg_basebackup"),
|
||||||
@@ -4423,7 +4551,7 @@ run_basebackup(const char *data_dir)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* As of 9.4, pg_basebackup only ever returns 0 or 1
|
* As of 9.4, pg_basebackup only ever returns 0 or 1
|
||||||
*/
|
*/
|
||||||
|
|
||||||
r = system(script);
|
r = system(script);
|
||||||
|
|
||||||
@@ -4624,6 +4752,15 @@ check_parameters_for_action(const int action)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Warn about parameters which apply to CLUSTER SHOW only */
|
||||||
|
if (action != CLUSTER_SHOW)
|
||||||
|
{
|
||||||
|
if (runtime_options.csv_mode)
|
||||||
|
{
|
||||||
|
error_list_append(&cli_warnings, _("--csv can only be used when executing CLUSTER SHOW"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5080,6 +5217,10 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error)
|
|||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that in 9.6+, "hot_standby" and "archive" are accepted as aliases
|
||||||
|
* for "replica", but current_setting() will of course always return "replica"
|
||||||
|
*/
|
||||||
char *levels_96plus[] = {
|
char *levels_96plus[] = {
|
||||||
"replica",
|
"replica",
|
||||||
"logical",
|
"logical",
|
||||||
|
|||||||
@@ -19,7 +19,7 @@
|
|||||||
|
|
||||||
# Node ID and name
|
# Node ID and name
|
||||||
# (Note: we recommend to avoid naming nodes after their initial
|
# (Note: we recommend to avoid naming nodes after their initial
|
||||||
# replication funcion, as this will cause confusion when e.g.
|
# replication function, as this will cause confusion when e.g.
|
||||||
# "standby2" is promoted to primary)
|
# "standby2" is promoted to primary)
|
||||||
#node=2 # a unique integer
|
#node=2 # a unique integer
|
||||||
#node_name=node2 # an arbitrary (but unique) string; we recommend using
|
#node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||||
@@ -28,8 +28,16 @@
|
|||||||
|
|
||||||
# Database connection information as a conninfo string
|
# Database connection information as a conninfo string
|
||||||
# This must be accessible to all servers in the cluster; for details see:
|
# This must be accessible to all servers in the cluster; for details see:
|
||||||
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
#
|
||||||
|
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||||
|
#
|
||||||
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||||
|
#
|
||||||
|
# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
|
||||||
|
# conninfo string to determine the length of time which elapses before
|
||||||
|
# a network connection attempt is abandoned; for details see:
|
||||||
|
#
|
||||||
|
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT
|
||||||
|
|
||||||
# Optional configuration items
|
# Optional configuration items
|
||||||
# ============================
|
# ============================
|
||||||
@@ -113,6 +121,10 @@
|
|||||||
#
|
#
|
||||||
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
|
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
|
||||||
|
|
||||||
|
# You can specify a restore_command to be used in the recovery.conf that
|
||||||
|
# will be placed in the cloned standby
|
||||||
|
#
|
||||||
|
# restore_command = cp /path/to/archived/wals/%f %p
|
||||||
|
|
||||||
# Failover settings (repmgrd)
|
# Failover settings (repmgrd)
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
|
|||||||
6
repmgr.h
6
repmgr.h
@@ -28,6 +28,7 @@
|
|||||||
#include "dbutils.h"
|
#include "dbutils.h"
|
||||||
#include "errcode.h"
|
#include "errcode.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include "dirmod.h"
|
||||||
|
|
||||||
#define MIN_SUPPORTED_VERSION "9.3"
|
#define MIN_SUPPORTED_VERSION "9.3"
|
||||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||||
@@ -69,7 +70,7 @@ typedef struct
|
|||||||
bool rsync_only;
|
bool rsync_only;
|
||||||
bool fast_checkpoint;
|
bool fast_checkpoint;
|
||||||
bool ignore_external_config_files;
|
bool ignore_external_config_files;
|
||||||
char pg_ctl_mode[MAXLEN];
|
bool csv_mode;
|
||||||
char masterport[MAXLEN];
|
char masterport[MAXLEN];
|
||||||
/*
|
/*
|
||||||
* configuration file parameters which can be overridden on the
|
* configuration file parameters which can be overridden on the
|
||||||
@@ -80,6 +81,7 @@ typedef struct
|
|||||||
/* parameter used by STANDBY SWITCHOVER */
|
/* parameter used by STANDBY SWITCHOVER */
|
||||||
char remote_config_file[MAXLEN];
|
char remote_config_file[MAXLEN];
|
||||||
char pg_rewind[MAXPGPATH];
|
char pg_rewind[MAXPGPATH];
|
||||||
|
char pg_ctl_mode[MAXLEN];
|
||||||
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
||||||
char config_archive_dir[MAXLEN];
|
char config_archive_dir[MAXLEN];
|
||||||
/* parameter used by CLUSTER CLEANUP */
|
/* parameter used by CLUSTER CLEANUP */
|
||||||
@@ -94,7 +96,7 @@ typedef struct
|
|||||||
bool initdb_no_pwprompt;
|
bool initdb_no_pwprompt;
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false }
|
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", "", false }
|
||||||
|
|
||||||
struct BackupLabel
|
struct BackupLabel
|
||||||
{
|
{
|
||||||
|
|||||||
149
repmgrd.c
149
repmgrd.c
@@ -44,11 +44,11 @@
|
|||||||
|
|
||||||
|
|
||||||
/* Local info */
|
/* Local info */
|
||||||
t_configuration_options local_options;
|
t_configuration_options local_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||||
PGconn *my_local_conn = NULL;
|
PGconn *my_local_conn = NULL;
|
||||||
|
|
||||||
/* Master info */
|
/* Master info */
|
||||||
t_configuration_options master_options;
|
t_configuration_options master_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||||
|
|
||||||
PGconn *master_conn = NULL;
|
PGconn *master_conn = NULL;
|
||||||
|
|
||||||
@@ -61,8 +61,6 @@ bool failover_done = false;
|
|||||||
|
|
||||||
char *pid_file = NULL;
|
char *pid_file = NULL;
|
||||||
|
|
||||||
t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
|
||||||
|
|
||||||
static void help(void);
|
static void help(void);
|
||||||
static void usage(void);
|
static void usage(void);
|
||||||
static void check_cluster_configuration(PGconn *conn);
|
static void check_cluster_configuration(PGconn *conn);
|
||||||
@@ -399,7 +397,7 @@ main(int argc, char **argv)
|
|||||||
case STANDBY:
|
case STANDBY:
|
||||||
|
|
||||||
/* We need the node id of the master server as well as a connection to it */
|
/* We need the node id of the master server as well as a connection to it */
|
||||||
log_info(_("connecting to master node '%s'\n"),
|
log_info(_("connecting to master node of cluster '%s'\n"),
|
||||||
local_options.cluster_name);
|
local_options.cluster_name);
|
||||||
|
|
||||||
master_conn = get_master_connection(my_local_conn,
|
master_conn = get_master_connection(my_local_conn,
|
||||||
@@ -462,16 +460,16 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "standby check loop...\n");
|
if (node_info.type == STANDBY)
|
||||||
|
|
||||||
if (node_info.type == WITNESS)
|
|
||||||
{
|
|
||||||
witness_monitor();
|
|
||||||
}
|
|
||||||
else if (node_info.type == STANDBY)
|
|
||||||
{
|
{
|
||||||
|
log_verbose(LOG_DEBUG, "standby check loop...\n");
|
||||||
standby_monitor();
|
standby_monitor();
|
||||||
}
|
}
|
||||||
|
else if (node_info.type == WITNESS)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_DEBUG, "witness check loop...\n");
|
||||||
|
witness_monitor();
|
||||||
|
}
|
||||||
|
|
||||||
sleep(local_options.monitor_interval_secs);
|
sleep(local_options.monitor_interval_secs);
|
||||||
|
|
||||||
@@ -667,7 +665,7 @@ witness_monitor(void)
|
|||||||
" replication_lag, apply_lag )"
|
" replication_lag, apply_lag )"
|
||||||
" VALUES(%d, %d, "
|
" VALUES(%d, %d, "
|
||||||
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
|
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
|
||||||
" pg_current_xlog_location(), NULL, "
|
" pg_catalog.pg_current_xlog_location(), NULL, "
|
||||||
" 0, 0) ",
|
" 0, 0) ",
|
||||||
get_repmgr_schema_quoted(my_local_conn),
|
get_repmgr_schema_quoted(my_local_conn),
|
||||||
master_options.node,
|
master_options.node,
|
||||||
@@ -695,7 +693,7 @@ standby_monitor(void)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char monitor_standby_timestamp[MAXLEN];
|
char monitor_standby_timestamp[MAXLEN];
|
||||||
char last_wal_master_location[MAXLEN];
|
char last_wal_primary_location[MAXLEN];
|
||||||
char last_xlog_receive_location[MAXLEN];
|
char last_xlog_receive_location[MAXLEN];
|
||||||
char last_xlog_replay_location[MAXLEN];
|
char last_xlog_replay_location[MAXLEN];
|
||||||
char last_xact_replay_timestamp[MAXLEN];
|
char last_xact_replay_timestamp[MAXLEN];
|
||||||
@@ -706,6 +704,9 @@ standby_monitor(void)
|
|||||||
XLogRecPtr lsn_last_xlog_receive_location;
|
XLogRecPtr lsn_last_xlog_receive_location;
|
||||||
XLogRecPtr lsn_last_xlog_replay_location;
|
XLogRecPtr lsn_last_xlog_replay_location;
|
||||||
|
|
||||||
|
long long unsigned int replication_lag;
|
||||||
|
long long unsigned int apply_lag;
|
||||||
|
|
||||||
int connection_retries,
|
int connection_retries,
|
||||||
ret;
|
ret;
|
||||||
bool did_retry = false;
|
bool did_retry = false;
|
||||||
@@ -750,10 +751,9 @@ standby_monitor(void)
|
|||||||
? "master"
|
? "master"
|
||||||
: "upstream";
|
: "upstream";
|
||||||
|
|
||||||
// ZZZ "5 minutes"?
|
|
||||||
/*
|
/*
|
||||||
* Check if the upstream node is still available, if after 5 minutes of retries
|
* Check that the upstream node is still available
|
||||||
* we cannot reconnect, try to get a new upstream node.
|
* If not, initiate failover process
|
||||||
*/
|
*/
|
||||||
|
|
||||||
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
|
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
|
||||||
@@ -820,26 +820,24 @@ standby_monitor(void)
|
|||||||
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* When we returns from this function we will have a new master
|
* When we return from this function we will have a new master
|
||||||
* and a new master_conn
|
* and a new master_conn
|
||||||
*/
|
*
|
||||||
|
|
||||||
/*
|
|
||||||
* Failover handling is handled differently depending on whether
|
* Failover handling is handled differently depending on whether
|
||||||
* the failed node is the master or a cascading standby
|
* the failed node is the master or a cascading standby
|
||||||
*/
|
*/
|
||||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
||||||
|
|
||||||
if (upstream_node.type == MASTER)
|
if (upstream_node.type == MASTER)
|
||||||
{
|
{
|
||||||
log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"),
|
log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"),
|
||||||
node_info.upstream_node_id);
|
node_info.upstream_node_id);
|
||||||
do_master_failover();
|
do_master_failover();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"),
|
log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"),
|
||||||
node_info.upstream_node_id);
|
node_info.upstream_node_id);
|
||||||
|
|
||||||
if (!do_upstream_standby_failover(upstream_node))
|
if (!do_upstream_standby_failover(upstream_node))
|
||||||
{
|
{
|
||||||
@@ -847,20 +845,20 @@ standby_monitor(void)
|
|||||||
initPQExpBuffer(&errmsg);
|
initPQExpBuffer(&errmsg);
|
||||||
|
|
||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("unable to reconnect to new upstream node, terminating..."));
|
_("unable to reconnect to new upstream node, terminating..."));
|
||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(master_conn,
|
create_event_record(master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
false,
|
false,
|
||||||
errmsg.data);
|
errmsg.data);
|
||||||
|
|
||||||
terminate(ERR_DB_CON);
|
terminate(ERR_DB_CON);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -963,7 +961,7 @@ standby_monitor(void)
|
|||||||
|
|
||||||
if (active_master_id != master_options.node)
|
if (active_master_id != master_options.node)
|
||||||
{
|
{
|
||||||
log_notice(_("connecting to active master (node %i)...\n"), active_master_id); \
|
log_notice(_("connecting to active master (node %i)...\n"), active_master_id);
|
||||||
if (master_conn != NULL)
|
if (master_conn != NULL)
|
||||||
{
|
{
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
@@ -986,9 +984,11 @@ standby_monitor(void)
|
|||||||
|
|
||||||
/* Get local xlog info */
|
/* Get local xlog info */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
"SELECT CURRENT_TIMESTAMP, "
|
||||||
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), "
|
"pg_catalog.pg_last_xlog_receive_location(), "
|
||||||
"pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()");
|
"pg_catalog.pg_last_xlog_replay_location(), "
|
||||||
|
"pg_catalog.pg_last_xact_replay_timestamp(), "
|
||||||
|
"pg_catalog.pg_last_xlog_receive_location() >= pg_catalog.pg_last_xlog_replay_location()");
|
||||||
|
|
||||||
res = PQexec(my_local_conn, sqlquery);
|
res = PQexec(my_local_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -1038,7 +1038,12 @@ standby_monitor(void)
|
|||||||
"Replayed WAL newer than received WAL - is this standby connected to its upstream?\n");
|
"Replayed WAL newer than received WAL - is this standby connected to its upstream?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get master xlog info */
|
/*
|
||||||
|
* Get master xlog position
|
||||||
|
*
|
||||||
|
* TODO: investigate whether pg_current_xlog_insert_location() would be a better
|
||||||
|
* choice; see: https://github.com/2ndQuadrant/repmgr/issues/189
|
||||||
|
*/
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");
|
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");
|
||||||
|
|
||||||
res = PQexec(master_conn, sqlquery);
|
res = PQexec(master_conn, sqlquery);
|
||||||
@@ -1049,23 +1054,43 @@ standby_monitor(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
strncpy(last_wal_master_location, PQgetvalue(res, 0, 0), MAXLEN);
|
strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* Calculate the lag */
|
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL);
|
||||||
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
|
||||||
|
|
||||||
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
||||||
|
|
||||||
|
/* Calculate apply lag */
|
||||||
if (last_xlog_receive_location_gte_replayed == false)
|
if (last_xlog_receive_location_gte_replayed == false)
|
||||||
{
|
{
|
||||||
lsn_last_xlog_receive_location = lsn_last_xlog_replay_location;
|
/*
|
||||||
|
* We're not receiving streaming WAL - in this case the receive location
|
||||||
|
* equals the last replayed location
|
||||||
|
*/
|
||||||
|
apply_lag = 0;
|
||||||
|
strncpy(last_xlog_receive_location, last_xlog_replay_location, MAXLEN);
|
||||||
|
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;
|
||||||
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Calculate replication lag */
|
||||||
|
if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location)
|
||||||
|
{
|
||||||
|
replication_lag = (long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* This should never happen, but in case it does set lag to zero */
|
||||||
|
log_warning("Master xlog (%s) location appears less than standby receive location (%s)\n",
|
||||||
|
last_wal_primary_location,
|
||||||
|
last_xlog_receive_location);
|
||||||
|
replication_lag = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build the SQL to execute on master
|
* Build the SQL to execute on master
|
||||||
*/
|
*/
|
||||||
@@ -1092,11 +1117,10 @@ standby_monitor(void)
|
|||||||
local_options.node,
|
local_options.node,
|
||||||
monitor_standby_timestamp,
|
monitor_standby_timestamp,
|
||||||
last_xact_replay_timestamp,
|
last_xact_replay_timestamp,
|
||||||
last_wal_master_location,
|
last_wal_primary_location,
|
||||||
last_xlog_receive_location,
|
last_xlog_receive_location,
|
||||||
(long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location),
|
replication_lag,
|
||||||
(long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location));
|
apply_lag);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Execute the query asynchronously, but don't check for a result. We will
|
* Execute the query asynchronously, but don't check for a result. We will
|
||||||
* check the result next time we pause for a monitor step.
|
* check the result next time we pause for a monitor step.
|
||||||
@@ -1143,8 +1167,8 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
|
t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
|
||||||
|
|
||||||
/* Store details of the failed node here */
|
/* Store details of the failed node here */
|
||||||
t_node_info failed_master = T_NODE_INFO_INITIALIZER;
|
t_node_info failed_master = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
/* Store details of the best candidate for promotion to master here */
|
/* Store details of the best candidate for promotion to master here */
|
||||||
t_node_info best_candidate = T_NODE_INFO_INITIALIZER;
|
t_node_info best_candidate = T_NODE_INFO_INITIALIZER;
|
||||||
@@ -1154,7 +1178,7 @@ do_master_failover(void)
|
|||||||
"SELECT id, conninfo, type, upstream_node_id "
|
"SELECT id, conninfo, type, upstream_node_id "
|
||||||
" FROM %s.repl_nodes "
|
" FROM %s.repl_nodes "
|
||||||
" WHERE cluster = '%s' "
|
" WHERE cluster = '%s' "
|
||||||
" AND active IS TRUE "
|
" AND active IS TRUE "
|
||||||
" AND priority > 0 "
|
" AND priority > 0 "
|
||||||
" ORDER BY priority DESC, id "
|
" ORDER BY priority DESC, id "
|
||||||
" LIMIT %i ",
|
" LIMIT %i ",
|
||||||
@@ -1167,7 +1191,6 @@ do_master_failover(void)
|
|||||||
{
|
{
|
||||||
log_err(_("unable to retrieve node records: %s\n"), PQerrorMessage(my_local_conn));
|
log_err(_("unable to retrieve node records: %s\n"), PQerrorMessage(my_local_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(my_local_conn);
|
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1541,12 +1564,12 @@ do_master_failover(void)
|
|||||||
log_notice(_("Original master reappeared before this standby was promoted - no action taken\n"));
|
log_notice(_("Original master reappeared before this standby was promoted - no action taken\n"));
|
||||||
|
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
|
master_conn = NULL;
|
||||||
|
|
||||||
/* no failover occurred but we'll want to restart connections */
|
/* no failover occurred but we'll want to restart connections */
|
||||||
failover_done = true;
|
failover_done = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(my_local_conn);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log_err(_("promote command failed. You could check and try it manually.\n"));
|
log_err(_("promote command failed. You could check and try it manually.\n"));
|
||||||
@@ -1901,7 +1924,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
|||||||
static bool
|
static bool
|
||||||
set_local_node_status(void)
|
set_local_node_status(void)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
int active_master_node_id = NODE_NOT_FOUND;
|
int active_master_node_id = NODE_NOT_FOUND;
|
||||||
char master_conninfo[MAXLEN];
|
char master_conninfo[MAXLEN];
|
||||||
@@ -1994,10 +2017,12 @@ check_cluster_configuration(PGconn *conn)
|
|||||||
log_info(_("checking cluster configuration with schema '%s'\n"), get_repmgr_schema());
|
log_info(_("checking cluster configuration with schema '%s'\n"), get_repmgr_schema());
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT oid FROM pg_class "
|
"SELECT oid FROM pg_catalog.pg_class "
|
||||||
" WHERE oid = '%s.repl_nodes'::regclass ",
|
" WHERE oid = '%s.repl_nodes'::regclass ",
|
||||||
get_repmgr_schema_quoted(master_conn));
|
get_repmgr_schema_quoted(master_conn));
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn));
|
log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn));
|
||||||
@@ -2416,6 +2441,8 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
|||||||
errmsg.data);
|
errmsg.data);
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
|
conn = NULL;
|
||||||
|
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -63,6 +63,15 @@ UPDATE repl_nodes SET type = 'master' WHERE id = $master_id;
|
|||||||
|
|
||||||
-- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);
|
-- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);
|
||||||
|
|
||||||
|
/* There's also an event table which we need to create */
|
||||||
|
CREATE TABLE repl_events (
|
||||||
|
node_id INTEGER NOT NULL,
|
||||||
|
event TEXT NOT NULL,
|
||||||
|
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
details TEXT NULL
|
||||||
|
);
|
||||||
|
|
||||||
/* When you're sure of your changes, commit them */
|
/* When you're sure of your changes, commit them */
|
||||||
|
|
||||||
-- COMMIT;
|
-- COMMIT;
|
||||||
|
|||||||
@@ -27,5 +27,6 @@
|
|||||||
|
|
||||||
BEGIN;
|
BEGIN;
|
||||||
|
|
||||||
ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE;
|
ALTER TABLE repl_nodes DROP CONSTRAINT repl_nodes_upstream_node_id_fkey,
|
||||||
|
ADD CONSTRAINT repl_nodes_upstream_node_id_fkey FOREIGN KEY (upstream_node_id) REFERENCES repl_nodes(id) DEFERRABLE;
|
||||||
COMMIT;
|
COMMIT;
|
||||||
|
|||||||
Reference in New Issue
Block a user