Merge branch 'master' of github.com:2ndQuadrant/repmgr into REL3_1_STABLE

This commit is contained in:
Ian Barwick
2016-07-06 16:43:39 +09:00
22 changed files with 735 additions and 426 deletions

2
FAQ.md
View File

@@ -38,7 +38,7 @@ General
No. Hash indexes and replication do not mix well and their use is No. Hash indexes and replication do not mix well and their use is
explicitly discouraged; see: explicitly discouraged; see:
http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175 https://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
`repmgr` `repmgr`
-------- --------

10
HISTORY
View File

@@ -1,3 +1,13 @@
3.1.4 2016-07-
repmgr: new configuration option for setting "restore_command"
in the recovery.conf file generated by repmgr (Martín)
repmgr: add --csv option to "repmgr cluster show" (Gianni)
repmgr: enable provision of a conninfo string as the -d/--dbname
parameter, similar to other PostgreSQL utilities (Ian)
repmgr: during switchover operations improve detection of
demotion candidate shutdown (Ian)
various bugfixes and documentation updates (Ian, Martín)
3.1.3 2016-05-17 3.1.3 2016-05-17
repmgrd: enable monitoring when a standby is catching up by repmgrd: enable monitoring when a standby is catching up by
replaying archived WAL (Ian) replaying archived WAL (Ian)

View File

@@ -5,7 +5,7 @@
HEADERS = $(wildcard *.h) HEADERS = $(wildcard *.h)
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o
DATA = repmgr.sql uninstall_repmgr.sql DATA = repmgr.sql uninstall_repmgr.sql

View File

@@ -48,7 +48,7 @@ This guide assumes that you are familiar with PostgreSQL administration and
streaming replication concepts. For further details on streaming streaming replication concepts. For further details on streaming
replication, see this link: replication, see this link:
http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION
The following terms are used throughout the `repmgr` documentation. The following terms are used throughout the `repmgr` documentation.
@@ -237,15 +237,19 @@ both servers.
On the master server, a PostgreSQL instance must be initialised and running. On the master server, a PostgreSQL instance must be initialised and running.
The following replication settings must be included in `postgresql.conf`: The following replication settings must be included in `postgresql.conf`:
# Enable replication connections; set this figure to at least one more
# than the number of standbys which will connect to this server
# (note that repmgr will execute `pg_basebackup` in WAL streaming mode,
# which requires two free WAL senders)
max_wal_senders = 10
# Ensure WAL files contain enough information to enable read-only queries # Ensure WAL files contain enough information to enable read-only queries
# on the standby # on the standby
wal_level = 'hot_standby' wal_level = 'hot_standby'
# Enable up to 10 replication connections
max_wal_senders = 10
# How much WAL to retain on the master to allow a temporarily # How much WAL to retain on the master to allow a temporarily
# disconnected standby to catch up again. The larger this is, the # disconnected standby to catch up again. The larger this is, the
# longer the standby can be disconnected. This is needed only in # longer the standby can be disconnected. This is needed only in
@@ -259,6 +263,14 @@ The following replication settings must be included in `postgresql.conf`:
hot_standby = on hot_standby = on
# Enable WAL file archiving
archive_mode = on
# Set archive command to a script or application that will safely store
# you WALs in a secure place. /bin/true is an example of a command that
# ignores archiving. Use something more sensible.
archive_command = '/bin/true'
* * * * * *
@@ -458,7 +470,11 @@ so should be used with care.
Further options can be passed to the `pg_basebackup` utility via Further options can be passed to the `pg_basebackup` utility via
the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
documentation for more details of available options: documentation for more details of available options:
<<<<<<< HEAD
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
=======
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
>>>>>>> 72f9b0145afab1060dd1202c8f8937653c8b2e39
### Using rsync to clone a standby ### Using rsync to clone a standby
@@ -594,13 +610,13 @@ place. If using the default `pg_basebackup` method, we recommend setting
pg_basebackup_options='--xlog-method=stream' pg_basebackup_options='--xlog-method=stream'
See the `pg_basebackup` documentation for details: See the `pg_basebackup` documentation for details:
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
Otherwise it's necessary to set `wal_keep_segments` to an appropriately high Otherwise it's necessary to set `wal_keep_segments` to an appropriately high
value. value.
Further information on replication slots in the PostgreSQL documentation: Further information on replication slots in the PostgreSQL documentation:
http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
Promoting a standby server with repmgr Promoting a standby server with repmgr
@@ -699,8 +715,9 @@ updated to reflect this:
Note that with cascading replication, `repmgr standby follow` can also be Note that with cascading replication, `repmgr standby follow` can also be
used to detach a standby from its current upstream server and follow another used to detach a standby from its current upstream server and follow the
upstream server, including the master. master. However it's currently not possible to have it follow another standby;
we hope to improve this in a future release.
Performing a switchover with repmgr Performing a switchover with repmgr
@@ -727,7 +744,7 @@ both passwordless SSH access and the path of `repmgr.conf` on that server.
> careful preparation and with adequate attention. In particular you should > careful preparation and with adequate attention. In particular you should
> be confident that your network environment is stable and reliable. > be confident that your network environment is stable and reliable.
> >
> We recommend running `repmgr standby switchover` at the most verbose > We recommend running `repmgr standby switchover` at the most verbose
> logging level (`--log-level DEBUG --verbose`) and capturing all output > logging level (`--log-level DEBUG --verbose`) and capturing all output
> to assist troubleshooting any problems. > to assist troubleshooting any problems.
> >
@@ -793,7 +810,7 @@ should have been updated to reflect this:
### Caveats ### Caveats
- the functionality provided `repmgr standby switchover` is primarily aimed - The functionality provided `repmgr standby switchover` is primarily aimed
at a two-server master/standby replication cluster and currently does at a two-server master/standby replication cluster and currently does
not support additional standbys. not support additional standbys.
- `repmgr standby switchover` is designed to use the `pg_rewind` utility, - `repmgr standby switchover` is designed to use the `pg_rewind` utility,
@@ -802,11 +819,16 @@ should have been updated to reflect this:
- `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that - `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that
data checksums were enabled when the cluster was initialized. See the data checksums were enabled when the cluster was initialized. See the
`pg_rewind` documentation for details: `pg_rewind` documentation for details:
http://www.postgresql.org/docs/current/static/app-pgrewind.html https://www.postgresql.org/docs/current/static/app-pgrewind.html
- `repmgrd` should not be running when a switchover is carried out, otherwise - `repmgrd` should not be running when a switchover is carried out, otherwise
the `repmgrd` may try and promote a standby by itself. the `repmgrd` may try and promote a standby by itself.
- Any other standbys attached to the old master will need to be manually - Any other standbys attached to the old master will need to be manually
instructed to point to the new master (e.g. with `repmgr standby follow`). instructed to point to the new master (e.g. with `repmgr standby follow`).
- You must ensure that following a server start using `pg_ctl`, log output
is not send to STDERR (the default behaviour). If logging is not configured,
We recommend setting `logging_collector=on` in `postgresql.conf` and
providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options`
parameter.
We hope to remove some of these restrictions in future versions of `repmgr`. We hope to remove some of these restrictions in future versions of `repmgr`.
@@ -860,8 +882,8 @@ Adjust schema and node ID accordingly. A future `repmgr` release
will make it possible to unregister failed standbys. will make it possible to unregister failed standbys.
Automatic failover with repmgrd Automatic failover with `repmgrd`
------------------------------- ---------------------------------
`repmgrd` is a management and monitoring daemon which runs on standby nodes `repmgrd` is a management and monitoring daemon which runs on standby nodes
and which can automate actions such as failover and updating standbys to and which can automate actions such as failover and updating standbys to
@@ -981,8 +1003,8 @@ during the failover:
(3 rows) (3 rows)
repmgrd log rotation `repmgrd` log rotation
-------------------- ----------------------
Note that currently `repmgrd` does not provide logfile rotation. To ensure Note that currently `repmgrd` does not provide logfile rotation. To ensure
the current logfile does not grow indefinitely, configure your system's `logrotate` the current logfile does not grow indefinitely, configure your system's `logrotate`
@@ -998,8 +1020,29 @@ for up to 52 weeks and rotation forced if a file grows beyond 100Mb:
create 0600 postgres postgres create 0600 postgres postgres
} }
Monitoring
---------- `repmgrd` and PostgreSQL connection settings
--------------------------------------------
In addition to the `repmgr` configuration settings, parameters in the
`conninfo` string influence how `repmgr` makes a network connection to
PostgreSQL. In particular, if another server in the replication cluster
is unreachable at network level, system network settings will influence
the length of time it takes to determine that the connection is not possible.
In particular explicitly setting a parameter for `connect_timeout` should
be considered; the effective minimum value of `2` (seconds) will ensure
that a connection failure at network level is reported as soon as possible,
otherwise dependeing on the system settings (e.g. `tcp_syn_retries` in Linux)
a delay of a minute or more is possible.
For further details on `conninfo` network connection parameters, see:
https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
Monitoring with `repmgrd`
-------------------------
When `repmgrd` is running with the option `-m/--monitoring-history`, it will When `repmgrd` is running with the option `-m/--monitoring-history`, it will
constantly write standby node status information to the `repl_monitor` table, constantly write standby node status information to the `repl_monitor` table,

View File

@@ -1,61 +0,0 @@
Summary: repmgr
Name: repmgr
Version: 3.0
Release: 1
License: GPLv3
Group: System Environment/Daemons
URL: http://repmgr.org
Packager: Ian Barwick <ian@2ndquadrant.com>
Vendor: 2ndQuadrant Limited
Distribution: centos
Source0: %{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
%description
repmgr is a utility suite which greatly simplifies
the process of setting up and managing replication
using streaming replication within a cluster of
PostgreSQL servers.
%prep
%setup
%build
export PATH=$PATH:/usr/pgsql-9.3/bin/
%{__make} USE_PGXS=1
%install
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
export PATH=$PATH:/usr/pgsql-9.3/bin/
%{__make} USE_PGXS=1 install DESTDIR=%{buildroot} INSTALL="install -p"
%{__make} USE_PGXS=1 install_prog DESTDIR=%{buildroot} INSTALL="install -p"
%{__make} USE_PGXS=1 install_rhel DESTDIR=%{buildroot} INSTALL="install -p"
%clean
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
%files
%defattr(-,root,root)
/usr/bin/repmgr
/usr/bin/repmgrd
/usr/pgsql-9.3/bin/repmgr
/usr/pgsql-9.3/bin/repmgrd
/usr/pgsql-9.3/lib/repmgr_funcs.so
/usr/pgsql-9.3/share/contrib/repmgr.sql
/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql
/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql
/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql
%attr(0755,root,root)/etc/init.d/repmgrd
%attr(0644,root,root)/etc/sysconfig/repmgrd
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
%changelog
* Tue Mar 10 2015 Ian Barwick ian@2ndquadrant.com>
- build for repmgr 3.0
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
- fix witness creation to create db and user if needed
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
- initial build for RHEL6

View File

@@ -1,133 +0,0 @@
#!/bin/sh
#
# chkconfig: - 75 16
# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
# processname: repmgrd
# pidfile="/var/run/${NAME}.pid"
# Source function library.
INITD=/etc/rc.d/init.d
. $INITD/functions
# Get function listing for cross-distribution logic.
TYPESET=`typeset -f|grep "declare"`
# Get network config.
. /etc/sysconfig/network
DESC="PostgreSQL replication management and monitoring daemon"
NAME=repmgrd
REPMGRD_ENABLED=no
REPMGRD_OPTS=
REPMGRD_USER=postgres
REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
REPMGRD_PIDFILE=/var/run/repmgrd.pid
REPMGRD_LOCK=/var/lock/subsys/${NAME}
REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
# Read configuration variable file if it is present
[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
# For SELinux we need to use 'runuser' not 'su'
if [ -x /sbin/runuser ]
then
SU=runuser
else
SU=su
fi
test -x $REPMGRD_BIN || exit 0
case "$REPMGRD_ENABLED" in
[Yy]*)
break
;;
*)
exit 0
;;
esac
if [ -z "${REPMGRD_OPTS}" ]
then
echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
exit 0
fi
start()
{
REPMGRD_START=$"Starting ${NAME} service: "
# Make sure startup-time log file is valid
if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
then
touch "${REPMGRD_LOG}" || exit 1
chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
chmod go-rwx "${REPMGRD_LOG}"
[ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
fi
echo -n "${REPMGRD_START}"
$SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
sleep 2
pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
if [ "x${pid}" != "x" ]
then
success "${REPMGRD_START}"
touch "${REPMGRD_LOCK}"
echo $pid > "${REPMGRD_PIDFILE}"
echo
else
failure "${REPMGRD_START}"
echo
script_result=1
fi
}
stop()
{
echo -n $"Stopping ${NAME} service: "
if [ -e "${REPMGRD_LOCK}" ]
then
killproc ${NAME}
ret=$?
if [ $ret -eq 0 ]
then
echo_success
rm -f "${REPMGRD_PIDFILE}"
rm -f "${REPMGRD_LOCK}"
else
echo_failure
script_result=1
fi
else
# not running; per LSB standards this is "ok"
echo_success
fi
echo
}
# See how we were called.
case "$1" in
start)
start
;;
stop)
stop
;;
status)
status -p $REPMGRD_PIDFILE $NAME
script_result=$?
;;
restart)
stop
start
;;
*)
echo $"Usage: $0 {start|stop|status|restart}"
exit 2
esac
exit $script_result

View File

@@ -1,21 +0,0 @@
# default settings for repmgrd. This file is source by /bin/sh from
# /etc/init.d/repmgrd
# disable repmgrd by default so it won't get started upon installation
# valid values: yes/no
REPMGRD_ENABLED=no
# Options for repmgrd (required)
#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
# User to run repmgrd as
#REPMGRD_USER=postgres
# repmgrd binary
#REPMGRD_BIN=/usr/bin/repmgrd
# pid file
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
# log file
#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log

5
TODO
View File

@@ -53,8 +53,9 @@ Planned feature improvements
requested, activate the replication slot using pg_receivexlog to negate the requested, activate the replication slot using pg_receivexlog to negate the
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5). need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
* Take into account the fact that a standby can obtain WAL from an archive, * repmgr: enable "standby follow" to point a standby at another standby, not
so even if direct streaming replication is interrupted, it may be up-to-date just the replication cluster master (see GitHub #130)
Usability improvements Usability improvements
====================== ======================

View File

@@ -28,7 +28,7 @@ static void parse_event_notifications_list(t_configuration_options *options, con
static void tablespace_list_append(t_configuration_options *options, const char *arg); static void tablespace_list_append(t_configuration_options *options, const char *arg);
static void exit_with_errors(ErrorList *config_errors); static void exit_with_errors(ErrorList *config_errors);
const static char *_progname = '\0'; const static char *_progname = NULL;
static char config_file_path[MAXPGPATH]; static char config_file_path[MAXPGPATH];
static bool config_file_provided = false; static bool config_file_provided = false;
bool config_file_found = false; bool config_file_found = false;
@@ -224,6 +224,7 @@ parse_config(t_configuration_options *options)
memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options)); memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options)); memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options));
memset(options->restore_command, 0, sizeof(options->restore_command));
/* default master_response_timeout is 60 seconds */ /* default master_response_timeout is 60 seconds */
options->master_response_timeout = 60; options->master_response_timeout = 60;
@@ -239,6 +240,8 @@ parse_config(t_configuration_options *options)
options->witness_repl_nodes_sync_interval_secs = 30; options->witness_repl_nodes_sync_interval_secs = 30;
memset(options->event_notification_command, 0, sizeof(options->event_notification_command)); memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
options->event_notifications.head = NULL;
options->event_notifications.tail = NULL;
options->tablespace_mapping.head = NULL; options->tablespace_mapping.head = NULL;
options->tablespace_mapping.tail = NULL; options->tablespace_mapping.tail = NULL;
@@ -340,7 +343,8 @@ parse_config(t_configuration_options *options)
strncpy(options->follow_command, value, MAXLEN); strncpy(options->follow_command, value, MAXLEN);
else if (strcmp(name, "master_response_timeout") == 0) else if (strcmp(name, "master_response_timeout") == 0)
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false); options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
/* 'primary_response_timeout' as synonym for 'master_response_timeout' - /*
* 'primary_response_timeout' as synonym for 'master_response_timeout' -
* we'll switch terminology in a future release (3.1?) * we'll switch terminology in a future release (3.1?)
*/ */
else if (strcmp(name, "primary_response_timeout") == 0) else if (strcmp(name, "primary_response_timeout") == 0)
@@ -372,6 +376,8 @@ parse_config(t_configuration_options *options)
parse_event_notifications_list(options, value); parse_event_notifications_list(options, value);
else if (strcmp(name, "tablespace_mapping") == 0) else if (strcmp(name, "tablespace_mapping") == 0)
tablespace_list_append(options, value); tablespace_list_append(options, value);
else if (strcmp(name, "restore_command") == 0)
strncpy(options->restore_command, value, MAXLEN);
else else
{ {
known_parameter = false; known_parameter = false;

View File

@@ -72,6 +72,7 @@ typedef struct
char pg_bindir[MAXLEN]; char pg_bindir[MAXLEN];
char pg_ctl_options[MAXLEN]; char pg_ctl_options[MAXLEN];
char pg_basebackup_options[MAXLEN]; char pg_basebackup_options[MAXLEN];
char restore_command[MAXLEN];
char logfile[MAXLEN]; char logfile[MAXLEN];
int monitor_interval_secs; int monitor_interval_secs;
int retry_promote_interval_secs; int retry_promote_interval_secs;
@@ -82,7 +83,11 @@ typedef struct
TablespaceList tablespace_mapping; TablespaceList tablespace_mapping;
} t_configuration_options; } t_configuration_options;
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } /*
* The following will initialize the structure with a minimal set of options;
* actual defaults are set in parse_config() before parsing the configuration file
*/
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }
typedef struct ErrorListCell typedef struct ErrorListCell
{ {

View File

@@ -31,6 +31,7 @@
char repmgr_schema[MAXLEN] = ""; char repmgr_schema[MAXLEN] = "";
char repmgr_schema_quoted[MAXLEN] = ""; char repmgr_schema_quoted[MAXLEN] = "";
static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);
PGconn * PGconn *
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice) _establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice)
@@ -538,7 +539,7 @@ get_conninfo_value(const char *conninfo, const char *keyword, char *output)
conninfo_options = PQconninfoParse(conninfo, NULL); conninfo_options = PQconninfoParse(conninfo, NULL);
if (conninfo_options == false) if (conninfo_options == NULL)
{ {
log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo); log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo);
return false; return false;
@@ -1681,8 +1682,7 @@ int
get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info) get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info)
{ {
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
PGresult *res; int result;
int ntuples;
sqlquery_snprintf( sqlquery_snprintf(
sqlquery, sqlquery,
@@ -1696,6 +1696,51 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery); log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery);
result = _get_node_record(conn, cluster, sqlquery, node_info);
if (result == 0)
{
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
}
return result;
}
int
get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info)
{
char sqlquery[QUERY_STR_LEN];
int result;
sqlquery_snprintf(
sqlquery,
"SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active"
" FROM %s.repl_nodes "
" WHERE cluster = '%s' "
" AND name = '%s'",
get_repmgr_schema_quoted(conn),
cluster,
node_name);
log_verbose(LOG_DEBUG, "get_node_record_by_name():\n%s\n", sqlquery);
result = _get_node_record(conn, cluster, sqlquery, node_info);
if (result == 0)
{
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %s\n", node_name);
}
return result;
}
static int
_get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info)
{
int ntuples;
PGresult *res;
res = PQexec(conn, sqlquery); res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
@@ -1706,7 +1751,6 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
if (ntuples == 0) if (ntuples == 0)
{ {
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
return 0; return 0;
} }
@@ -1727,6 +1771,9 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
} }
int int
get_node_replication_state(PGconn *conn, char *node_name, char *output) get_node_replication_state(PGconn *conn, char *node_name, char *output)
{ {

View File

@@ -52,18 +52,6 @@ typedef struct s_node_info
} t_node_info; } t_node_info;
/*
* Struct to store replication slot information
*/
typedef struct s_replication_slot
{
char slot_name[MAXLEN];
char slot_type[MAXLEN];
bool active;
} t_replication_slot;
#define T_NODE_INFO_INITIALIZER { \ #define T_NODE_INFO_INITIALIZER { \
NODE_NOT_FOUND, \ NODE_NOT_FOUND, \
NO_UPSTREAM_NODE, \ NO_UPSTREAM_NODE, \
@@ -78,6 +66,19 @@ typedef struct s_replication_slot
InvalidXLogRecPtr \ InvalidXLogRecPtr \
} }
/*
* Struct to store replication slot information
*/
typedef struct s_replication_slot
{
char slot_name[MAXLEN];
char slot_type[MAXLEN];
bool active;
} t_replication_slot;
PGconn *_establish_db_connection(const char *conninfo, PGconn *_establish_db_connection(const char *conninfo,
const bool exit_on_error, const bool exit_on_error,
const bool log_notice); const bool log_notice);
@@ -125,6 +126,7 @@ bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *c
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active); bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
bool delete_node_record(PGconn *conn, int node, char *action); bool delete_node_record(PGconn *conn, int node, char *action);
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info); int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
int get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info);
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active); bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id); bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details); bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);

View File

@@ -1,9 +1,9 @@
Package: repmgr-auto Package: repmgr-auto
Version: 3.0.1 Version: 3.1.3
Section: database Section: database
Priority: optional Priority: optional
Architecture: all Architecture: all
Depends: rsync, postgresql-9.3 | postgresql-9.4 Depends: rsync, postgresql-9.3 | postgresql-9.4 | postgresql-9.5
Maintainer: Self built package <user@localhost> Maintainer: Self built package <user@localhost>
Description: PostgreSQL replication setup, magament and monitoring Description: PostgreSQL replication setup, magament and monitoring
has two main executables has two main executables

194
dirmod.c Normal file
View File

@@ -0,0 +1,194 @@
/*
*
* dirmod.c
* directory handling functions
*
* Copyright (C) 2ndQuadrant, 2010-2016
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "postgres_fe.h"
/* Don't modify declarations in system headers */
#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>
/*
* pgfnames
*
* return a list of the names of objects in the argument directory. Caller
* must call pgfnames_cleanup later to free the memory allocated by this
* function.
*/
char **
pgfnames(const char *path)
{
DIR *dir;
struct dirent *file;
char **filenames;
int numnames = 0;
int fnsize = 200; /* enough for many small dbs */
dir = opendir(path);
if (dir == NULL)
{
return NULL;
}
filenames = (char **) palloc(fnsize * sizeof(char *));
while (errno = 0, (file = readdir(dir)) != NULL)
{
if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0)
{
if (numnames + 1 >= fnsize)
{
fnsize *= 2;
filenames = (char **) repalloc(filenames,
fnsize * sizeof(char *));
}
filenames[numnames++] = pstrdup(file->d_name);
}
}
if (errno)
{
fprintf(stderr, _("could not read directory \"%s\": %s\n"),
path, strerror(errno));
}
filenames[numnames] = NULL;
if (closedir(dir))
{
fprintf(stderr, _("could not close directory \"%s\": %s\n"),
path, strerror(errno));
}
return filenames;
}
/*
* pgfnames_cleanup
*
* deallocate memory used for filenames
*/
void
pgfnames_cleanup(char **filenames)
{
char **fn;
for (fn = filenames; *fn; fn++)
pfree(*fn);
pfree(filenames);
}
/*
* rmtree
*
* Delete a directory tree recursively.
* Assumes path points to a valid directory.
* Deletes everything under path.
* If rmtopdir is true deletes the directory too.
* Returns true if successful, false if there was any problem.
* (The details of the problem are reported already, so caller
* doesn't really have to say anything more, but most do.)
*/
bool
rmtree(const char *path, bool rmtopdir)
{
bool result = true;
char pathbuf[MAXPGPATH];
char **filenames;
char **filename;
struct stat statbuf;
/*
* we copy all the names out of the directory before we start modifying
* it.
*/
filenames = pgfnames(path);
if (filenames == NULL)
return false;
/* now we have the names we can start removing things */
for (filename = filenames; *filename; filename++)
{
snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
/*
* It's ok if the file is not there anymore; we were just about to
* delete it anyway.
*
* This is not an academic possibility. One scenario where this
* happens is when bgwriter has a pending unlink request for a file in
* a database that's being dropped. In dropdb(), we call
* ForgetDatabaseFsyncRequests() to flush out any such pending unlink
* requests, but because that's asynchronous, it's not guaranteed that
* the bgwriter receives the message in time.
*/
if (lstat(pathbuf, &statbuf) != 0)
{
if (errno != ENOENT)
{
result = false;
}
continue;
}
if (S_ISDIR(statbuf.st_mode))
{
/* call ourselves recursively for a directory */
if (!rmtree(pathbuf, true))
{
/* we already reported the error */
result = false;
}
}
else
{
if (unlink(pathbuf) != 0)
{
if (errno != ENOENT)
{
result = false;
}
}
}
}
if (rmtopdir)
{
if (rmdir(path) != 0)
{
result = false;
}
}
pgfnames_cleanup(filenames);
return result;
}

23
dirmod.h Normal file
View File

@@ -0,0 +1,23 @@
/*
* dirmod.h
* Copyright (c) 2ndQuadrant, 2010-2016
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef _DIRMOD_H_
#define _DIRMOD_H_
#endif

View File

@@ -38,5 +38,6 @@
#define ERR_INTERNAL 15 #define ERR_INTERNAL 15
#define ERR_MONITORING_FAIL 16 #define ERR_MONITORING_FAIL 16
#define ERR_BAD_BACKUP_LABEL 17 #define ERR_BAD_BACKUP_LABEL 17
#define ERR_SWITCHOVER_FAIL 18
#endif /* _ERRCODE_H_ */ #endif /* _ERRCODE_H_ */

345
repmgr.c
View File

@@ -87,7 +87,7 @@ static bool create_recovery_file(const char *data_dir);
static int test_ssh_connection(char *host, char *remote_user); static int test_ssh_connection(char *host, char *remote_user);
static int copy_remote_files(char *host, char *remote_user, char *remote_path, static int copy_remote_files(char *host, char *remote_user, char *remote_path,
char *local_path, bool is_directory, int server_version_num); char *local_path, bool is_directory, int server_version_num);
static int run_basebackup(const char *data_dir); static int run_basebackup(const char *data_dir, int server_version);
static void check_parameters_for_action(const int action); static void check_parameters_for_action(const int action);
static bool create_schema(PGconn *conn); static bool create_schema(PGconn *conn);
static void write_primary_conninfo(char *line); static void write_primary_conninfo(char *line);
@@ -121,7 +121,7 @@ static bool remote_command(const char *host, const char *user, const char *comma
static void format_db_cli_params(const char *conninfo, char *output); static void format_db_cli_params(const char *conninfo, char *output);
static bool copy_file(const char *old_filename, const char *new_filename); static bool copy_file(const char *old_filename, const char *new_filename);
static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label); static bool read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label);
/* Global variables */ /* Global variables */
static const char *keywords[6]; static const char *keywords[6];
@@ -187,6 +187,7 @@ main(int argc, char **argv)
{"config-archive-dir", required_argument, NULL, 5}, {"config-archive-dir", required_argument, NULL, 5},
{"pg_rewind", optional_argument, NULL, 6}, {"pg_rewind", optional_argument, NULL, 6},
{"pwprompt", optional_argument, NULL, 7}, {"pwprompt", optional_argument, NULL, 7},
{"csv", no_argument, NULL, 8},
{"help", no_argument, NULL, '?'}, {"help", no_argument, NULL, '?'},
{"version", no_argument, NULL, 'V'}, {"version", no_argument, NULL, 'V'},
{NULL, 0, NULL, 0} {NULL, 0, NULL, 0}
@@ -427,6 +428,9 @@ main(int argc, char **argv)
case 7: case 7:
runtime_options.witness_pwprompt = true; runtime_options.witness_pwprompt = true;
break; break;
case 8:
runtime_options.csv_mode = true;
break;
default: default:
{ {
@@ -764,9 +768,9 @@ do_cluster_show(void)
conn = establish_db_connection(options.conninfo, true); conn = establish_db_connection(options.conninfo, true);
sqlquery_snprintf(sqlquery, sqlquery_snprintf(sqlquery,
"SELECT conninfo, type, name, upstream_node_name" "SELECT conninfo, type, name, upstream_node_name, id"
" FROM %s.repl_show_nodes", " FROM %s.repl_show_nodes",
get_repmgr_schema_quoted(conn)); get_repmgr_schema_quoted(conn));
log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery ); log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery );
@@ -813,21 +817,24 @@ do_cluster_show(void)
upstream_length = upstream_length_cur; upstream_length = upstream_length_cur;
} }
printf("Role | %-*s | %-*s | Connection String\n", name_length, name_header, upstream_length, upstream_header); if (! runtime_options.csv_mode)
printf("----------+-"); {
printf("Role | %-*s | %-*s | Connection String\n", name_length, name_header, upstream_length, upstream_header);
printf("----------+-");
for (i = 0; i < name_length; i++) for (i = 0; i < name_length; i++)
printf("-"); printf("-");
printf("-|-"); printf("-|-");
for (i = 0; i < upstream_length; i++) for (i = 0; i < upstream_length; i++)
printf("-"); printf("-");
printf("-|-"); printf("-|-");
for (i = 0; i < conninfo_length; i++) for (i = 0; i < conninfo_length; i++)
printf("-"); printf("-");
printf("\n"); printf("\n");
}
for (i = 0; i < PQntuples(res); i++) for (i = 0; i < PQntuples(res); i++)
{ {
@@ -841,11 +848,20 @@ do_cluster_show(void)
else else
strcpy(node_role, "* master"); strcpy(node_role, "* master");
printf("%-10s", node_role); if (runtime_options.csv_mode)
printf("| %-*s ", name_length, PQgetvalue(res, i, 2)); {
printf("| %-*s ", upstream_length, PQgetvalue(res, i, 3)); int connection_status =
printf("| %s\n", PQgetvalue(res, i, 0)); (PQstatus(conn) == CONNECTION_OK) ?
(is_standby(conn) ? 1 : 0) : -1;
printf("%s,%d\n", PQgetvalue(res, i, 4), connection_status);
}
else
{
printf("%-10s", node_role);
printf("| %-*s ", name_length, PQgetvalue(res, i, 2));
printf("| %-*s ", upstream_length, PQgetvalue(res, i, 3));
printf("| %s\n", PQgetvalue(res, i, 0));
}
PQfinish(conn); PQfinish(conn);
} }
@@ -1110,8 +1126,9 @@ do_standby_register(void)
PGconn *master_conn; PGconn *master_conn;
int ret; int ret;
bool record_created; bool record_created;
t_node_info node_record = T_NODE_INFO_INITIALIZER;
int node_result;
log_info(_("connecting to standby database\n")); log_info(_("connecting to standby database\n"));
conn = establish_db_connection(options.conninfo, true); conn = establish_db_connection(options.conninfo, true);
@@ -1168,6 +1185,28 @@ do_standby_register(void)
} }
} }
/*
* Check that an active node with the same node_name doesn't exist already
*/
node_result = get_node_record_by_name(master_conn,
options.cluster_name,
options.node_name,
&node_record);
if (node_result)
{
if (node_record.active == true)
{
log_err(_("Node %i exists already with node_name \"%s\"\n"),
node_record.node_id,
options.node_name);
PQfinish(master_conn);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
}
record_created = create_node_record(master_conn, record_created = create_node_record(master_conn,
"standby register", "standby register",
options.node, options.node,
@@ -1393,7 +1432,7 @@ do_standby_clone(void)
if (*runtime_options.recovery_min_apply_delay) if (*runtime_options.recovery_min_apply_delay)
{ {
if (get_server_version(upstream_conn, NULL) < 90400) if (server_version_num < 90400)
{ {
log_err(_("PostgreSQL 9.4 or greater required for --recovery-min-apply-delay\n")); log_err(_("PostgreSQL 9.4 or greater required for --recovery-min-apply-delay\n"));
PQfinish(upstream_conn); PQfinish(upstream_conn);
@@ -1417,7 +1456,7 @@ do_standby_clone(void)
{ {
TablespaceListCell *cell; TablespaceListCell *cell;
if (get_server_version(upstream_conn, NULL) < 90400 && !runtime_options.rsync_only) if (server_version_num < 90400 && !runtime_options.rsync_only)
{ {
log_err(_("in PostgreSQL 9.3, tablespace mapping can only be used in conjunction with --rsync-only\n")); log_err(_("in PostgreSQL 9.3, tablespace mapping can only be used in conjunction with --rsync-only\n"));
PQfinish(upstream_conn); PQfinish(upstream_conn);
@@ -1673,12 +1712,13 @@ do_standby_clone(void)
} }
/* Read backup label copied from primary */ /* Read backup label copied from primary */
/* XXX ensure this function does not exit on error as we'd need to stop the backup */ if (read_backup_label(local_data_directory, &backup_label) == false)
read_backup_label(local_data_directory, &backup_label); {
r = retval = ERR_BAD_BACKUP_LABEL;
goto stop_backup;
}
printf("Label: %s; file: %s\n", backup_label.label, backup_label.start_wal_file); /* Copy tablespaces and, if required, remap to a new location */
/* Handle tablespaces */
sqlquery_snprintf(sqlquery, sqlquery_snprintf(sqlquery,
" SELECT oid, pg_tablespace_location(oid) AS spclocation " " SELECT oid, pg_tablespace_location(oid) AS spclocation "
@@ -1715,7 +1755,6 @@ do_standby_clone(void)
appendPQExpBuffer(&tblspc_dir_src, "%s", PQgetvalue(res, i, 1)); appendPQExpBuffer(&tblspc_dir_src, "%s", PQgetvalue(res, i, 1));
/* Check if tablespace path matches one of the provided tablespace mappings */ /* Check if tablespace path matches one of the provided tablespace mappings */
if (options.tablespace_mapping.head != NULL) if (options.tablespace_mapping.head != NULL)
{ {
for (cell = options.tablespace_mapping.head; cell; cell = cell->next) for (cell = options.tablespace_mapping.head; cell; cell = cell->next)
@@ -1758,10 +1797,15 @@ do_standby_clone(void)
goto stop_backup; goto stop_backup;
} }
/* Update symlinks in pg_tblspc */ /*
* If a valid mapping was provide for this tablespace, arrange for it to
* be remapped
* (if no tablespace mappings was provided, the link will be copied as-is
* by pg_basebackup or rsync and no action is required)
*/
if (mapping_found == true) if (mapping_found == true)
{ {
/* 9.5 and later - create a tablespace_map file */ /* 9.5 and later - append to the tablespace_map file */
if (server_version_num >= 90500) if (server_version_num >= 90500)
{ {
tablespace_map_rewrite = true; tablespace_map_rewrite = true;
@@ -1805,6 +1849,13 @@ do_standby_clone(void)
PQclear(res); PQclear(res);
/*
* For 9.5 and later, if tablespace remapping was requested, we'll need
* to rewrite the tablespace map file ourselves.
* The tablespace map file is read on startup and any links created by
* the backend; we could do this ourselves like for pre-9.5 servers, but
* it's better to rely on functionality the backend provides.
*/
if (server_version_num >= 90500 && tablespace_map_rewrite == true) if (server_version_num >= 90500 && tablespace_map_rewrite == true)
{ {
PQExpBufferData tablespace_map_filename; PQExpBufferData tablespace_map_filename;
@@ -1817,7 +1868,9 @@ do_standby_clone(void)
/* Unlink any existing file (it should be there, but we don't care if it isn't) */ /* Unlink any existing file (it should be there, but we don't care if it isn't) */
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT) if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
{ {
log_err(_("unable to remove tablespace_map file %s\n"), tablespace_map_filename.data); log_err(_("unable to remove tablespace_map file %s: %s\n"),
tablespace_map_filename.data,
strerror(errno));
r = retval = ERR_BAD_BASEBACKUP; r = retval = ERR_BAD_BASEBACKUP;
goto stop_backup; goto stop_backup;
@@ -1845,7 +1898,7 @@ do_standby_clone(void)
} }
else else
{ {
r = run_basebackup(local_data_directory); r = run_basebackup(local_data_directory, server_version_num);
if (r != 0) if (r != 0)
{ {
log_warning(_("standby clone: base backup failed\n")); log_warning(_("standby clone: base backup failed\n"));
@@ -1988,8 +2041,8 @@ stop_backup:
*/ */
if (runtime_options.rsync_only) if (runtime_options.rsync_only)
{ {
char script[MAXLEN];
char label_path[MAXPGPATH]; char label_path[MAXPGPATH];
char dirpath[MAXLEN] = "";
if (runtime_options.force) if (runtime_options.force)
{ {
@@ -1997,13 +2050,13 @@ stop_backup:
* Remove any existing WAL from the target directory, since * Remove any existing WAL from the target directory, since
* rsync's --exclude option doesn't do it. * rsync's --exclude option doesn't do it.
*/ */
maxlen_snprintf(script, "rm -rf %s/pg_xlog/*",
local_data_directory); maxlen_snprintf(dirpath, "%s/pg_xlog/", local_data_directory);
r = system(script);
if (r != 0) if (!rmtree(dirpath, false))
{ {
log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"), log_err(_("unable to empty local WAL directory %s\n"),
local_data_directory); dirpath);
exit(ERR_BAD_RSYNC); exit(ERR_BAD_RSYNC);
} }
} }
@@ -2024,17 +2077,17 @@ stop_backup:
* functionality of replication slots * functionality of replication slots
*/ */
if (server_version_num >= 90400 && if (server_version_num >= 90400 &&
backup_label.min_failover_slot_lsn == InvalidXLogRecPtr) backup_label.min_failover_slot_lsn == InvalidXLogRecPtr)
{ {
maxlen_snprintf(script, "rm -rf %s/pg_replslot/*", maxlen_snprintf(dirpath, "%s/pg_replslot/*",
local_data_directory); local_data_directory);
log_debug("deleting pg_replslot directory contents\n"); log_debug("deleting pg_replslot directory contents\n");
r = system(script);
if (r != 0) if (!rmtree(dirpath, false))
{ {
log_err(_("unable to empty replication slot directory %s/pg_replslot/\n"), log_err(_("unable to empty replication slot directory %s\n"),
local_data_directory); dirpath);
exit(ERR_BAD_RSYNC); exit(ERR_BAD_RSYNC);
} }
} }
@@ -2151,8 +2204,7 @@ parse_label_lsn(const char *label_key, const char *label_value)
{ {
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"), log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
label_key, label_value); label_key, label_value);
return InvalidXLogRecPtr;
exit(ERR_BAD_BACKUP_LABEL);
} }
return ptr; return ptr;
@@ -2173,7 +2225,7 @@ parse_label_lsn(const char *label_key, const char *label_value)
* *
*====================================== *======================================
*/ */
static void static bool
read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label) read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label)
{ {
char label_path[MAXPGPATH]; char label_path[MAXPGPATH];
@@ -2198,7 +2250,7 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
{ {
log_err(_("read_backup_label: could not open backup label file %s: %s"), log_err(_("read_backup_label: could not open backup label file %s: %s"),
label_path, strerror(errno)); label_path, strerror(errno));
exit(ERR_BAD_BACKUP_LABEL); return false;
} }
log_info(_("read_backup_label: parsing backup label file '%s'\n"), log_info(_("read_backup_label: parsing backup label file '%s'\n"),
@@ -2220,7 +2272,7 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
{ {
log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""), log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""),
label_key, label_value); label_key, label_value);
exit(ERR_BAD_BACKUP_LABEL); return false;
} }
log_debug("standby clone: got backup label entry \"%s: %s\"\n", log_debug("standby clone: got backup label entry \"%s: %s\"\n",
@@ -2232,14 +2284,19 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
char wal_filename[MAXLEN]; char wal_filename[MAXLEN];
nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename); nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename);
if (nmatches != 2) if (nmatches != 2)
{ {
log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n")); log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n"));
exit(ERR_BAD_BACKUP_LABEL); return false;
} }
out_backup_label->start_wal_location = out_backup_label->start_wal_location =
parse_label_lsn(&label_key[0], start_wal_location); parse_label_lsn(&label_key[0], start_wal_location);
if (out_backup_label->start_wal_location == InvalidXLogRecPtr)
return false;
(void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN); (void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN);
out_backup_label->start_wal_file[MAXLEN-1] = '\0'; out_backup_label->start_wal_file[MAXLEN-1] = '\0';
} }
@@ -2247,6 +2304,9 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
{ {
out_backup_label->checkpoint_location = out_backup_label->checkpoint_location =
parse_label_lsn(&label_key[0], &label_value[0]); parse_label_lsn(&label_key[0], &label_value[0]);
if (out_backup_label->checkpoint_location == InvalidXLogRecPtr)
return false;
} }
else if (strcmp(label_key, "BACKUP METHOD") == 0) else if (strcmp(label_key, "BACKUP METHOD") == 0)
{ {
@@ -2272,6 +2332,9 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
{ {
out_backup_label->min_failover_slot_lsn = out_backup_label->min_failover_slot_lsn =
parse_label_lsn(&label_key[0], &label_value[0]); parse_label_lsn(&label_key[0], &label_value[0]);
if (out_backup_label->min_failover_slot_lsn == InvalidXLogRecPtr)
return false;
} }
else else
{ {
@@ -2281,6 +2344,10 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back
} }
(void) fclose(label_file); (void) fclose(label_file);
log_debug(_("read_backup_label: label is %s; start wal file is %s\n"), out_backup_label->label, out_backup_label->start_wal_file);
return true;
} }
static void static void
@@ -2663,6 +2730,8 @@ do_standby_follow(void)
* *
* TODO: * TODO:
* - make connection test timeouts/intervals configurable (see below) * - make connection test timeouts/intervals configurable (see below)
* - add command line option --remote_pg_bindir or similar to
* optionally handle cases where the remote pg_bindir is different
*/ */
static void static void
@@ -2673,7 +2742,7 @@ do_standby_switchover(void)
int server_version_num; int server_version_num;
bool use_pg_rewind; bool use_pg_rewind;
/* the remote server is the primary which will be demoted */ /* the remote server is the primary to be demoted */
char remote_conninfo[MAXCONNINFO] = ""; char remote_conninfo[MAXCONNINFO] = "";
char remote_host[MAXLEN]; char remote_host[MAXLEN];
char remote_data_directory[MAXLEN]; char remote_data_directory[MAXLEN];
@@ -2689,9 +2758,9 @@ do_standby_switchover(void)
char repmgr_db_cli_params[MAXLEN] = ""; char repmgr_db_cli_params[MAXLEN] = "";
int query_result; int query_result;
t_node_info remote_node_record; t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
bool connection_success; bool connection_success,
shutdown_success;
/* /*
* SANITY CHECKS * SANITY CHECKS
@@ -2711,7 +2780,7 @@ do_standby_switchover(void)
log_err(_("switchover must be executed from the standby node to be promoted\n")); log_err(_("switchover must be executed from the standby node to be promoted\n"));
PQfinish(local_conn); PQfinish(local_conn);
exit(ERR_BAD_CONFIG); exit(ERR_SWITCHOVER_FAIL);
} }
server_version_num = check_server_version(local_conn, "master", true, NULL); server_version_num = check_server_version(local_conn, "master", true, NULL);
@@ -2821,8 +2890,8 @@ do_standby_switchover(void)
/* 9.5 and later have pg_rewind built-in - always use that */ /* 9.5 and later have pg_rewind built-in - always use that */
use_pg_rewind = true; use_pg_rewind = true;
maxlen_snprintf(remote_pg_rewind, maxlen_snprintf(remote_pg_rewind,
"%s/pg_rewind", "%s",
pg_bindir); make_pg_path("pg_rewind"));
} }
else else
{ {
@@ -2842,8 +2911,8 @@ do_standby_switchover(void)
else else
{ {
maxlen_snprintf(remote_pg_rewind, maxlen_snprintf(remote_pg_rewind,
"%s/pg_rewind", "%s",
pg_bindir); make_pg_path("pg_rewind"));
} }
} }
else else
@@ -3038,8 +3107,8 @@ do_standby_switchover(void)
log_verbose(LOG_DEBUG, "remote_archive_config_dir: %s\n", remote_archive_config_dir); log_verbose(LOG_DEBUG, "remote_archive_config_dir: %s\n", remote_archive_config_dir);
maxlen_snprintf(command, maxlen_snprintf(command,
"%s/repmgr standby archive-config -f %s --config-archive-dir=%s", "%s standby archive-config -f %s --config-archive-dir=%s",
pg_bindir, make_pg_path("repmgr"),
runtime_options.remote_config_file, runtime_options.remote_config_file,
remote_archive_config_dir); remote_archive_config_dir);
@@ -3094,41 +3163,63 @@ do_standby_switchover(void)
termPQExpBuffer(&command_output); termPQExpBuffer(&command_output);
connection_success = false; shutdown_success = false;
/* loop for timeout waiting for current primary to stop */ /* loop for timeout waiting for current primary to stop */
for(i = 0; i < options.reconnect_attempts; i++) for (i = 0; i < options.reconnect_attempts; i++)
{ {
/* Check whether primary is available */ /* Check whether primary is available */
remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */ PGPing ping_res = PQping(remote_conninfo);
/* XXX failure to connect doesn't mean the server is necessarily /* database server could not be contacted */
* completely stopped - we need to better detect the reason for if (ping_res == PQPING_NO_RESPONSE)
* connection failure ("server not listening" vs "shutting down")
*
* -> check is_pgup()
*/
if (PQstatus(remote_conn) != CONNECTION_OK)
{ {
connection_success = true; bool command_success;
log_notice(_("current master has been stopped\n")); /*
break; * directly access the server and check that the
* pidfile has gone away so we can be sure the server is actually
* shut down and the PQPING_NO_RESPONSE is not due to other issues
* such as coincidental network failure.
*/
initPQExpBuffer(&command_output);
maxlen_snprintf(command,
"ls %s/postmaster.pid >/dev/null 2>&1 && echo 1 || echo 0",
remote_data_directory);
command_success = remote_command(
remote_host,
runtime_options.remote_user,
command,
&command_output);
if (command_success == true && *command_output.data == '0')
{
shutdown_success = true;
log_notice(_("current master has been stopped\n"));
termPQExpBuffer(&command_output);
break;
}
termPQExpBuffer(&command_output);
} }
PQfinish(remote_conn);
// configurable? /* XXX make configurable? */
sleep(options.reconnect_interval); sleep(options.reconnect_interval);
i++; i++;
} }
if (connection_success == false) if (shutdown_success == false)
{ {
log_err(_("master server did not shut down\n")); log_err(_("master server did not shut down\n"));
log_hint(_("check the master server status before performing any further actions")); log_hint(_("check the master server status before performing any further actions"));
exit(ERR_FAILOVER_FAIL); exit(ERR_SWITCHOVER_FAIL);
} }
/* promote this standby */ /* promote this standby */
@@ -3151,8 +3242,8 @@ do_standby_switchover(void)
/* Execute pg_rewind */ /* Execute pg_rewind */
maxlen_snprintf(command, maxlen_snprintf(command,
"%s/pg_rewind -D %s --source-server=\\'%s\\'", "%s -D %s --source-server=\\'%s\\'",
pg_bindir, remote_pg_rewind,
remote_data_directory, remote_data_directory,
options.conninfo); options.conninfo);
@@ -3173,8 +3264,8 @@ do_standby_switchover(void)
/* Restore any previously archived config files */ /* Restore any previously archived config files */
maxlen_snprintf(command, maxlen_snprintf(command,
"%s/repmgr standby restore-config -D %s --config-archive-dir=%s", "%s standby restore-config -D %s --config-archive-dir=%s",
pg_bindir, make_pg_path("repmgr"),
remote_data_directory, remote_data_directory,
remote_archive_config_dir); remote_archive_config_dir);
@@ -3231,8 +3322,8 @@ do_standby_switchover(void)
format_db_cli_params(options.conninfo, repmgr_db_cli_params); format_db_cli_params(options.conninfo, repmgr_db_cli_params);
maxlen_snprintf(command, maxlen_snprintf(command,
"%s/repmgr -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone", "%s -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone",
pg_bindir, make_pg_path("repmgr"),
remote_data_directory, remote_data_directory,
runtime_options.remote_config_file, runtime_options.remote_config_file,
repmgr_db_cli_params repmgr_db_cli_params
@@ -3257,8 +3348,8 @@ do_standby_switchover(void)
*/ */
format_db_cli_params(options.conninfo, repmgr_db_cli_params); format_db_cli_params(options.conninfo, repmgr_db_cli_params);
maxlen_snprintf(command, maxlen_snprintf(command,
"%s/repmgr -D %s -f %s %s standby follow", "%s -D %s -f %s %s standby follow",
pg_bindir, make_pg_path("repmgr"),
remote_data_directory, remote_data_directory,
runtime_options.remote_config_file, runtime_options.remote_config_file,
repmgr_db_cli_params repmgr_db_cli_params
@@ -3292,7 +3383,7 @@ do_standby_switchover(void)
if (is_standby(remote_conn) == 0) if (is_standby(remote_conn) == 0)
{ {
log_err(_("new standby (old master) is not a standby\n")); log_err(_("new standby (old master) is not a standby\n"));
exit(ERR_FAILOVER_FAIL); exit(ERR_SWITCHOVER_FAIL);
} }
connection_success = true; connection_success = true;
break; break;
@@ -3306,7 +3397,7 @@ do_standby_switchover(void)
if (connection_success == false) if (connection_success == false)
{ {
log_err(_("unable to connect to new standby (old master)\n")); log_err(_("unable to connect to new standby (old master)\n"));
exit(ERR_FAILOVER_FAIL); exit(ERR_SWITCHOVER_FAIL);
} }
log_debug("new standby is in recovery\n"); log_debug("new standby is in recovery\n");
@@ -3315,15 +3406,14 @@ do_standby_switchover(void)
local_conn = establish_db_connection(options.conninfo, true); local_conn = establish_db_connection(options.conninfo, true);
query_result = get_node_replication_state(local_conn, remote_node_record.name, remote_node_replication_state); query_result = get_node_replication_state(local_conn, remote_node_record.name, remote_node_replication_state);
if (query_result == -1) if (query_result == -1)
{ {
log_err(_("unable to retrieve replication status for node %i\n"), remote_node_id); log_err(_("unable to retrieve replication status for node %i\n"), remote_node_id);
PQfinish(local_conn); PQfinish(local_conn);
// errcode? exit(ERR_SWITCHOVER_FAIL);
exit(ERR_DB_QUERY);
} }
if (query_result == 0) if (query_result == 0)
@@ -3332,7 +3422,6 @@ do_standby_switchover(void)
} }
else else
{ {
/* XXX other valid values? */
/* XXX we should poll for a while in case the node takes time to connect to the primary */ /* XXX we should poll for a while in case the node takes time to connect to the primary */
if (strcmp(remote_node_replication_state, "streaming") == 0 || if (strcmp(remote_node_replication_state, "streaming") == 0 ||
strcmp(remote_node_replication_state, "catchup") == 0) strcmp(remote_node_replication_state, "catchup") == 0)
@@ -3341,9 +3430,16 @@ do_standby_switchover(void)
} }
else else
{ {
log_err(_("node %i replication state is \"%s\"\n"), remote_node_id, remote_node_replication_state); /*
* Other possible replication states are:
* - startup
* - backup
* - UNKNOWN
*/
log_err(_("node %i has unexpected replication state \"%s\"\n"),
remote_node_id, remote_node_replication_state);
PQfinish(local_conn); PQfinish(local_conn);
exit(ERR_DB_QUERY); exit(ERR_SWITCHOVER_FAIL);
} }
} }
@@ -3355,7 +3451,7 @@ do_standby_switchover(void)
if (options.use_replication_slots) if (options.use_replication_slots)
{ {
t_node_info local_node_record; t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
query_result = get_node_record(local_conn, options.cluster_name, options.node, &local_node_record); query_result = get_node_record(local_conn, options.cluster_name, options.node, &local_node_record);
@@ -4120,12 +4216,13 @@ do_help(void)
printf(_(" -w, --wal-keep-segments=VALUE (standby clone) minimum value for the GUC\n" \ printf(_(" -w, --wal-keep-segments=VALUE (standby clone) minimum value for the GUC\n" \
" wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS); " wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS);
printf(_(" -W, --wait (standby follow) wait for a master to appear\n")); printf(_(" -W, --wait (standby follow) wait for a master to appear\n"));
printf(_(" -m, --mode (standby switchover) shutdown mode (smart|fast|immediate)\n")); printf(_(" -m, --mode (standby switchover) shutdown mode (\"fast\" - default, \"smart\" or \"immediate\")\n"));
printf(_(" -C, --remote-config-file (standby switchover) path to the configuration file on\n" \ printf(_(" -C, --remote-config-file (standby switchover) path to the configuration file on\n" \
" the current master\n")); " the current master\n"));
printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \ printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \
" optionally providing a path to the binary\n")); " optionally providing a path to the binary\n"));
printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n")); printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n"));
printf(_(" --csv (cluster show) output in CSV mode (0 = master, 1 = standby, -1 = down)\n"));
/* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/ /* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/
printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n")); printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n"));
printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \ printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \
@@ -4216,6 +4313,15 @@ create_recovery_file(const char *data_dir)
log_debug(_("recovery.conf: %s"), line); log_debug(_("recovery.conf: %s"), line);
} }
/* If restore_command is set, we use it as restore_command in recovery.conf */
if (strcmp(options.restore_command, "") != 0)
{
maxlen_snprintf(line, "restore_command = '%s'\n",
options.restore_command);
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
return false;
log_debug(_("recovery.conf: %s"), line);
}
fclose(recovery_file); fclose(recovery_file);
return true; return true;
@@ -4371,12 +4477,12 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
static int static int
run_basebackup(const char *data_dir) run_basebackup(const char *data_dir, int server_version)
{ {
char script[MAXLEN]; char script[MAXLEN];
int r = 0; int r = 0;
PQExpBufferData params; PQExpBufferData params;
TablespaceListCell *cell; TablespaceListCell *cell;
/* Create pg_basebackup command line options */ /* Create pg_basebackup command line options */
@@ -4411,6 +4517,28 @@ run_basebackup(const char *data_dir)
} }
} }
/*
* To ensure we have all the WALs needed during basebackup execution we stream
* them as the backup is taking place.
* Not necessary if on 9.6 if we have replication slots set in repmgr.conf
* (starting at 9.6 there is an option, which we use, to reserve the LSN at
* creation time)
*/
if (server_version < 90600 || !options.use_replication_slots)
{
/*
* We're going to check first if the user set the xlog method in the repmgr.conf
* file. We don't want to have conflicts with pg_basebackup due to specifying the
* method twice.
*/
const char xlog_short[4] = "-X ";
const char xlog_long[14] = "--xlog-method";
if (strstr(options.pg_basebackup_options, xlog_short) == NULL && strstr(options.pg_basebackup_options, xlog_long) == NULL )
{
appendPQExpBuffer(&params, " -X stream");
}
}
maxlen_snprintf(script, maxlen_snprintf(script,
"%s -l \"repmgr base backup\" %s %s", "%s -l \"repmgr base backup\" %s %s",
make_pg_path("pg_basebackup"), make_pg_path("pg_basebackup"),
@@ -4423,7 +4551,7 @@ run_basebackup(const char *data_dir)
/* /*
* As of 9.4, pg_basebackup only ever returns 0 or 1 * As of 9.4, pg_basebackup only ever returns 0 or 1
*/ */
r = system(script); r = system(script);
@@ -4624,6 +4752,15 @@ check_parameters_for_action(const int action)
} }
} }
/* Warn about parameters which apply to CLUSTER SHOW only */
if (action != CLUSTER_SHOW)
{
if (runtime_options.csv_mode)
{
error_list_append(&cli_warnings, _("--csv can only be used when executing CLUSTER SHOW"));
}
}
return; return;
} }
@@ -5080,6 +5217,10 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error)
NULL, NULL,
}; };
/*
* Note that in 9.6+, "hot_standby" and "archive" are accepted as aliases
* for "replica", but current_setting() will of course always return "replica"
*/
char *levels_96plus[] = { char *levels_96plus[] = {
"replica", "replica",
"logical", "logical",

View File

@@ -19,7 +19,7 @@
# Node ID and name # Node ID and name
# (Note: we recommend to avoid naming nodes after their initial # (Note: we recommend to avoid naming nodes after their initial
# replication funcion, as this will cause confusion when e.g. # replication function, as this will cause confusion when e.g.
# "standby2" is promoted to primary) # "standby2" is promoted to primary)
#node=2 # a unique integer #node=2 # a unique integer
#node_name=node2 # an arbitrary (but unique) string; we recommend using #node_name=node2 # an arbitrary (but unique) string; we recommend using
@@ -28,8 +28,16 @@
# Database connection information as a conninfo string # Database connection information as a conninfo string
# This must be accessible to all servers in the cluster; for details see: # This must be accessible to all servers in the cluster; for details see:
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING #
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
#
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr' #conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
#
# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
# conninfo string to determine the length of time which elapses before
# a network connection attempt is abandoned; for details see:
#
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT
# Optional configuration items # Optional configuration items
# ============================ # ============================
@@ -113,6 +121,10 @@
# #
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace # tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
# You can specify a restore_command to be used in the recovery.conf that
# will be placed in the cloned standby
#
# restore_command = cp /path/to/archived/wals/%f %p
# Failover settings (repmgrd) # Failover settings (repmgrd)
# --------------------------- # ---------------------------

View File

@@ -28,6 +28,7 @@
#include "dbutils.h" #include "dbutils.h"
#include "errcode.h" #include "errcode.h"
#include "config.h" #include "config.h"
#include "dirmod.h"
#define MIN_SUPPORTED_VERSION "9.3" #define MIN_SUPPORTED_VERSION "9.3"
#define MIN_SUPPORTED_VERSION_NUM 90300 #define MIN_SUPPORTED_VERSION_NUM 90300
@@ -69,7 +70,7 @@ typedef struct
bool rsync_only; bool rsync_only;
bool fast_checkpoint; bool fast_checkpoint;
bool ignore_external_config_files; bool ignore_external_config_files;
char pg_ctl_mode[MAXLEN]; bool csv_mode;
char masterport[MAXLEN]; char masterport[MAXLEN];
/* /*
* configuration file parameters which can be overridden on the * configuration file parameters which can be overridden on the
@@ -80,6 +81,7 @@ typedef struct
/* parameter used by STANDBY SWITCHOVER */ /* parameter used by STANDBY SWITCHOVER */
char remote_config_file[MAXLEN]; char remote_config_file[MAXLEN];
char pg_rewind[MAXPGPATH]; char pg_rewind[MAXPGPATH];
char pg_ctl_mode[MAXLEN];
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */ /* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
char config_archive_dir[MAXLEN]; char config_archive_dir[MAXLEN];
/* parameter used by CLUSTER CLEANUP */ /* parameter used by CLUSTER CLEANUP */
@@ -94,7 +96,7 @@ typedef struct
bool initdb_no_pwprompt; bool initdb_no_pwprompt;
} t_runtime_options; } t_runtime_options;
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false } #define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", "", false }
struct BackupLabel struct BackupLabel
{ {

149
repmgrd.c
View File

@@ -44,11 +44,11 @@
/* Local info */ /* Local info */
t_configuration_options local_options; t_configuration_options local_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
PGconn *my_local_conn = NULL; PGconn *my_local_conn = NULL;
/* Master info */ /* Master info */
t_configuration_options master_options; t_configuration_options master_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
PGconn *master_conn = NULL; PGconn *master_conn = NULL;
@@ -61,8 +61,6 @@ bool failover_done = false;
char *pid_file = NULL; char *pid_file = NULL;
t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
static void help(void); static void help(void);
static void usage(void); static void usage(void);
static void check_cluster_configuration(PGconn *conn); static void check_cluster_configuration(PGconn *conn);
@@ -399,7 +397,7 @@ main(int argc, char **argv)
case STANDBY: case STANDBY:
/* We need the node id of the master server as well as a connection to it */ /* We need the node id of the master server as well as a connection to it */
log_info(_("connecting to master node '%s'\n"), log_info(_("connecting to master node of cluster '%s'\n"),
local_options.cluster_name); local_options.cluster_name);
master_conn = get_master_connection(my_local_conn, master_conn = get_master_connection(my_local_conn,
@@ -462,16 +460,16 @@ main(int argc, char **argv)
do do
{ {
log_verbose(LOG_DEBUG, "standby check loop...\n"); if (node_info.type == STANDBY)
if (node_info.type == WITNESS)
{
witness_monitor();
}
else if (node_info.type == STANDBY)
{ {
log_verbose(LOG_DEBUG, "standby check loop...\n");
standby_monitor(); standby_monitor();
} }
else if (node_info.type == WITNESS)
{
log_verbose(LOG_DEBUG, "witness check loop...\n");
witness_monitor();
}
sleep(local_options.monitor_interval_secs); sleep(local_options.monitor_interval_secs);
@@ -667,7 +665,7 @@ witness_monitor(void)
" replication_lag, apply_lag )" " replication_lag, apply_lag )"
" VALUES(%d, %d, " " VALUES(%d, %d, "
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, " " '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
" pg_current_xlog_location(), NULL, " " pg_catalog.pg_current_xlog_location(), NULL, "
" 0, 0) ", " 0, 0) ",
get_repmgr_schema_quoted(my_local_conn), get_repmgr_schema_quoted(my_local_conn),
master_options.node, master_options.node,
@@ -695,7 +693,7 @@ standby_monitor(void)
{ {
PGresult *res; PGresult *res;
char monitor_standby_timestamp[MAXLEN]; char monitor_standby_timestamp[MAXLEN];
char last_wal_master_location[MAXLEN]; char last_wal_primary_location[MAXLEN];
char last_xlog_receive_location[MAXLEN]; char last_xlog_receive_location[MAXLEN];
char last_xlog_replay_location[MAXLEN]; char last_xlog_replay_location[MAXLEN];
char last_xact_replay_timestamp[MAXLEN]; char last_xact_replay_timestamp[MAXLEN];
@@ -706,6 +704,9 @@ standby_monitor(void)
XLogRecPtr lsn_last_xlog_receive_location; XLogRecPtr lsn_last_xlog_receive_location;
XLogRecPtr lsn_last_xlog_replay_location; XLogRecPtr lsn_last_xlog_replay_location;
long long unsigned int replication_lag;
long long unsigned int apply_lag;
int connection_retries, int connection_retries,
ret; ret;
bool did_retry = false; bool did_retry = false;
@@ -750,10 +751,9 @@ standby_monitor(void)
? "master" ? "master"
: "upstream"; : "upstream";
// ZZZ "5 minutes"?
/* /*
* Check if the upstream node is still available, if after 5 minutes of retries * Check that the upstream node is still available
* we cannot reconnect, try to get a new upstream node. * If not, initiate failover process
*/ */
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo); check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
@@ -820,26 +820,24 @@ standby_monitor(void)
else if (local_options.failover == AUTOMATIC_FAILOVER) else if (local_options.failover == AUTOMATIC_FAILOVER)
{ {
/* /*
* When we returns from this function we will have a new master * When we return from this function we will have a new master
* and a new master_conn * and a new master_conn
*/ *
/*
* Failover handling is handled differently depending on whether * Failover handling is handled differently depending on whether
* the failed node is the master or a cascading standby * the failed node is the master or a cascading standby
*/ */
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id); upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
if (upstream_node.type == MASTER) if (upstream_node.type == MASTER)
{ {
log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"), log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"),
node_info.upstream_node_id); node_info.upstream_node_id);
do_master_failover(); do_master_failover();
} }
else else
{ {
log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"), log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"),
node_info.upstream_node_id); node_info.upstream_node_id);
if (!do_upstream_standby_failover(upstream_node)) if (!do_upstream_standby_failover(upstream_node))
{ {
@@ -847,20 +845,20 @@ standby_monitor(void)
initPQExpBuffer(&errmsg); initPQExpBuffer(&errmsg);
appendPQExpBuffer(&errmsg, appendPQExpBuffer(&errmsg,
_("unable to reconnect to new upstream node, terminating...")); _("unable to reconnect to new upstream node, terminating..."));
log_err("%s\n", errmsg.data); log_err("%s\n", errmsg.data);
create_event_record(master_conn, create_event_record(master_conn,
&local_options, &local_options,
local_options.node, local_options.node,
"repmgrd_shutdown", "repmgrd_shutdown",
false, false,
errmsg.data); errmsg.data);
terminate(ERR_DB_CON); terminate(ERR_DB_CON);
} }
} }
return; return;
} }
} }
@@ -963,7 +961,7 @@ standby_monitor(void)
if (active_master_id != master_options.node) if (active_master_id != master_options.node)
{ {
log_notice(_("connecting to active master (node %i)...\n"), active_master_id); \ log_notice(_("connecting to active master (node %i)...\n"), active_master_id);
if (master_conn != NULL) if (master_conn != NULL)
{ {
PQfinish(master_conn); PQfinish(master_conn);
@@ -986,9 +984,11 @@ standby_monitor(void)
/* Get local xlog info */ /* Get local xlog info */
sqlquery_snprintf(sqlquery, sqlquery_snprintf(sqlquery,
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " "SELECT CURRENT_TIMESTAMP, "
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), " "pg_catalog.pg_last_xlog_receive_location(), "
"pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()"); "pg_catalog.pg_last_xlog_replay_location(), "
"pg_catalog.pg_last_xact_replay_timestamp(), "
"pg_catalog.pg_last_xlog_receive_location() >= pg_catalog.pg_last_xlog_replay_location()");
res = PQexec(my_local_conn, sqlquery); res = PQexec(my_local_conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -1038,7 +1038,12 @@ standby_monitor(void)
"Replayed WAL newer than received WAL - is this standby connected to its upstream?\n"); "Replayed WAL newer than received WAL - is this standby connected to its upstream?\n");
} }
/* Get master xlog info */ /*
* Get master xlog position
*
* TODO: investigate whether pg_current_xlog_insert_location() would be a better
* choice; see: https://github.com/2ndQuadrant/repmgr/issues/189
*/
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()"); sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");
res = PQexec(master_conn, sqlquery); res = PQexec(master_conn, sqlquery);
@@ -1049,23 +1054,43 @@ standby_monitor(void)
return; return;
} }
strncpy(last_wal_master_location, PQgetvalue(res, 0, 0), MAXLEN); strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN);
PQclear(res); PQclear(res);
/* Calculate the lag */ lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL);
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL);
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL); lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
/* Calculate apply lag */
if (last_xlog_receive_location_gte_replayed == false) if (last_xlog_receive_location_gte_replayed == false)
{ {
lsn_last_xlog_receive_location = lsn_last_xlog_replay_location; /*
* We're not receiving streaming WAL - in this case the receive location
* equals the last replayed location
*/
apply_lag = 0;
strncpy(last_xlog_receive_location, last_xlog_replay_location, MAXLEN);
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
} }
else else
{ {
apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
} }
/* Calculate replication lag */
if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location)
{
replication_lag = (long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location);
}
else
{
/* This should never happen, but in case it does set lag to zero */
log_warning("Master xlog (%s) location appears less than standby receive location (%s)\n",
last_wal_primary_location,
last_xlog_receive_location);
replication_lag = 0;
}
/* /*
* Build the SQL to execute on master * Build the SQL to execute on master
*/ */
@@ -1092,11 +1117,10 @@ standby_monitor(void)
local_options.node, local_options.node,
monitor_standby_timestamp, monitor_standby_timestamp,
last_xact_replay_timestamp, last_xact_replay_timestamp,
last_wal_master_location, last_wal_primary_location,
last_xlog_receive_location, last_xlog_receive_location,
(long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location), replication_lag,
(long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location)); apply_lag);
/* /*
* Execute the query asynchronously, but don't check for a result. We will * Execute the query asynchronously, but don't check for a result. We will
* check the result next time we pause for a monitor step. * check the result next time we pause for a monitor step.
@@ -1143,8 +1167,8 @@ do_master_failover(void)
*/ */
t_node_info nodes[FAILOVER_NODES_MAX_CHECK]; t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
/* Store details of the failed node here */ /* Store details of the failed node here */
t_node_info failed_master = T_NODE_INFO_INITIALIZER; t_node_info failed_master = T_NODE_INFO_INITIALIZER;
/* Store details of the best candidate for promotion to master here */ /* Store details of the best candidate for promotion to master here */
t_node_info best_candidate = T_NODE_INFO_INITIALIZER; t_node_info best_candidate = T_NODE_INFO_INITIALIZER;
@@ -1154,7 +1178,7 @@ do_master_failover(void)
"SELECT id, conninfo, type, upstream_node_id " "SELECT id, conninfo, type, upstream_node_id "
" FROM %s.repl_nodes " " FROM %s.repl_nodes "
" WHERE cluster = '%s' " " WHERE cluster = '%s' "
" AND active IS TRUE " " AND active IS TRUE "
" AND priority > 0 " " AND priority > 0 "
" ORDER BY priority DESC, id " " ORDER BY priority DESC, id "
" LIMIT %i ", " LIMIT %i ",
@@ -1167,7 +1191,6 @@ do_master_failover(void)
{ {
log_err(_("unable to retrieve node records: %s\n"), PQerrorMessage(my_local_conn)); log_err(_("unable to retrieve node records: %s\n"), PQerrorMessage(my_local_conn));
PQclear(res); PQclear(res);
PQfinish(my_local_conn);
terminate(ERR_DB_QUERY); terminate(ERR_DB_QUERY);
} }
@@ -1541,12 +1564,12 @@ do_master_failover(void)
log_notice(_("Original master reappeared before this standby was promoted - no action taken\n")); log_notice(_("Original master reappeared before this standby was promoted - no action taken\n"));
PQfinish(master_conn); PQfinish(master_conn);
master_conn = NULL;
/* no failover occurred but we'll want to restart connections */ /* no failover occurred but we'll want to restart connections */
failover_done = true; failover_done = true;
return; return;
} }
PQfinish(my_local_conn);
} }
log_err(_("promote command failed. You could check and try it manually.\n")); log_err(_("promote command failed. You could check and try it manually.\n"));
@@ -1901,7 +1924,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
static bool static bool
set_local_node_status(void) set_local_node_status(void)
{ {
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
int active_master_node_id = NODE_NOT_FOUND; int active_master_node_id = NODE_NOT_FOUND;
char master_conninfo[MAXLEN]; char master_conninfo[MAXLEN];
@@ -1994,10 +2017,12 @@ check_cluster_configuration(PGconn *conn)
log_info(_("checking cluster configuration with schema '%s'\n"), get_repmgr_schema()); log_info(_("checking cluster configuration with schema '%s'\n"), get_repmgr_schema());
sqlquery_snprintf(sqlquery, sqlquery_snprintf(sqlquery,
"SELECT oid FROM pg_class " "SELECT oid FROM pg_catalog.pg_class "
" WHERE oid = '%s.repl_nodes'::regclass ", " WHERE oid = '%s.repl_nodes'::regclass ",
get_repmgr_schema_quoted(master_conn)); get_repmgr_schema_quoted(master_conn));
res = PQexec(conn, sqlquery); res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn)); log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn));
@@ -2416,6 +2441,8 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
errmsg.data); errmsg.data);
PQfinish(conn); PQfinish(conn);
conn = NULL;
terminate(ERR_DB_QUERY); terminate(ERR_DB_QUERY);
} }

View File

@@ -63,6 +63,15 @@ UPDATE repl_nodes SET type = 'master' WHERE id = $master_id;
-- UPDATE repl_nodes SET active = FALSE WHERE id IN (...); -- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);
/* There's also an event table which we need to create */
CREATE TABLE repl_events (
node_id INTEGER NOT NULL,
event TEXT NOT NULL,
successful BOOLEAN NOT NULL DEFAULT TRUE,
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
details TEXT NULL
);
/* When you're sure of your changes, commit them */ /* When you're sure of your changes, commit them */
-- COMMIT; -- COMMIT;

View File

@@ -27,5 +27,6 @@
BEGIN; BEGIN;
ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE; ALTER TABLE repl_nodes DROP CONSTRAINT repl_nodes_upstream_node_id_fkey,
ADD CONSTRAINT repl_nodes_upstream_node_id_fkey FOREIGN KEY (upstream_node_id) REFERENCES repl_nodes(id) DEFERRABLE;
COMMIT; COMMIT;