mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf4de71523 | ||
|
|
770e6f758c | ||
|
|
3deb6784e7 | ||
|
|
ba275bb0c2 | ||
|
|
9735bb63a1 | ||
|
|
1e5792f8df | ||
|
|
a01fefa7d0 | ||
|
|
34eaf94b2b | ||
|
|
68e3a9d7ab | ||
|
|
2ad4f68700 | ||
|
|
00aa0c8c87 | ||
|
|
e8025c7c9f | ||
|
|
6a17360b4c | ||
|
|
9e5e843a4f | ||
|
|
734ae1825e | ||
|
|
41fe58764e | ||
|
|
58a5249b7e | ||
|
|
90c0bd4638 | ||
|
|
359e81a6d6 | ||
|
|
07d220cb00 | ||
|
|
4dfeffe087 | ||
|
|
18544c82ca | ||
|
|
0f86bdcd05 | ||
|
|
7d33c1e411 | ||
|
|
fec65bde3d | ||
|
|
4863ea98bc |
19
FAQ.md
19
FAQ.md
@@ -90,6 +90,23 @@ General
|
|||||||
|
|
||||||
This option is only available when using the `--rsync-only` option.
|
This option is only available when using the `--rsync-only` option.
|
||||||
|
|
||||||
|
- How can I make the witness server use a particular port?
|
||||||
|
|
||||||
|
By default the witness server is configured to use port 5499; this
|
||||||
|
is intended to support running the witness server as a separate
|
||||||
|
instance on a normal node server, rather than on its own dedicated server.
|
||||||
|
|
||||||
|
To specify a port for the witness server, supply the port number to
|
||||||
|
repmgr with the `-l/--local-port` command line option.
|
||||||
|
|
||||||
|
- Do I need to include `shared_preload_libraries = 'repmgr_funcs'`
|
||||||
|
in `postgresql.conf` if I'm not using `repmgrd`?
|
||||||
|
|
||||||
|
No, the `repmgr_funcs` library is only needed when running `repmgrd`.
|
||||||
|
If you later decide to run `repmgrd`, you just need to add
|
||||||
|
`shared_preload_libraries = 'repmgr_funcs'` and restart PostgreSQL.
|
||||||
|
|
||||||
|
|
||||||
`repmgrd`
|
`repmgrd`
|
||||||
---------
|
---------
|
||||||
|
|
||||||
@@ -102,7 +119,7 @@ General
|
|||||||
|
|
||||||
- How can I prevent a node from ever being promoted to master?
|
- How can I prevent a node from ever being promoted to master?
|
||||||
|
|
||||||
In `rempgr.conf`, set its priority to a value of 0 or less.
|
In `repmgr.conf`, set its priority to a value of 0 or less.
|
||||||
|
|
||||||
- Does `repmgrd` support delayed standbys?
|
- Does `repmgrd` support delayed standbys?
|
||||||
|
|
||||||
|
|||||||
14
HISTORY
14
HISTORY
@@ -1,4 +1,11 @@
|
|||||||
3.0
|
3.0.1 2015-04-16
|
||||||
|
Prevent repmgrd from looping infinitely if node was not registered (Ian)
|
||||||
|
When promoting a standby, have repmgr (not repmgrd) handle metadata updates (Ian)
|
||||||
|
Re-use replication slot if it already exists (Ian)
|
||||||
|
Prevent a test SSH connection being made when not needed (Ian)
|
||||||
|
Correct monitoring table column names (Ian)
|
||||||
|
|
||||||
|
3.0 2015-03-27
|
||||||
Require PostgreSQL 9.3 or later (Ian)
|
Require PostgreSQL 9.3 or later (Ian)
|
||||||
Use `pg_basebackup` by default (instead of `rsync`) to clone standby servers (Ian)
|
Use `pg_basebackup` by default (instead of `rsync`) to clone standby servers (Ian)
|
||||||
Use `pg_ctl promote` to promote a standby to primary
|
Use `pg_ctl promote` to promote a standby to primary
|
||||||
@@ -11,6 +18,11 @@
|
|||||||
General usability and logging message improvements (Ian)
|
General usability and logging message improvements (Ian)
|
||||||
Code consolidation and cleanup (Ian)
|
Code consolidation and cleanup (Ian)
|
||||||
|
|
||||||
|
2.0.3 2015-04-16
|
||||||
|
Add -S/--superuser option for witness database creation (Ian)
|
||||||
|
Add -c/--fast-checkpoint option for cloning (Christoph)
|
||||||
|
Add option "--initdb-no-pwprompt" (Ian)
|
||||||
|
|
||||||
2.0.2 2015-02-17
|
2.0.2 2015-02-17
|
||||||
Add "--checksum" in rsync when using "--force" (Jaime)
|
Add "--checksum" in rsync when using "--force" (Jaime)
|
||||||
Use createdb/createuser instead of psql (Jaime)
|
Use createdb/createuser instead of psql (Jaime)
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ packages installed::
|
|||||||
|
|
||||||
sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev
|
sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev
|
||||||
|
|
||||||
If your using Debian packages for PostgreSQL and are building repmgr with the
|
If you're using Debian packages for PostgreSQL and are building repmgr with the
|
||||||
USE_PGXS option you also need to install the corresponding development package::
|
USE_PGXS option you also need to install the corresponding development package::
|
||||||
|
|
||||||
sudo apt-get install postgresql-server-dev-9.0
|
sudo apt-get install postgresql-server-dev-9.0
|
||||||
|
|||||||
@@ -71,7 +71,10 @@ Standby setup
|
|||||||
[2015-03-03 18:18:23] [NOTICE] HINT: You can now start your postgresql server
|
[2015-03-03 18:18:23] [NOTICE] HINT: You can now start your postgresql server
|
||||||
[2015-03-03 18:18:23] [NOTICE] for example : pg_ctl -D /path/to/standby/data start
|
[2015-03-03 18:18:23] [NOTICE] for example : pg_ctl -D /path/to/standby/data start
|
||||||
|
|
||||||
Note that at this point it does not matter if the `repmgr.conf` file is not found.
|
Note that the `repmgr.conf` file is not required when cloning a standby.
|
||||||
|
However we recommend providing a valid `repmgr.conf` if you wish to use
|
||||||
|
replication slots, or want `repmgr` to log the clone event to the
|
||||||
|
`repl_events` table.
|
||||||
|
|
||||||
This will clone the PostgreSQL database files from the master, including its
|
This will clone the PostgreSQL database files from the master, including its
|
||||||
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
Summary: repmgr
|
Summary: repmgr
|
||||||
Name: repmgr
|
Name: repmgr
|
||||||
Version: 3.0rc4
|
Version: 3.0
|
||||||
Release: 1
|
Release: 1
|
||||||
License: GPLv3
|
License: GPLv3
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
|
|||||||
@@ -1,89 +1,114 @@
|
|||||||
#!/bin/bash
|
#!/bin/sh
|
||||||
#
|
#
|
||||||
# repmgrd Start up the repmgrd daemon
|
# chkconfig: - 75 16
|
||||||
# repmrgd (replication manager daemon)
|
# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
||||||
#
|
# processname: repmgrd
|
||||||
# chkconfig: - 75 16
|
# pidfile="/var/run/${NAME}.pid"
|
||||||
# description: repmgrd is the repliation manager daemon \
|
|
||||||
# The repmgrd replication management and monitoring daemon for PostgreSQL.
|
|
||||||
|
|
||||||
### BEGIN INIT INFO
|
|
||||||
# Provides: repmgrd
|
|
||||||
# Required-Start: $local_fs $remote_fs $network $syslog postgresql
|
|
||||||
# Required-Stop: $local_fs $remote_fs $network $syslog postgresql
|
|
||||||
# Should-Start: $syslog postgresql-9.3
|
|
||||||
# Should-Stop: $syslog postgresql-9.3
|
|
||||||
# Short-Description: start and stop repmrgd
|
|
||||||
# Description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
|
||||||
# this is used to monitor a postgresql cluster.
|
|
||||||
### END INIT INFO
|
|
||||||
|
|
||||||
# Source function library.
|
# Source function library.
|
||||||
. /etc/init.d/functions
|
INITD=/etc/rc.d/init.d
|
||||||
|
. $INITD/functions
|
||||||
|
|
||||||
# Source networking configuration.
|
# Get function listing for cross-distribution logic.
|
||||||
|
TYPESET=`typeset -f|grep "declare"`
|
||||||
|
|
||||||
|
# Get network config.
|
||||||
. /etc/sysconfig/network
|
. /etc/sysconfig/network
|
||||||
|
|
||||||
prog=repmgrd
|
DESC="PostgreSQL replication management and monitoring daemon"
|
||||||
REPMGRD_ENABLED=yes
|
NAME=repmgrd
|
||||||
|
|
||||||
|
REPMGRD_ENABLED=no
|
||||||
REPMGRD_OPTS=
|
REPMGRD_OPTS=
|
||||||
REPMGRD_USER=postgres
|
REPMGRD_USER=postgres
|
||||||
DAEMONIZE="-d"
|
REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
|
||||||
|
REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
||||||
|
REPMGRD_LOCK=/var/lock/subsys/${NAME}
|
||||||
|
REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
|
||||||
|
|
||||||
# pull in sysconfig settings
|
# Read configuration variable file if it is present
|
||||||
[ -f /etc/sysconfig/repmgrd ] && . /etc/sysconfig/repmgrd
|
[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
|
||||||
|
|
||||||
LOCKFILE=/var/lock/subsys/$prog
|
# For SELinux we need to use 'runuser' not 'su'
|
||||||
RETVAL=0
|
if [ -x /sbin/runuser ]
|
||||||
|
then
|
||||||
|
SU=runuser
|
||||||
|
else
|
||||||
|
SU=su
|
||||||
|
fi
|
||||||
|
|
||||||
|
test -x $REPMGRD_BIN || exit 0
|
||||||
|
|
||||||
case "$REPMGRD_ENABLED" in
|
case "$REPMGRD_ENABLED" in
|
||||||
[Yy]*)
|
[Yy]*)
|
||||||
#nothing to do here
|
break
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
exit 2
|
exit 0
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
|
||||||
if [ -z "$REPMGRD_OPTS" ]
|
if [ -z "${REPMGRD_OPTS}" ]
|
||||||
then
|
then
|
||||||
echo "Not starting $prog, REPMGRD_OPTS not set in /etc/sysconfig/$prog"
|
echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
|
||||||
exit 2
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
start() {
|
start()
|
||||||
[ "$EUID" != "0" ] && exit 4
|
{
|
||||||
[ "$NETWORKING" = "no" ] && exit 1
|
REPMGRD_START=$"Starting ${NAME} service: "
|
||||||
|
|
||||||
# Start daemons.
|
# Make sure startup-time log file is valid
|
||||||
echo -n $"Starting $prog: "
|
if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
|
||||||
daemon --user $REPMGRD_USER $prog $DAEMONIZE $REPMGRD_OPTS
|
then
|
||||||
RETVAL=$?
|
touch "${REPMGRD_LOG}" || exit 1
|
||||||
|
chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
|
||||||
|
chmod go-rwx "${REPMGRD_LOG}"
|
||||||
|
[ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo -n "${REPMGRD_START}"
|
||||||
|
$SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
|
||||||
|
sleep 2
|
||||||
|
pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
|
||||||
|
if [ "x${pid}" != "x" ]
|
||||||
|
then
|
||||||
|
success "${REPMGRD_START}"
|
||||||
|
touch "${REPMGRD_LOCK}"
|
||||||
|
echo $pid > "${REPMGRD_PIDFILE}"
|
||||||
echo
|
echo
|
||||||
[ $RETVAL -eq 0 ] && touch $LOCKFILE
|
else
|
||||||
return $RETVAL
|
failure "${REPMGRD_START}"
|
||||||
|
echo
|
||||||
|
script_result=1
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
stop() {
|
stop()
|
||||||
[ "$EUID" != "0" ] && exit 4
|
{
|
||||||
echo -n $"Shutting down $prog: "
|
echo -n $"Stopping ${NAME} service: "
|
||||||
killproc $prog
|
if [ -e "${REPMGRD_LOCK}" ]
|
||||||
RETVAL=$?
|
then
|
||||||
echo
|
killproc ${NAME}
|
||||||
[ $RETVAL -eq 0 ] && rm -f $LOCKFILE
|
ret=$?
|
||||||
return $RETVAL
|
if [ $ret -eq 0 ]
|
||||||
}
|
then
|
||||||
status() {
|
echo_success
|
||||||
if [ -f "$LOCKFILE" ]; then
|
rm -f "${REPMGRD_PIDFILE}"
|
||||||
echo "$prog is running"
|
rm -f "${REPMGRD_LOCK}"
|
||||||
else
|
else
|
||||||
RETVAL=3
|
echo_failure
|
||||||
echo "$prog is stopped"
|
script_result=1
|
||||||
fi
|
fi
|
||||||
return $RETVAL
|
else
|
||||||
|
# not running; per LSB standards this is "ok"
|
||||||
|
echo_success
|
||||||
|
fi
|
||||||
|
echo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# See how we were called.
|
# See how we were called.
|
||||||
case "$1" in
|
case "$1" in
|
||||||
start)
|
start)
|
||||||
@@ -93,22 +118,16 @@ case "$1" in
|
|||||||
stop
|
stop
|
||||||
;;
|
;;
|
||||||
status)
|
status)
|
||||||
status $prog
|
status -p $REPMGRD_PIDFILE $NAME
|
||||||
|
script_result=$?
|
||||||
;;
|
;;
|
||||||
restart|force-reload)
|
restart)
|
||||||
stop
|
stop
|
||||||
start
|
start
|
||||||
;;
|
|
||||||
try-restart|condrestart)
|
|
||||||
if status $prog > /dev/null; then
|
|
||||||
stop
|
|
||||||
start
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
reload)
|
|
||||||
exit 3
|
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo $"Usage: $0 {start|stop|status|restart|try-restart|force-reload}"
|
echo $"Usage: $0 {start|stop|status|restart}"
|
||||||
exit 2
|
exit 2
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
exit $script_result
|
||||||
|
|||||||
@@ -1,4 +1,21 @@
|
|||||||
#default sysconfig file for repmrgd
|
# default settings for repmgrd. This file is sourced by /bin/sh from
|
||||||
#custom overrides can be placed here
|
# /etc/init.d/repmgrd
|
||||||
|
|
||||||
REPMGRD_OPTS="-f /etc/repmgr/repmgr.conf"
|
# disable repmgrd by default so it won't get started upon installation
|
||||||
|
# valid values: yes/no
|
||||||
|
REPMGRD_ENABLED=no
|
||||||
|
|
||||||
|
# Options for repmgrd (required)
|
||||||
|
#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
|
||||||
|
|
||||||
|
# User to run repmgrd as
|
||||||
|
#REPMGRD_USER=postgres
|
||||||
|
|
||||||
|
# repmgrd binary
|
||||||
|
#REPMGRD_BIN=/usr/bin/repmgr
|
||||||
|
|
||||||
|
# pid file
|
||||||
|
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
||||||
|
|
||||||
|
# log file
|
||||||
|
#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log
|
||||||
|
|||||||
2
TODO
2
TODO
@@ -5,6 +5,8 @@ Known issues in repmgr
|
|||||||
the database server using the ``pg_ctl`` command may accidentally
|
the database server using the ``pg_ctl`` command may accidentally
|
||||||
terminate after their associated ssh session ends.
|
terminate after their associated ssh session ends.
|
||||||
|
|
||||||
|
* PGPASSFILE may not be passed to pg_basebackup
|
||||||
|
|
||||||
Planned feature improvements
|
Planned feature improvements
|
||||||
============================
|
============================
|
||||||
|
|
||||||
|
|||||||
58
dbutils.c
58
dbutils.c
@@ -197,7 +197,7 @@ is_pgup(PGconn *conn, int timeout)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the id of the active master node, or -1 if no
|
* Return the id of the active master node, or NODE_NOT_FOUND if no
|
||||||
* record available.
|
* record available.
|
||||||
*
|
*
|
||||||
* This reports the value stored in the database only and
|
* This reports the value stored in the database only and
|
||||||
@@ -224,12 +224,12 @@ get_master_node_id(PGconn *conn, char *cluster)
|
|||||||
{
|
{
|
||||||
log_err(_("get_master_node_id(): query failed\n%s\n"),
|
log_err(_("get_master_node_id(): query failed\n%s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
retval = -1;
|
retval = NODE_NOT_FOUND;
|
||||||
}
|
}
|
||||||
else if (PQntuples(res) == 0)
|
else if (PQntuples(res) == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("get_master_node_id(): no active primary found\n"));
|
log_warning(_("get_master_node_id(): no active primary found\n"));
|
||||||
retval = -1;
|
retval = NODE_NOT_FOUND;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -511,7 +511,7 @@ get_master_connection(PGconn *standby_conn, char *cluster,
|
|||||||
|
|
||||||
if(master_id != NULL)
|
if(master_id != NULL)
|
||||||
{
|
{
|
||||||
*master_id = -1;
|
*master_id = NODE_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* find all nodes belonging to this cluster */
|
/* find all nodes belonging to this cluster */
|
||||||
@@ -728,6 +728,49 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
|||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check whether slot exists already; if it exists and is active, that
|
||||||
|
* means another active standby is using it, which creates an error situation;
|
||||||
|
* if not we can reuse it as-is
|
||||||
|
*/
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"SELECT active, slot_type "
|
||||||
|
" FROM pg_replication_slots "
|
||||||
|
" WHERE slot_name = '%s' ",
|
||||||
|
slot_name);
|
||||||
|
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_err(_("unable to query pg_replication_slots: %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(PQntuples(res))
|
||||||
|
{
|
||||||
|
if(strcmp(PQgetvalue(res, 0, 1), "physical") != 0)
|
||||||
|
{
|
||||||
|
log_err(_("Slot '%s' exists and is not a physical slot\n"),
|
||||||
|
slot_name);
|
||||||
|
PQclear(res);
|
||||||
|
}
|
||||||
|
if(strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||||
|
{
|
||||||
|
PQclear(res);
|
||||||
|
log_debug(_("Replication slot '%s' exists but is inactive; reusing\n"),
|
||||||
|
slot_name);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
log_err(_("Slot '%s' already exists as an active slot\n"),
|
||||||
|
slot_name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||||
slot_name);
|
slot_name);
|
||||||
@@ -1051,7 +1094,12 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
bool success = true;
|
bool success = true;
|
||||||
struct tm ts;
|
struct tm ts;
|
||||||
|
|
||||||
if(conn != NULL)
|
/* Only attempt to write a record if a connection handle was provided.
|
||||||
|
Also check that the repmgr schema has been properly initialised - if
|
||||||
|
not it means no configuration file was provided, which can happen with
|
||||||
|
e.g. `repmgr standby clone`, and we won't know which schema to write to.
|
||||||
|
*/
|
||||||
|
if(conn != NULL && strcmp(repmgr_schema, DEFAULT_REPMGR_SCHEMA_PREFIX) != 0)
|
||||||
{
|
{
|
||||||
int n_node_id = htonl(node_id);
|
int n_node_id = htonl(node_id);
|
||||||
char *t_successful = successful ? "TRUE" : "FALSE";
|
char *t_successful = successful ? "TRUE" : "FALSE";
|
||||||
|
|||||||
353
repmgr.c
353
repmgr.c
@@ -77,6 +77,7 @@ static bool write_recovery_file_line(FILE *recovery_file, char *recovery_file_pa
|
|||||||
static void check_master_standby_version_match(PGconn *conn, PGconn *master_conn);
|
static void check_master_standby_version_match(PGconn *conn, PGconn *master_conn);
|
||||||
static int check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string);
|
static int check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string);
|
||||||
static bool check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error);
|
static bool check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error);
|
||||||
|
static bool update_node_record_set_master(PGconn *conn, int this_node_id);
|
||||||
|
|
||||||
static char *make_pg_path(char *file);
|
static char *make_pg_path(char *file);
|
||||||
|
|
||||||
@@ -139,11 +140,11 @@ main(int argc, char **argv)
|
|||||||
{"verbose", no_argument, NULL, 'v'},
|
{"verbose", no_argument, NULL, 'v'},
|
||||||
{"pg_bindir", required_argument, NULL, 'b'},
|
{"pg_bindir", required_argument, NULL, 'b'},
|
||||||
{"rsync-only", no_argument, NULL, 'r'},
|
{"rsync-only", no_argument, NULL, 'r'},
|
||||||
|
{"fast-checkpoint", no_argument, NULL, 'c'},
|
||||||
{"initdb-no-pwprompt", no_argument, NULL, 1},
|
{"initdb-no-pwprompt", no_argument, NULL, 1},
|
||||||
{"check-upstream-config", no_argument, NULL, 2},
|
{"check-upstream-config", no_argument, NULL, 2},
|
||||||
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
||||||
{"fast-checkpoint", no_argument, NULL, 4},
|
{"ignore-external-config-files", no_argument, NULL, 4},
|
||||||
{"ignore-external-config-files", no_argument, NULL, 5},
|
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -174,7 +175,7 @@ main(int argc, char **argv)
|
|||||||
/* Prevent getopt_long() from printing an error message */
|
/* Prevent getopt_long() from printing an error message */
|
||||||
opterr = 0;
|
opterr = 0;
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "d:h:p:U:S:D:l:f:R:w:k:FWIvr:b:", long_options,
|
while ((c = getopt_long(argc, argv, "d:h:p:U:S:D:l:f:R:w:k:FWIvb:r:c", long_options,
|
||||||
&optindex)) != -1)
|
&optindex)) != -1)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
@@ -239,6 +240,9 @@ main(int argc, char **argv)
|
|||||||
case 'r':
|
case 'r':
|
||||||
runtime_options.rsync_only = true;
|
runtime_options.rsync_only = true;
|
||||||
break;
|
break;
|
||||||
|
case 'c':
|
||||||
|
runtime_options.fast_checkpoint = true;
|
||||||
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
runtime_options.initdb_no_pwprompt = true;
|
runtime_options.initdb_no_pwprompt = true;
|
||||||
break;
|
break;
|
||||||
@@ -267,9 +271,6 @@ main(int argc, char **argv)
|
|||||||
strncpy(runtime_options.recovery_min_apply_delay, optarg, MAXLEN);
|
strncpy(runtime_options.recovery_min_apply_delay, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
runtime_options.fast_checkpoint = true;
|
|
||||||
break;
|
|
||||||
case 5:
|
|
||||||
runtime_options.ignore_external_config_files = true;
|
runtime_options.ignore_external_config_files = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -485,7 +486,7 @@ main(int argc, char **argv)
|
|||||||
*/
|
*/
|
||||||
if (config_file_required)
|
if (config_file_required)
|
||||||
{
|
{
|
||||||
if (options.node == -1)
|
if (options.node == NODE_NOT_FOUND)
|
||||||
{
|
{
|
||||||
if(config_file_parsed == true)
|
if(config_file_parsed == true)
|
||||||
{
|
{
|
||||||
@@ -925,7 +926,7 @@ do_standby_clone(void)
|
|||||||
int i;
|
int i;
|
||||||
bool pg_start_backup_executed = false;
|
bool pg_start_backup_executed = false;
|
||||||
bool target_directory_provided = false;
|
bool target_directory_provided = false;
|
||||||
bool config_file_copy_required = false;
|
bool external_config_file_copy_required = false;
|
||||||
|
|
||||||
char master_data_directory[MAXFILENAME];
|
char master_data_directory[MAXFILENAME];
|
||||||
char local_data_directory[MAXFILENAME];
|
char local_data_directory[MAXFILENAME];
|
||||||
@@ -1101,7 +1102,7 @@ do_standby_clone(void)
|
|||||||
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
||||||
{
|
{
|
||||||
config_file_outside_pgdata = true;
|
config_file_outside_pgdata = true;
|
||||||
config_file_copy_required = true;
|
external_config_file_copy_required = true;
|
||||||
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1110,7 +1111,7 @@ do_standby_clone(void)
|
|||||||
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
||||||
{
|
{
|
||||||
hba_file_outside_pgdata = true;
|
hba_file_outside_pgdata = true;
|
||||||
config_file_copy_required = true;
|
external_config_file_copy_required = true;
|
||||||
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1119,13 +1120,14 @@ do_standby_clone(void)
|
|||||||
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
||||||
{
|
{
|
||||||
ident_file_outside_pgdata = true;
|
ident_file_outside_pgdata = true;
|
||||||
config_file_copy_required = true;
|
external_config_file_copy_required = true;
|
||||||
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
log_warning(_("unknown parameter: %s\n"), PQgetvalue(res, i, 0));
|
log_warning(_("unknown parameter: %s\n"), PQgetvalue(res, i, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1316,12 +1318,6 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/*
|
|
||||||
* With rsync we'll need to explicitly copy configuration files in any
|
|
||||||
* case
|
|
||||||
*/
|
|
||||||
config_file_copy_required = true;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -1335,14 +1331,14 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If configuration files were not in the data directory, we need to copy
|
* If configuration files were not inside the data directory, we'll need to
|
||||||
* them via SSH
|
* copy them via SSH (unless `--ignore-external-config-files` was provided)
|
||||||
*
|
*
|
||||||
* TODO: add option to place these files in the same location on the
|
* TODO: add option to place these files in the same location on the
|
||||||
* standby server as on the primary?
|
* standby server as on the primary?
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if(config_file_copy_required == true)
|
if(external_config_file_copy_required && !runtime_options.ignore_external_config_files)
|
||||||
{
|
{
|
||||||
log_notice(_("copying configuration files from master\n"));
|
log_notice(_("copying configuration files from master\n"));
|
||||||
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
|
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
|
||||||
@@ -1354,102 +1350,81 @@ do_standby_clone(void)
|
|||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(strlen(master_config_file))
|
if(config_file_outside_pgdata)
|
||||||
{
|
{
|
||||||
if(runtime_options.ignore_external_config_files && config_file_outside_pgdata)
|
log_info(_("standby clone: master config file '%s'\n"), master_config_file);
|
||||||
{
|
|
||||||
log_notice(_("standby clone: not copying master config file '%s'\n"), master_config_file);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_info(_("standby clone: master config file '%s'\n"), master_config_file);
|
|
||||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
|
||||||
master_config_file, local_config_file, false, server_version_num);
|
|
||||||
if (r != 0)
|
|
||||||
{
|
|
||||||
log_warning(_("standby clone: failed copying master config file '%s'\n"),
|
|
||||||
master_config_file);
|
|
||||||
retval = ERR_BAD_SSH;
|
|
||||||
goto stop_backup;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(strlen(master_hba_file))
|
|
||||||
{
|
|
||||||
if(runtime_options.ignore_external_config_files && hba_file_outside_pgdata)
|
|
||||||
{
|
|
||||||
log_notice(_("standby clone: not copying master config file '%s'\n"), master_hba_file);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_info(_("standby clone: master hba file '%s'\n"), master_hba_file);
|
|
||||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
|
||||||
master_hba_file, local_hba_file, false, server_version_num);
|
|
||||||
if (r != 0)
|
|
||||||
{
|
|
||||||
log_warning(_("standby clone: failed copying master hba file '%s'\n"),
|
|
||||||
master_hba_file);
|
|
||||||
retval = ERR_BAD_SSH;
|
|
||||||
goto stop_backup;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(strlen(master_ident_file))
|
|
||||||
{
|
|
||||||
if(runtime_options.ignore_external_config_files && ident_file_outside_pgdata)
|
|
||||||
{
|
|
||||||
log_notice(_("standby clone: not copying master config file '%s'\n"), master_ident_file);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_info(_("standby clone: master ident file '%s'\n"), master_ident_file);
|
|
||||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
|
||||||
master_ident_file, local_ident_file, false, server_version_num);
|
|
||||||
if (r != 0)
|
|
||||||
{
|
|
||||||
log_warning(_("standby clone: failed copying master ident file '%s'\n"),
|
|
||||||
master_ident_file);
|
|
||||||
retval = ERR_BAD_SSH;
|
|
||||||
goto stop_backup;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When using rsync, copy pg_control file last, emulating the base backup
|
|
||||||
* protocol.
|
|
||||||
*/
|
|
||||||
if(runtime_options.rsync_only)
|
|
||||||
{
|
|
||||||
maxlen_snprintf(local_control_file, "%s/global", local_data_directory);
|
|
||||||
|
|
||||||
log_info(_("standby clone: local control file '%s'\n"),
|
|
||||||
local_control_file);
|
|
||||||
|
|
||||||
if (!create_dir(local_control_file))
|
|
||||||
{
|
|
||||||
log_err(_("couldn't create directory %s ...\n"),
|
|
||||||
local_control_file);
|
|
||||||
goto stop_backup;
|
|
||||||
}
|
|
||||||
|
|
||||||
maxlen_snprintf(master_control_file, "%s/global/pg_control",
|
|
||||||
master_data_directory);
|
|
||||||
log_info(_("standby clone: master control file '%s'\n"),
|
|
||||||
master_control_file);
|
|
||||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||||
master_control_file, local_control_file,
|
master_config_file, local_config_file, false, server_version_num);
|
||||||
false, server_version_num);
|
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("standby clone: failed copying master control file '%s'\n"),
|
log_err(_("standby clone: failed copying master config file '%s'\n"),
|
||||||
master_control_file);
|
master_config_file);
|
||||||
retval = ERR_BAD_SSH;
|
retval = ERR_BAD_SSH;
|
||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(hba_file_outside_pgdata)
|
||||||
|
{
|
||||||
|
log_info(_("standby clone: master hba file '%s'\n"), master_hba_file);
|
||||||
|
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||||
|
master_hba_file, local_hba_file, false, server_version_num);
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
|
log_err(_("standby clone: failed copying master hba file '%s'\n"),
|
||||||
|
master_hba_file);
|
||||||
|
retval = ERR_BAD_SSH;
|
||||||
|
goto stop_backup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ident_file_outside_pgdata)
|
||||||
|
{
|
||||||
|
log_info(_("standby clone: master ident file '%s'\n"), master_ident_file);
|
||||||
|
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||||
|
master_ident_file, local_ident_file, false, server_version_num);
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
|
log_err(_("standby clone: failed copying master ident file '%s'\n"),
|
||||||
|
master_ident_file);
|
||||||
|
retval = ERR_BAD_SSH;
|
||||||
|
goto stop_backup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When using rsync, copy pg_control file last, emulating the base backup
|
||||||
|
* protocol.
|
||||||
|
*/
|
||||||
|
if(runtime_options.rsync_only)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(local_control_file, "%s/global", local_data_directory);
|
||||||
|
|
||||||
|
log_info(_("standby clone: local control file '%s'\n"),
|
||||||
|
local_control_file);
|
||||||
|
|
||||||
|
if (!create_dir(local_control_file))
|
||||||
|
{
|
||||||
|
log_err(_("couldn't create directory %s ...\n"),
|
||||||
|
local_control_file);
|
||||||
|
goto stop_backup;
|
||||||
|
}
|
||||||
|
|
||||||
|
maxlen_snprintf(master_control_file, "%s/global/pg_control",
|
||||||
|
master_data_directory);
|
||||||
|
log_info(_("standby clone: master control file '%s'\n"),
|
||||||
|
master_control_file);
|
||||||
|
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||||
|
master_control_file, local_control_file,
|
||||||
|
false, server_version_num);
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
|
log_warning(_("standby clone: failed copying master control file '%s'\n"),
|
||||||
|
master_control_file);
|
||||||
|
retval = ERR_BAD_SSH;
|
||||||
|
goto stop_backup;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stop_backup:
|
stop_backup:
|
||||||
@@ -1562,8 +1537,9 @@ do_standby_promote(void)
|
|||||||
int i,
|
int i,
|
||||||
promote_check_timeout = 60,
|
promote_check_timeout = 60,
|
||||||
promote_check_interval = 2;
|
promote_check_interval = 2;
|
||||||
bool promote_sucess = false;
|
bool promote_success = false;
|
||||||
bool success;
|
bool success;
|
||||||
|
PQExpBufferData details;
|
||||||
|
|
||||||
/* We need to connect to check configuration */
|
/* We need to connect to check configuration */
|
||||||
log_info(_("connecting to standby database\n"));
|
log_info(_("connecting to standby database\n"));
|
||||||
@@ -1637,50 +1613,57 @@ do_standby_promote(void)
|
|||||||
retval = is_standby(conn);
|
retval = is_standby(conn);
|
||||||
if(!retval)
|
if(!retval)
|
||||||
{
|
{
|
||||||
promote_sucess = true;
|
promote_success = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
sleep(promote_check_interval);
|
sleep(promote_check_interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (promote_sucess == false)
|
if (promote_success == false)
|
||||||
{
|
{
|
||||||
PQExpBufferData details;
|
|
||||||
initPQExpBuffer(&details);
|
|
||||||
appendPQExpBuffer(&details,
|
|
||||||
"Node %i could not be promoted to master",
|
|
||||||
options.node);
|
|
||||||
|
|
||||||
create_event_record(old_master_conn,
|
|
||||||
&options,
|
|
||||||
options.node,
|
|
||||||
"standby_promote",
|
|
||||||
false,
|
|
||||||
details.data);
|
|
||||||
/* XXX exit with error? */
|
|
||||||
log_err(_(retval == 1 ?
|
log_err(_(retval == 1 ?
|
||||||
"STANDBY PROMOTE failed, this is still a standby node.\n" :
|
"STANDBY PROMOTE failed, this is still a standby node.\n" :
|
||||||
"connection to node lost!\n"));
|
"connection to node lost!\n"));
|
||||||
|
exit(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
|
||||||
|
/* update node information to reflect new status */
|
||||||
|
if(update_node_record_set_master(conn, options.node) == false)
|
||||||
{
|
{
|
||||||
PQExpBufferData details;
|
|
||||||
initPQExpBuffer(&details);
|
initPQExpBuffer(&details);
|
||||||
appendPQExpBuffer(&details,
|
appendPQExpBuffer(&details,
|
||||||
"Node %i was successfully promoted to master",
|
_("unable to update node record for node %i"),
|
||||||
options.node);
|
options.node);
|
||||||
|
|
||||||
log_notice(_("STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"));
|
log_err("%s\n", details.data);
|
||||||
/* Log the event */
|
|
||||||
create_event_record(conn,
|
create_event_record(NULL,
|
||||||
&options,
|
&options,
|
||||||
options.node,
|
options.node,
|
||||||
"standby_promote",
|
"repmgrd_failover_promote",
|
||||||
true,
|
false,
|
||||||
details.data);
|
details.data);
|
||||||
|
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(old_master_conn);
|
|
||||||
|
initPQExpBuffer(&details);
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
"Node %i was successfully promoted to master",
|
||||||
|
options.node);
|
||||||
|
|
||||||
|
log_notice(_("STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"));
|
||||||
|
|
||||||
|
/* Log the event */
|
||||||
|
create_event_record(conn,
|
||||||
|
&options,
|
||||||
|
options.node,
|
||||||
|
"standby_promote",
|
||||||
|
true,
|
||||||
|
details.data);
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
@@ -2232,7 +2215,7 @@ help(const char *progname)
|
|||||||
printf(_(" -p, --port=PORT database server port\n"));
|
printf(_(" -p, --port=PORT database server port\n"));
|
||||||
printf(_(" -U, --username=USERNAME database user name to connect as\n"));
|
printf(_(" -U, --username=USERNAME database user name to connect as\n"));
|
||||||
printf(_("\nConfiguration options:\n"));
|
printf(_("\nConfiguration options:\n"));
|
||||||
printf(_(" -b. --pg_bindir=PATH path to PostgreSQL binaries (optional)\n"));
|
printf(_(" -b, --pg_bindir=PATH path to PostgreSQL binaries (optional)\n"));
|
||||||
printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
|
printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
|
||||||
" copied to\n"));
|
" copied to\n"));
|
||||||
printf(_(" -l, --local-port=PORT standby or witness server local port\n"));
|
printf(_(" -l, --local-port=PORT standby or witness server local port\n"));
|
||||||
@@ -2242,15 +2225,13 @@ help(const char *progname)
|
|||||||
" (default: postgres)\n"));
|
" (default: postgres)\n"));
|
||||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC\n" \
|
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC\n" \
|
||||||
" wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS);
|
" wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS);
|
||||||
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of\n" \
|
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
|
||||||
" history\n"));
|
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
||||||
printf(_(" -F, --force force potentially dangerous operations\n" \
|
|
||||||
" to happen\n"));
|
|
||||||
printf(_(" -W, --wait wait for a master to appear\n"));
|
printf(_(" -W, --wait wait for a master to appear\n"));
|
||||||
printf(_(" -r, --rsync-only use only rsync to clone a standby\n"));
|
printf(_(" -r, --rsync-only use only rsync to clone a standby\n"));
|
||||||
|
printf(_(" -c, --fast-checkpoint force fast checkpoint when cloning a standby\n"));
|
||||||
printf(_(" --recovery-min-apply-delay=VALUE set recovery_min_apply_delay in recovery.conf\n" \
|
printf(_(" --recovery-min-apply-delay=VALUE set recovery_min_apply_delay in recovery.conf\n" \
|
||||||
" when cloning a standby (PostgreSQL 9.4 and later)\n"));
|
" when cloning a standby (PostgreSQL 9.4 and later)\n"));
|
||||||
printf(_(" --fast-checkpoint force fast checkpoint when cloning a standby\n"));
|
|
||||||
printf(_(" --ignore-external-config-files don't copy configuration files located outside \n" \
|
printf(_(" --ignore-external-config-files don't copy configuration files located outside \n" \
|
||||||
" the data directory when cloning a standby\n"));
|
" the data directory when cloning a standby\n"));
|
||||||
printf(_(" --initdb-no-pwprompt don't require superuser password when running initdb\n"));
|
printf(_(" --initdb-no-pwprompt don't require superuser password when running initdb\n"));
|
||||||
@@ -2440,7 +2421,7 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
|||||||
*/
|
*/
|
||||||
if (is_directory)
|
if (is_directory)
|
||||||
{
|
{
|
||||||
/* Files we don't want */
|
/* Files which we don't want */
|
||||||
appendPQExpBuffer(&rsync_flags, "%s",
|
appendPQExpBuffer(&rsync_flags, "%s",
|
||||||
" --exclude=postmaster.pid --exclude=postmaster.opts --exclude=global/pg_control");
|
" --exclude=postmaster.pid --exclude=postmaster.opts --exclude=global/pg_control");
|
||||||
|
|
||||||
@@ -2454,11 +2435,11 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
|||||||
" --exclude=postgresql.auto.conf.tmp");
|
" --exclude=postgresql.auto.conf.tmp");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Temporary files we don't want, if they exist */
|
/* Temporary files which we don't want, if they exist */
|
||||||
appendPQExpBuffer(&rsync_flags, " --exclude=%s*",
|
appendPQExpBuffer(&rsync_flags, " --exclude=%s*",
|
||||||
PG_TEMP_FILE_PREFIX);
|
PG_TEMP_FILE_PREFIX);
|
||||||
|
|
||||||
/* Directories we don't want */
|
/* Directories which we don't want */
|
||||||
appendPQExpBuffer(&rsync_flags, "%s",
|
appendPQExpBuffer(&rsync_flags, "%s",
|
||||||
" --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*");
|
" --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*");
|
||||||
|
|
||||||
@@ -2701,7 +2682,6 @@ create_schema(PGconn *conn)
|
|||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
|
|
||||||
/* create schema */
|
/* create schema */
|
||||||
sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", get_repmgr_schema_quoted(conn));
|
sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", get_repmgr_schema_quoted(conn));
|
||||||
log_debug(_("master register: %s\n"), sqlquery);
|
log_debug(_("master register: %s\n"), sqlquery);
|
||||||
@@ -3271,6 +3251,92 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool
|
||||||
|
update_node_record_set_master(PGconn *conn, int this_node_id)
|
||||||
|
{
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
|
log_debug(_("Setting %i as master and marking existing master as failed\n"), this_node_id);
|
||||||
|
|
||||||
|
res = PQexec(conn, "BEGIN");
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to begin transaction: %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
" UPDATE %s.repl_nodes "
|
||||||
|
" SET active = FALSE "
|
||||||
|
" WHERE cluster = '%s' "
|
||||||
|
" AND type = 'master' "
|
||||||
|
" AND active IS TRUE ",
|
||||||
|
get_repmgr_schema_quoted(conn),
|
||||||
|
options.cluster_name);
|
||||||
|
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to set old master node as inactive: %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
PQexec(conn, "ROLLBACK");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
" UPDATE %s.repl_nodes "
|
||||||
|
" SET type = 'master', "
|
||||||
|
" upstream_node_id = NULL "
|
||||||
|
" WHERE cluster = '%s' "
|
||||||
|
" AND id = %i ",
|
||||||
|
get_repmgr_schema_quoted(conn),
|
||||||
|
options.cluster_name,
|
||||||
|
this_node_id);
|
||||||
|
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to set current node %i as active master: %s\n"),
|
||||||
|
this_node_id,
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
PQexec(conn, "ROLLBACK");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
res = PQexec(conn, "COMMIT");
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to set commit transaction: %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
do_check_upstream_config(void)
|
do_check_upstream_config(void)
|
||||||
{
|
{
|
||||||
@@ -3307,7 +3373,6 @@ do_check_upstream_config(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
make_pg_path(char *file)
|
make_pg_path(char *file)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -7,8 +7,11 @@
|
|||||||
#
|
#
|
||||||
# repmgr and repmgrd require these items to be configured:
|
# repmgr and repmgrd require these items to be configured:
|
||||||
|
|
||||||
# Cluster name
|
# Cluster name - this will be used by repmgr to generate its internal
|
||||||
cluster=test
|
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
||||||
|
# to preserve case, we recommend using lower case and avoiding whitespace
|
||||||
|
# to facilitate easier querying of the repmgr views and tables.
|
||||||
|
cluster=example_cluster
|
||||||
|
|
||||||
# Node ID and name
|
# Node ID and name
|
||||||
# (Note: we recommend to avoid naming nodes after their initial
|
# (Note: we recommend to avoid naming nodes after their initial
|
||||||
|
|||||||
1
repmgr.h
1
repmgr.h
@@ -49,6 +49,7 @@
|
|||||||
|
|
||||||
#define MANUAL_FAILOVER 0
|
#define MANUAL_FAILOVER 0
|
||||||
#define AUTOMATIC_FAILOVER 1
|
#define AUTOMATIC_FAILOVER 1
|
||||||
|
#define NODE_NOT_FOUND -1
|
||||||
#define NO_UPSTREAM_NODE -1
|
#define NO_UPSTREAM_NODE -1
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
141
repmgrd.c
141
repmgrd.c
@@ -91,7 +91,6 @@ static void witness_monitor(void);
|
|||||||
static bool check_connection(PGconn *conn, const char *type);
|
static bool check_connection(PGconn *conn, const char *type);
|
||||||
static bool set_local_node_failed(void);
|
static bool set_local_node_failed(void);
|
||||||
|
|
||||||
static bool update_node_record_set_master(PGconn *conn, int this_node_id, int old_master_node_id);
|
|
||||||
static bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
static bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
||||||
|
|
||||||
static void update_shared_memory(char *last_wal_standby_applied);
|
static void update_shared_memory(char *last_wal_standby_applied);
|
||||||
@@ -284,9 +283,17 @@ main(int argc, char **argv)
|
|||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Retrieve record for this node from the database */
|
/* Retrieve record for this node from the local database */
|
||||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||||
|
|
||||||
|
/* No node record found - exit gracefully */
|
||||||
|
if(node_info.node_id == NODE_NOT_FOUND)
|
||||||
|
{
|
||||||
|
log_err(_("No metadata record found for this node - terminating\n"));
|
||||||
|
log_notice(_("HINT: was this node registered with 'repmgr (master|standby) register'?\n"));
|
||||||
|
terminate(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
log_debug("node id is %i, upstream is %i\n", node_info.node_id, node_info.upstream_node_id);
|
log_debug("node id is %i, upstream is %i\n", node_info.node_id, node_info.upstream_node_id);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -335,9 +342,9 @@ main(int argc, char **argv)
|
|||||||
log_info(_("starting continuous master connection check\n"));
|
log_info(_("starting continuous master connection check\n"));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that master is still alive.
|
* Check that master is still alive.
|
||||||
* XXX We should also check that the
|
* XXX We should also check that the
|
||||||
* standby servers are sending info
|
* standby servers are sending info
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -637,9 +644,9 @@ witness_monitor(void)
|
|||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_monitor "
|
"INSERT INTO %s.repl_monitor "
|
||||||
" (master_node, standby_node, "
|
" (primary_node, standby_node, "
|
||||||
" last_monitor_time, last_apply_time, "
|
" last_monitor_time, last_apply_time, "
|
||||||
" last_wal_master_location, last_wal_standby_location, "
|
" last_wal_primary_location, last_wal_standby_location, "
|
||||||
" replication_lag, apply_lag )"
|
" replication_lag, apply_lag )"
|
||||||
" VALUES(%d, %d, "
|
" VALUES(%d, %d, "
|
||||||
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
|
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
|
||||||
@@ -993,9 +1000,9 @@ standby_monitor(void)
|
|||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_monitor "
|
"INSERT INTO %s.repl_monitor "
|
||||||
" (master_node, standby_node, "
|
" (primary_node, standby_node, "
|
||||||
" last_monitor_time, last_apply_time, "
|
" last_monitor_time, last_apply_time, "
|
||||||
" last_wal_master_location, last_wal_standby_location, "
|
" last_wal_primary_location, last_wal_standby_location, "
|
||||||
" replication_lag, apply_lag ) "
|
" replication_lag, apply_lag ) "
|
||||||
" VALUES(%d, %d, "
|
" VALUES(%d, %d, "
|
||||||
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||||
@@ -1434,25 +1441,6 @@ do_master_failover(void)
|
|||||||
/* and reconnect to the local database */
|
/* and reconnect to the local database */
|
||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
|
|
||||||
/* update node information to reflect new status */
|
|
||||||
if(update_node_record_set_master(my_local_conn, node_info.node_id, failed_master.node_id) == false)
|
|
||||||
{
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("unable to update node record for node %i (promoted to master following failure of node %i)"),
|
|
||||||
node_info.node_id,
|
|
||||||
failed_master.node_id);
|
|
||||||
|
|
||||||
log_err("%s\n", event_details.data);
|
|
||||||
|
|
||||||
create_event_record(NULL,
|
|
||||||
&local_options,
|
|
||||||
node_info.node_id,
|
|
||||||
"repmgrd_failover_promote",
|
|
||||||
false,
|
|
||||||
event_details.data);
|
|
||||||
|
|
||||||
terminate(ERR_DB_QUERY);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update internal record for this node */
|
/* update internal record for this node */
|
||||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||||
@@ -1749,7 +1737,7 @@ set_local_node_failed(void)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
int active_master_node_id = -1;
|
int active_master_node_id = NODE_NOT_FOUND;
|
||||||
char master_conninfo[MAXLEN];
|
char master_conninfo[MAXLEN];
|
||||||
|
|
||||||
if (!check_connection(master_conn, "master"))
|
if (!check_connection(master_conn, "master"))
|
||||||
@@ -2089,7 +2077,7 @@ update_registration(void)
|
|||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(my_local_conn,
|
create_event_record(master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
@@ -2234,7 +2222,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
|||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
t_node_info node_info = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
t_node_info node_info = { NODE_NOT_FOUND, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||||
|
|
||||||
sprintf(sqlquery,
|
sprintf(sqlquery,
|
||||||
"SELECT id, upstream_node_id, conninfo, type, slot_name, active "
|
"SELECT id, upstream_node_id, conninfo, type, slot_name, active "
|
||||||
@@ -2260,7 +2248,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
|||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(my_local_conn,
|
create_event_record(NULL,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
@@ -2274,7 +2262,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
|||||||
if (!PQntuples(res)) {
|
if (!PQntuples(res)) {
|
||||||
log_warning(_("No record found record for node %i\n"), node_id);
|
log_warning(_("No record found record for node %i\n"), node_id);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
node_info.node_id = -1;
|
node_info.node_id = NODE_NOT_FOUND;
|
||||||
return node_info;
|
return node_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2313,93 +2301,6 @@ parse_node_type(const char *type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool
|
|
||||||
update_node_record_set_master(PGconn *conn, int this_node_id, int old_master_node_id)
|
|
||||||
{
|
|
||||||
PGresult *res;
|
|
||||||
char sqlquery[QUERY_STR_LEN];
|
|
||||||
|
|
||||||
log_debug(_("Setting failed node %i inactive; marking node %i as master\n"), old_master_node_id, this_node_id);
|
|
||||||
|
|
||||||
res = PQexec(conn, "BEGIN");
|
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
|
||||||
{
|
|
||||||
log_err(_("Unable to begin transaction: %s\n"),
|
|
||||||
PQerrorMessage(conn));
|
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
|
||||||
" UPDATE %s.repl_nodes "
|
|
||||||
" SET active = FALSE "
|
|
||||||
" WHERE cluster = '%s' "
|
|
||||||
" AND id = %i ",
|
|
||||||
get_repmgr_schema_quoted(conn),
|
|
||||||
local_options.cluster_name,
|
|
||||||
old_master_node_id);
|
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
|
||||||
{
|
|
||||||
log_err(_("Unable to set old master node %i as inactive: %s\n"),
|
|
||||||
old_master_node_id,
|
|
||||||
PQerrorMessage(conn));
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
PQexec(conn, "ROLLBACK");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
|
||||||
" UPDATE %s.repl_nodes "
|
|
||||||
" SET type = 'master', "
|
|
||||||
" upstream_node_id = NULL "
|
|
||||||
" WHERE cluster = '%s' "
|
|
||||||
" AND id = %i ",
|
|
||||||
get_repmgr_schema_quoted(conn),
|
|
||||||
local_options.cluster_name,
|
|
||||||
this_node_id);
|
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
|
||||||
{
|
|
||||||
log_err(_("Unable to set current node %i as active master: %s\n"),
|
|
||||||
this_node_id,
|
|
||||||
PQerrorMessage(conn));
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
PQexec(conn, "ROLLBACK");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
res = PQexec(conn, "COMMIT");
|
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
|
||||||
{
|
|
||||||
log_err(_("Unable to set commit transaction: %s\n"),
|
|
||||||
PQerrorMessage(conn));
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id)
|
update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user