mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf4de71523 | ||
|
|
770e6f758c | ||
|
|
3deb6784e7 | ||
|
|
ba275bb0c2 | ||
|
|
9735bb63a1 | ||
|
|
1e5792f8df | ||
|
|
a01fefa7d0 | ||
|
|
34eaf94b2b | ||
|
|
68e3a9d7ab | ||
|
|
2ad4f68700 | ||
|
|
00aa0c8c87 | ||
|
|
e8025c7c9f | ||
|
|
6a17360b4c | ||
|
|
9e5e843a4f | ||
|
|
734ae1825e | ||
|
|
41fe58764e | ||
|
|
58a5249b7e | ||
|
|
90c0bd4638 | ||
|
|
359e81a6d6 | ||
|
|
07d220cb00 | ||
|
|
4dfeffe087 | ||
|
|
18544c82ca | ||
|
|
0f86bdcd05 | ||
|
|
7d33c1e411 | ||
|
|
fec65bde3d | ||
|
|
4863ea98bc |
19
FAQ.md
19
FAQ.md
@@ -90,6 +90,23 @@ General
|
||||
|
||||
This option is only available when using the `--rsync-only` option.
|
||||
|
||||
- How can I make the witness server use a particular port?
|
||||
|
||||
By default the witness server is configured to use port 5499; this
|
||||
is intended to support running the witness server as a separate
|
||||
instance on a normal node server, rather than on its own dedicated server.
|
||||
|
||||
To specify a port for the witness server, supply the port number to
|
||||
repmgr with the `-l/--local-port` command line option.
|
||||
|
||||
- Do I need to include `shared_preload_libraries = 'repmgr_funcs'`
|
||||
in `postgresql.conf` if I'm not using `repmgrd`?
|
||||
|
||||
No, the `repmgr_funcs` library is only needed when running `repmgrd`.
|
||||
If you later decide to run `repmgrd`, you just need to add
|
||||
`shared_preload_libraries = 'repmgr_funcs'` and restart PostgreSQL.
|
||||
|
||||
|
||||
`repmgrd`
|
||||
---------
|
||||
|
||||
@@ -102,7 +119,7 @@ General
|
||||
|
||||
- How can I prevent a node from ever being promoted to master?
|
||||
|
||||
In `rempgr.conf`, set its priority to a value of 0 or less.
|
||||
In `repmgr.conf`, set its priority to a value of 0 or less.
|
||||
|
||||
- Does `repmgrd` support delayed standbys?
|
||||
|
||||
|
||||
14
HISTORY
14
HISTORY
@@ -1,4 +1,11 @@
|
||||
3.0
|
||||
3.0.1 2015-04-16
|
||||
Prevent repmgrd from looping infinitely if node was not registered (Ian)
|
||||
When promoting a standby, have repmgr (not repmgrd) handle metadata updates (Ian)
|
||||
Re-use replication slot if it already exists (Ian)
|
||||
Prevent a test SSH connection being made when not needed (Ian)
|
||||
Correct monitoring table column names (Ian)
|
||||
|
||||
3.0 2015-03-27
|
||||
Require PostgreSQL 9.3 or later (Ian)
|
||||
Use `pg_basebackup` by default (instead of `rsync`) to clone standby servers (Ian)
|
||||
Use `pg_ctl promote` to promote a standby to primary
|
||||
@@ -11,6 +18,11 @@
|
||||
General usability and logging message improvements (Ian)
|
||||
Code consolidation and cleanup (Ian)
|
||||
|
||||
2.0.3 2015-04-16
|
||||
Add -S/--superuser option for witness database creation (Ian)
|
||||
Add -c/--fast-checkpoint option for cloning (Christoph)
|
||||
Add option "--initdb-no-pwprompt" (Ian)
|
||||
|
||||
2.0.2 2015-02-17
|
||||
Add "--checksum" in rsync when using "--force" (Jaime)
|
||||
Use createdb/createuser instead of psql (Jaime)
|
||||
|
||||
@@ -98,7 +98,7 @@ packages installed::
|
||||
|
||||
sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev
|
||||
|
||||
If your using Debian packages for PostgreSQL and are building repmgr with the
|
||||
If you're using Debian packages for PostgreSQL and are building repmgr with the
|
||||
USE_PGXS option you also need to install the corresponding development package::
|
||||
|
||||
sudo apt-get install postgresql-server-dev-9.0
|
||||
|
||||
@@ -71,7 +71,10 @@ Standby setup
|
||||
[2015-03-03 18:18:23] [NOTICE] HINT: You can now start your postgresql server
|
||||
[2015-03-03 18:18:23] [NOTICE] for example : pg_ctl -D /path/to/standby/data start
|
||||
|
||||
Note that at this point it does not matter if the `repmgr.conf` file is not found.
|
||||
Note that the `repmgr.conf` file is not required when cloning a standby.
|
||||
However we recommend providing a valid `repmgr.conf` if you wish to use
|
||||
replication slots, or want `repmgr` to log the clone event to the
|
||||
`repl_events` table.
|
||||
|
||||
This will clone the PostgreSQL database files from the master, including its
|
||||
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
||||
|
||||
@@ -1,89 +1,114 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
#
|
||||
# repmgrd Start up the repmgrd daemon
|
||||
# repmgrd (replication manager daemon)
|
||||
#
|
||||
# chkconfig: - 75 16
|
||||
# description: repmgrd is the repliation manager daemon \
|
||||
# The repmgrd replication management and monitoring daemon for PostgreSQL.
|
||||
|
||||
### BEGIN INIT INFO
|
||||
# Provides: repmgrd
|
||||
# Required-Start: $local_fs $remote_fs $network $syslog postgresql
|
||||
# Required-Stop: $local_fs $remote_fs $network $syslog postgresql
|
||||
# Should-Start: $syslog postgresql-9.3
|
||||
# Should-Stop: $syslog postgresql-9.3
|
||||
# Short-Description: start and stop repmgrd
|
||||
# Description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
||||
# this is used to monitor a postgresql cluster.
|
||||
### END INIT INFO
|
||||
# chkconfig: - 75 16
|
||||
# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
||||
# processname: repmgrd
|
||||
# pidfile="/var/run/${NAME}.pid"
|
||||
|
||||
# Source function library.
|
||||
. /etc/init.d/functions
|
||||
INITD=/etc/rc.d/init.d
|
||||
. $INITD/functions
|
||||
|
||||
# Source networking configuration.
|
||||
# Get function listing for cross-distribution logic.
|
||||
TYPESET=`typeset -f|grep "declare"`
|
||||
|
||||
# Get network config.
|
||||
. /etc/sysconfig/network
|
||||
|
||||
prog=repmgrd
|
||||
REPMGRD_ENABLED=yes
|
||||
DESC="PostgreSQL replication management and monitoring daemon"
|
||||
NAME=repmgrd
|
||||
|
||||
REPMGRD_ENABLED=no
|
||||
REPMGRD_OPTS=
|
||||
REPMGRD_USER=postgres
|
||||
DAEMONIZE="-d"
|
||||
REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
|
||||
REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
||||
REPMGRD_LOCK=/var/lock/subsys/${NAME}
|
||||
REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
|
||||
|
||||
# pull in sysconfig settings
|
||||
[ -f /etc/sysconfig/repmgrd ] && . /etc/sysconfig/repmgrd
|
||||
# Read configuration variable file if it is present
|
||||
[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
|
||||
|
||||
LOCKFILE=/var/lock/subsys/$prog
|
||||
RETVAL=0
|
||||
# For SELinux we need to use 'runuser' not 'su'
|
||||
if [ -x /sbin/runuser ]
|
||||
then
|
||||
SU=runuser
|
||||
else
|
||||
SU=su
|
||||
fi
|
||||
|
||||
test -x $REPMGRD_BIN || exit 0
|
||||
|
||||
case "$REPMGRD_ENABLED" in
|
||||
[Yy]*)
|
||||
#nothing to do here
|
||||
break
|
||||
;;
|
||||
*)
|
||||
exit 2
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
if [ -z "$REPMGRD_OPTS" ]
|
||||
if [ -z "${REPMGRD_OPTS}" ]
|
||||
then
|
||||
echo "Not starting $prog, REPMGRD_OPTS not set in /etc/sysconfig/$prog"
|
||||
exit 2
|
||||
echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
start() {
|
||||
[ "$EUID" != "0" ] && exit 4
|
||||
[ "$NETWORKING" = "no" ] && exit 1
|
||||
start()
|
||||
{
|
||||
REPMGRD_START=$"Starting ${NAME} service: "
|
||||
|
||||
# Start daemons.
|
||||
echo -n $"Starting $prog: "
|
||||
daemon --user $REPMGRD_USER $prog $DAEMONIZE $REPMGRD_OPTS
|
||||
RETVAL=$?
|
||||
# Make sure startup-time log file is valid
|
||||
if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
|
||||
then
|
||||
touch "${REPMGRD_LOG}" || exit 1
|
||||
chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
|
||||
chmod go-rwx "${REPMGRD_LOG}"
|
||||
[ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
|
||||
fi
|
||||
|
||||
echo -n "${REPMGRD_START}"
|
||||
$SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
|
||||
sleep 2
|
||||
pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
|
||||
if [ "x${pid}" != "x" ]
|
||||
then
|
||||
success "${REPMGRD_START}"
|
||||
touch "${REPMGRD_LOCK}"
|
||||
echo $pid > "${REPMGRD_PIDFILE}"
|
||||
echo
|
||||
[ $RETVAL -eq 0 ] && touch $LOCKFILE
|
||||
return $RETVAL
|
||||
else
|
||||
failure "${REPMGRD_START}"
|
||||
echo
|
||||
script_result=1
|
||||
fi
|
||||
}
|
||||
|
||||
stop() {
|
||||
[ "$EUID" != "0" ] && exit 4
|
||||
echo -n $"Shutting down $prog: "
|
||||
killproc $prog
|
||||
RETVAL=$?
|
||||
echo
|
||||
[ $RETVAL -eq 0 ] && rm -f $LOCKFILE
|
||||
return $RETVAL
|
||||
}
|
||||
status() {
|
||||
if [ -f "$LOCKFILE" ]; then
|
||||
echo "$prog is running"
|
||||
stop()
|
||||
{
|
||||
echo -n $"Stopping ${NAME} service: "
|
||||
if [ -e "${REPMGRD_LOCK}" ]
|
||||
then
|
||||
killproc ${NAME}
|
||||
ret=$?
|
||||
if [ $ret -eq 0 ]
|
||||
then
|
||||
echo_success
|
||||
rm -f "${REPMGRD_PIDFILE}"
|
||||
rm -f "${REPMGRD_LOCK}"
|
||||
else
|
||||
RETVAL=3
|
||||
echo "$prog is stopped"
|
||||
echo_failure
|
||||
script_result=1
|
||||
fi
|
||||
return $RETVAL
|
||||
else
|
||||
# not running; per LSB standards this is "ok"
|
||||
echo_success
|
||||
fi
|
||||
echo
|
||||
}
|
||||
|
||||
|
||||
# See how we were called.
|
||||
case "$1" in
|
||||
start)
|
||||
@@ -93,22 +118,16 @@ case "$1" in
|
||||
stop
|
||||
;;
|
||||
status)
|
||||
status $prog
|
||||
status -p $REPMGRD_PIDFILE $NAME
|
||||
script_result=$?
|
||||
;;
|
||||
restart|force-reload)
|
||||
restart)
|
||||
stop
|
||||
start
|
||||
;;
|
||||
try-restart|condrestart)
|
||||
if status $prog > /dev/null; then
|
||||
stop
|
||||
start
|
||||
fi
|
||||
;;
|
||||
reload)
|
||||
exit 3
|
||||
start
|
||||
;;
|
||||
*)
|
||||
echo $"Usage: $0 {start|stop|status|restart|try-restart|force-reload}"
|
||||
echo $"Usage: $0 {start|stop|status|restart}"
|
||||
exit 2
|
||||
esac
|
||||
|
||||
exit $script_result
|
||||
|
||||
@@ -1,4 +1,21 @@
|
||||
#default sysconfig file for repmrgd
|
||||
#custom overrides can be placed here
|
||||
# default settings for repmgrd. This file is sourced by /bin/sh from
|
||||
# /etc/init.d/repmgrd
|
||||
|
||||
REPMGRD_OPTS="-f /etc/repmgr/repmgr.conf"
|
||||
# disable repmgrd by default so it won't get started upon installation
|
||||
# valid values: yes/no
|
||||
REPMGRD_ENABLED=no
|
||||
|
||||
# Options for repmgrd (required)
|
||||
#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
|
||||
|
||||
# User to run repmgrd as
|
||||
#REPMGRD_USER=postgres
|
||||
|
||||
# repmgrd binary
|
||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
||||
|
||||
# pid file
|
||||
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
||||
|
||||
# log file
|
||||
#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log
|
||||
|
||||
2
TODO
2
TODO
@@ -5,6 +5,8 @@ Known issues in repmgr
|
||||
the database server using the ``pg_ctl`` command may accidentally
|
||||
terminate after their associated ssh session ends.
|
||||
|
||||
* PGPASSFILE may not be passed to pg_basebackup
|
||||
|
||||
Planned feature improvements
|
||||
============================
|
||||
|
||||
|
||||
58
dbutils.c
58
dbutils.c
@@ -197,7 +197,7 @@ is_pgup(PGconn *conn, int timeout)
|
||||
|
||||
|
||||
/*
|
||||
* Return the id of the active master node, or -1 if no
|
||||
* Return the id of the active master node, or NODE_NOT_FOUND if no
|
||||
* record available.
|
||||
*
|
||||
* This reports the value stored in the database only and
|
||||
@@ -224,12 +224,12 @@ get_master_node_id(PGconn *conn, char *cluster)
|
||||
{
|
||||
log_err(_("get_master_node_id(): query failed\n%s\n"),
|
||||
PQerrorMessage(conn));
|
||||
retval = -1;
|
||||
retval = NODE_NOT_FOUND;
|
||||
}
|
||||
else if (PQntuples(res) == 0)
|
||||
{
|
||||
log_warning(_("get_master_node_id(): no active primary found\n"));
|
||||
retval = -1;
|
||||
retval = NODE_NOT_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -511,7 +511,7 @@ get_master_connection(PGconn *standby_conn, char *cluster,
|
||||
|
||||
if(master_id != NULL)
|
||||
{
|
||||
*master_id = -1;
|
||||
*master_id = NODE_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* find all nodes belonging to this cluster */
|
||||
@@ -728,6 +728,49 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
|
||||
/*
|
||||
* Check whether slot exists already; if it exists and is active, that
|
||||
* means another active standby is using it, which creates an error situation;
|
||||
* if not we can reuse it as-is
|
||||
*/
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT active, slot_type "
|
||||
" FROM pg_replication_slots "
|
||||
" WHERE slot_name = '%s' ",
|
||||
slot_name);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("unable to query pg_replication_slots: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(PQntuples(res))
|
||||
{
|
||||
if(strcmp(PQgetvalue(res, 0, 1), "physical") != 0)
|
||||
{
|
||||
log_err(_("Slot '%s' exists and is not a physical slot\n"),
|
||||
slot_name);
|
||||
PQclear(res);
|
||||
}
|
||||
if(strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||
{
|
||||
PQclear(res);
|
||||
log_debug(_("Replication slot '%s' exists but is inactive; reusing\n"),
|
||||
slot_name);
|
||||
|
||||
return true;
|
||||
}
|
||||
PQclear(res);
|
||||
log_err(_("Slot '%s' already exists as an active slot\n"),
|
||||
slot_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
@@ -1051,7 +1094,12 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
||||
bool success = true;
|
||||
struct tm ts;
|
||||
|
||||
if(conn != NULL)
|
||||
/* Only attempt to write a record if a connection handle was provided.
|
||||
Also check that the repmgr schema has been properly initialised - if
|
||||
not it means no configuration file was provided, which can happen with
|
||||
e.g. `repmgr standby clone`, and we won't know which schema to write to.
|
||||
*/
|
||||
if(conn != NULL && strcmp(repmgr_schema, DEFAULT_REPMGR_SCHEMA_PREFIX) != 0)
|
||||
{
|
||||
int n_node_id = htonl(node_id);
|
||||
char *t_successful = successful ? "TRUE" : "FALSE";
|
||||
|
||||
353
repmgr.c
353
repmgr.c
@@ -77,6 +77,7 @@ static bool write_recovery_file_line(FILE *recovery_file, char *recovery_file_pa
|
||||
static void check_master_standby_version_match(PGconn *conn, PGconn *master_conn);
|
||||
static int check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string);
|
||||
static bool check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error);
|
||||
static bool update_node_record_set_master(PGconn *conn, int this_node_id);
|
||||
|
||||
static char *make_pg_path(char *file);
|
||||
|
||||
@@ -139,11 +140,11 @@ main(int argc, char **argv)
|
||||
{"verbose", no_argument, NULL, 'v'},
|
||||
{"pg_bindir", required_argument, NULL, 'b'},
|
||||
{"rsync-only", no_argument, NULL, 'r'},
|
||||
{"fast-checkpoint", no_argument, NULL, 'c'},
|
||||
{"initdb-no-pwprompt", no_argument, NULL, 1},
|
||||
{"check-upstream-config", no_argument, NULL, 2},
|
||||
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
||||
{"fast-checkpoint", no_argument, NULL, 4},
|
||||
{"ignore-external-config-files", no_argument, NULL, 5},
|
||||
{"ignore-external-config-files", no_argument, NULL, 4},
|
||||
{NULL, 0, NULL, 0}
|
||||
};
|
||||
|
||||
@@ -174,7 +175,7 @@ main(int argc, char **argv)
|
||||
/* Prevent getopt_long() from printing an error message */
|
||||
opterr = 0;
|
||||
|
||||
while ((c = getopt_long(argc, argv, "d:h:p:U:S:D:l:f:R:w:k:FWIvr:b:", long_options,
|
||||
while ((c = getopt_long(argc, argv, "d:h:p:U:S:D:l:f:R:w:k:FWIvb:r:c", long_options,
|
||||
&optindex)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
@@ -239,6 +240,9 @@ main(int argc, char **argv)
|
||||
case 'r':
|
||||
runtime_options.rsync_only = true;
|
||||
break;
|
||||
case 'c':
|
||||
runtime_options.fast_checkpoint = true;
|
||||
break;
|
||||
case 1:
|
||||
runtime_options.initdb_no_pwprompt = true;
|
||||
break;
|
||||
@@ -267,9 +271,6 @@ main(int argc, char **argv)
|
||||
strncpy(runtime_options.recovery_min_apply_delay, optarg, MAXLEN);
|
||||
break;
|
||||
case 4:
|
||||
runtime_options.fast_checkpoint = true;
|
||||
break;
|
||||
case 5:
|
||||
runtime_options.ignore_external_config_files = true;
|
||||
break;
|
||||
default:
|
||||
@@ -485,7 +486,7 @@ main(int argc, char **argv)
|
||||
*/
|
||||
if (config_file_required)
|
||||
{
|
||||
if (options.node == -1)
|
||||
if (options.node == NODE_NOT_FOUND)
|
||||
{
|
||||
if(config_file_parsed == true)
|
||||
{
|
||||
@@ -925,7 +926,7 @@ do_standby_clone(void)
|
||||
int i;
|
||||
bool pg_start_backup_executed = false;
|
||||
bool target_directory_provided = false;
|
||||
bool config_file_copy_required = false;
|
||||
bool external_config_file_copy_required = false;
|
||||
|
||||
char master_data_directory[MAXFILENAME];
|
||||
char local_data_directory[MAXFILENAME];
|
||||
@@ -1101,7 +1102,7 @@ do_standby_clone(void)
|
||||
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
||||
{
|
||||
config_file_outside_pgdata = true;
|
||||
config_file_copy_required = true;
|
||||
external_config_file_copy_required = true;
|
||||
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||
}
|
||||
}
|
||||
@@ -1110,7 +1111,7 @@ do_standby_clone(void)
|
||||
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
||||
{
|
||||
hba_file_outside_pgdata = true;
|
||||
config_file_copy_required = true;
|
||||
external_config_file_copy_required = true;
|
||||
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||
}
|
||||
}
|
||||
@@ -1119,13 +1120,14 @@ do_standby_clone(void)
|
||||
if(strcmp(PQgetvalue(res, i, 2), "f") == 0)
|
||||
{
|
||||
ident_file_outside_pgdata = true;
|
||||
config_file_copy_required = true;
|
||||
external_config_file_copy_required = true;
|
||||
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||
}
|
||||
}
|
||||
else
|
||||
log_warning(_("unknown parameter: %s\n"), PQgetvalue(res, i, 0));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
@@ -1316,12 +1318,6 @@ do_standby_clone(void)
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
* With rsync we'll need to explicitly copy configuration files in any
|
||||
* case
|
||||
*/
|
||||
config_file_copy_required = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1335,14 +1331,14 @@ do_standby_clone(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* If configuration files were not in the data directory, we need to copy
|
||||
* them via SSH
|
||||
* If configuration files were not inside the data directory, we'll need to
|
||||
* copy them via SSH (unless `--ignore-external-config-files` was provided)
|
||||
*
|
||||
* TODO: add option to place these files in the same location on the
|
||||
* standby server as on the primary?
|
||||
*/
|
||||
|
||||
if(config_file_copy_required == true)
|
||||
if(external_config_file_copy_required && !runtime_options.ignore_external_config_files)
|
||||
{
|
||||
log_notice(_("copying configuration files from master\n"));
|
||||
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
|
||||
@@ -1354,102 +1350,81 @@ do_standby_clone(void)
|
||||
goto stop_backup;
|
||||
}
|
||||
|
||||
if(strlen(master_config_file))
|
||||
if(config_file_outside_pgdata)
|
||||
{
|
||||
if(runtime_options.ignore_external_config_files && config_file_outside_pgdata)
|
||||
{
|
||||
log_notice(_("standby clone: not copying master config file '%s'\n"), master_config_file);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("standby clone: master config file '%s'\n"), master_config_file);
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_config_file, local_config_file, false, server_version_num);
|
||||
if (r != 0)
|
||||
{
|
||||
log_warning(_("standby clone: failed copying master config file '%s'\n"),
|
||||
master_config_file);
|
||||
retval = ERR_BAD_SSH;
|
||||
goto stop_backup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(strlen(master_hba_file))
|
||||
{
|
||||
if(runtime_options.ignore_external_config_files && hba_file_outside_pgdata)
|
||||
{
|
||||
log_notice(_("standby clone: not copying master config file '%s'\n"), master_hba_file);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("standby clone: master hba file '%s'\n"), master_hba_file);
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_hba_file, local_hba_file, false, server_version_num);
|
||||
if (r != 0)
|
||||
{
|
||||
log_warning(_("standby clone: failed copying master hba file '%s'\n"),
|
||||
master_hba_file);
|
||||
retval = ERR_BAD_SSH;
|
||||
goto stop_backup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(strlen(master_ident_file))
|
||||
{
|
||||
if(runtime_options.ignore_external_config_files && ident_file_outside_pgdata)
|
||||
{
|
||||
log_notice(_("standby clone: not copying master config file '%s'\n"), master_ident_file);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("standby clone: master ident file '%s'\n"), master_ident_file);
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_ident_file, local_ident_file, false, server_version_num);
|
||||
if (r != 0)
|
||||
{
|
||||
log_warning(_("standby clone: failed copying master ident file '%s'\n"),
|
||||
master_ident_file);
|
||||
retval = ERR_BAD_SSH;
|
||||
goto stop_backup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* When using rsync, copy pg_control file last, emulating the base backup
|
||||
* protocol.
|
||||
*/
|
||||
if(runtime_options.rsync_only)
|
||||
{
|
||||
maxlen_snprintf(local_control_file, "%s/global", local_data_directory);
|
||||
|
||||
log_info(_("standby clone: local control file '%s'\n"),
|
||||
local_control_file);
|
||||
|
||||
if (!create_dir(local_control_file))
|
||||
{
|
||||
log_err(_("couldn't create directory %s ...\n"),
|
||||
local_control_file);
|
||||
goto stop_backup;
|
||||
}
|
||||
|
||||
maxlen_snprintf(master_control_file, "%s/global/pg_control",
|
||||
master_data_directory);
|
||||
log_info(_("standby clone: master control file '%s'\n"),
|
||||
master_control_file);
|
||||
log_info(_("standby clone: master config file '%s'\n"), master_config_file);
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_control_file, local_control_file,
|
||||
false, server_version_num);
|
||||
master_config_file, local_config_file, false, server_version_num);
|
||||
if (r != 0)
|
||||
{
|
||||
log_warning(_("standby clone: failed copying master control file '%s'\n"),
|
||||
master_control_file);
|
||||
log_err(_("standby clone: failed copying master config file '%s'\n"),
|
||||
master_config_file);
|
||||
retval = ERR_BAD_SSH;
|
||||
goto stop_backup;
|
||||
}
|
||||
}
|
||||
|
||||
if(hba_file_outside_pgdata)
|
||||
{
|
||||
log_info(_("standby clone: master hba file '%s'\n"), master_hba_file);
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_hba_file, local_hba_file, false, server_version_num);
|
||||
if (r != 0)
|
||||
{
|
||||
log_err(_("standby clone: failed copying master hba file '%s'\n"),
|
||||
master_hba_file);
|
||||
retval = ERR_BAD_SSH;
|
||||
goto stop_backup;
|
||||
}
|
||||
}
|
||||
|
||||
if(ident_file_outside_pgdata)
|
||||
{
|
||||
log_info(_("standby clone: master ident file '%s'\n"), master_ident_file);
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_ident_file, local_ident_file, false, server_version_num);
|
||||
if (r != 0)
|
||||
{
|
||||
log_err(_("standby clone: failed copying master ident file '%s'\n"),
|
||||
master_ident_file);
|
||||
retval = ERR_BAD_SSH;
|
||||
goto stop_backup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* When using rsync, copy pg_control file last, emulating the base backup
|
||||
* protocol.
|
||||
*/
|
||||
if(runtime_options.rsync_only)
|
||||
{
|
||||
maxlen_snprintf(local_control_file, "%s/global", local_data_directory);
|
||||
|
||||
log_info(_("standby clone: local control file '%s'\n"),
|
||||
local_control_file);
|
||||
|
||||
if (!create_dir(local_control_file))
|
||||
{
|
||||
log_err(_("couldn't create directory %s ...\n"),
|
||||
local_control_file);
|
||||
goto stop_backup;
|
||||
}
|
||||
|
||||
maxlen_snprintf(master_control_file, "%s/global/pg_control",
|
||||
master_data_directory);
|
||||
log_info(_("standby clone: master control file '%s'\n"),
|
||||
master_control_file);
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_control_file, local_control_file,
|
||||
false, server_version_num);
|
||||
if (r != 0)
|
||||
{
|
||||
log_warning(_("standby clone: failed copying master control file '%s'\n"),
|
||||
master_control_file);
|
||||
retval = ERR_BAD_SSH;
|
||||
goto stop_backup;
|
||||
}
|
||||
}
|
||||
|
||||
stop_backup:
|
||||
@@ -1562,8 +1537,9 @@ do_standby_promote(void)
|
||||
int i,
|
||||
promote_check_timeout = 60,
|
||||
promote_check_interval = 2;
|
||||
bool promote_sucess = false;
|
||||
bool promote_success = false;
|
||||
bool success;
|
||||
PQExpBufferData details;
|
||||
|
||||
/* We need to connect to check configuration */
|
||||
log_info(_("connecting to standby database\n"));
|
||||
@@ -1637,50 +1613,57 @@ do_standby_promote(void)
|
||||
retval = is_standby(conn);
|
||||
if(!retval)
|
||||
{
|
||||
promote_sucess = true;
|
||||
promote_success = true;
|
||||
break;
|
||||
}
|
||||
sleep(promote_check_interval);
|
||||
}
|
||||
|
||||
if (promote_sucess == false)
|
||||
if (promote_success == false)
|
||||
{
|
||||
PQExpBufferData details;
|
||||
initPQExpBuffer(&details);
|
||||
appendPQExpBuffer(&details,
|
||||
"Node %i could not be promoted to master",
|
||||
options.node);
|
||||
|
||||
create_event_record(old_master_conn,
|
||||
&options,
|
||||
options.node,
|
||||
"standby_promote",
|
||||
false,
|
||||
details.data);
|
||||
/* XXX exit with error? */
|
||||
log_err(_(retval == 1 ?
|
||||
"STANDBY PROMOTE failed, this is still a standby node.\n" :
|
||||
"connection to node lost!\n"));
|
||||
exit(ERR_FAILOVER_FAIL);
|
||||
}
|
||||
else
|
||||
|
||||
|
||||
/* update node information to reflect new status */
|
||||
if(update_node_record_set_master(conn, options.node) == false)
|
||||
{
|
||||
PQExpBufferData details;
|
||||
initPQExpBuffer(&details);
|
||||
appendPQExpBuffer(&details,
|
||||
"Node %i was successfully promoted to master",
|
||||
_("unable to update node record for node %i"),
|
||||
options.node);
|
||||
|
||||
log_notice(_("STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"));
|
||||
/* Log the event */
|
||||
create_event_record(conn,
|
||||
log_err("%s\n", details.data);
|
||||
|
||||
create_event_record(NULL,
|
||||
&options,
|
||||
options.node,
|
||||
"standby_promote",
|
||||
true,
|
||||
"repmgrd_failover_promote",
|
||||
false,
|
||||
details.data);
|
||||
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
PQfinish(old_master_conn);
|
||||
|
||||
initPQExpBuffer(&details);
|
||||
appendPQExpBuffer(&details,
|
||||
"Node %i was successfully promoted to master",
|
||||
options.node);
|
||||
|
||||
log_notice(_("STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"));
|
||||
|
||||
/* Log the event */
|
||||
create_event_record(conn,
|
||||
&options,
|
||||
options.node,
|
||||
"standby_promote",
|
||||
true,
|
||||
details.data);
|
||||
|
||||
PQfinish(conn);
|
||||
|
||||
return;
|
||||
@@ -2232,7 +2215,7 @@ help(const char *progname)
|
||||
printf(_(" -p, --port=PORT database server port\n"));
|
||||
printf(_(" -U, --username=USERNAME database user name to connect as\n"));
|
||||
printf(_("\nConfiguration options:\n"));
|
||||
printf(_(" -b. --pg_bindir=PATH path to PostgreSQL binaries (optional)\n"));
|
||||
printf(_(" -b, --pg_bindir=PATH path to PostgreSQL binaries (optional)\n"));
|
||||
printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
|
||||
" copied to\n"));
|
||||
printf(_(" -l, --local-port=PORT standby or witness server local port\n"));
|
||||
@@ -2242,15 +2225,13 @@ help(const char *progname)
|
||||
" (default: postgres)\n"));
|
||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC\n" \
|
||||
" wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS);
|
||||
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of\n" \
|
||||
" history\n"));
|
||||
printf(_(" -F, --force force potentially dangerous operations\n" \
|
||||
" to happen\n"));
|
||||
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
|
||||
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
||||
printf(_(" -W, --wait wait for a master to appear\n"));
|
||||
printf(_(" -r, --rsync-only use only rsync to clone a standby\n"));
|
||||
printf(_(" -c, --fast-checkpoint force fast checkpoint when cloning a standby\n"));
|
||||
printf(_(" --recovery-min-apply-delay=VALUE set recovery_min_apply_delay in recovery.conf\n" \
|
||||
" when cloning a standby (PostgreSQL 9.4 and later)\n"));
|
||||
printf(_(" --fast-checkpoint force fast checkpoint when cloning a standby\n"));
|
||||
printf(_(" --ignore-external-config-files don't copy configuration files located outside \n" \
|
||||
" the data directory when cloning a standby\n"));
|
||||
printf(_(" --initdb-no-pwprompt don't require superuser password when running initdb\n"));
|
||||
@@ -2440,7 +2421,7 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
*/
|
||||
if (is_directory)
|
||||
{
|
||||
/* Files we don't want */
|
||||
/* Files which we don't want */
|
||||
appendPQExpBuffer(&rsync_flags, "%s",
|
||||
" --exclude=postmaster.pid --exclude=postmaster.opts --exclude=global/pg_control");
|
||||
|
||||
@@ -2454,11 +2435,11 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
" --exclude=postgresql.auto.conf.tmp");
|
||||
}
|
||||
|
||||
/* Temporary files we don't want, if they exist */
|
||||
/* Temporary files which we don't want, if they exist */
|
||||
appendPQExpBuffer(&rsync_flags, " --exclude=%s*",
|
||||
PG_TEMP_FILE_PREFIX);
|
||||
|
||||
/* Directories we don't want */
|
||||
/* Directories which we don't want */
|
||||
appendPQExpBuffer(&rsync_flags, "%s",
|
||||
" --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*");
|
||||
|
||||
@@ -2701,7 +2682,6 @@ create_schema(PGconn *conn)
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
|
||||
|
||||
/* create schema */
|
||||
sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", get_repmgr_schema_quoted(conn));
|
||||
log_debug(_("master register: %s\n"), sqlquery);
|
||||
@@ -3271,6 +3251,92 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error)
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
update_node_record_set_master(PGconn *conn, int this_node_id)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
log_debug(_("Setting %i as master and marking existing master as failed\n"), this_node_id);
|
||||
|
||||
res = PQexec(conn, "BEGIN");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to begin transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
" SET active = FALSE "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND type = 'master' "
|
||||
" AND active IS TRUE ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
options.cluster_name);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set old master node as inactive: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
PQexec(conn, "ROLLBACK");
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
" SET type = 'master', "
|
||||
" upstream_node_id = NULL "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
options.cluster_name,
|
||||
this_node_id);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set current node %i as active master: %s\n"),
|
||||
this_node_id,
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
PQexec(conn, "ROLLBACK");
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
res = PQexec(conn, "COMMIT");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set commit transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
do_check_upstream_config(void)
|
||||
{
|
||||
@@ -3307,7 +3373,6 @@ do_check_upstream_config(void)
|
||||
}
|
||||
|
||||
|
||||
|
||||
static char *
|
||||
make_pg_path(char *file)
|
||||
{
|
||||
|
||||
@@ -7,8 +7,11 @@
|
||||
#
|
||||
# repmgr and repmgrd require these items to be configured:
|
||||
|
||||
# Cluster name
|
||||
cluster=test
|
||||
# Cluster name - this will be used by repmgr to generate its internal
|
||||
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
||||
# to preserve case, we recommend using lower case and avoiding whitespace
|
||||
# to facilitate easier querying of the repmgr views and tables.
|
||||
cluster=example_cluster
|
||||
|
||||
# Node ID and name
|
||||
# (Note: we recommend to avoid naming nodes after their initial
|
||||
|
||||
1
repmgr.h
1
repmgr.h
@@ -49,6 +49,7 @@
|
||||
|
||||
#define MANUAL_FAILOVER 0
|
||||
#define AUTOMATIC_FAILOVER 1
|
||||
#define NODE_NOT_FOUND -1
|
||||
#define NO_UPSTREAM_NODE -1
|
||||
|
||||
|
||||
|
||||
141
repmgrd.c
141
repmgrd.c
@@ -91,7 +91,6 @@ static void witness_monitor(void);
|
||||
static bool check_connection(PGconn *conn, const char *type);
|
||||
static bool set_local_node_failed(void);
|
||||
|
||||
static bool update_node_record_set_master(PGconn *conn, int this_node_id, int old_master_node_id);
|
||||
static bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
||||
|
||||
static void update_shared_memory(char *last_wal_standby_applied);
|
||||
@@ -284,9 +283,17 @@ main(int argc, char **argv)
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Retrieve record for this node from the database */
|
||||
/* Retrieve record for this node from the local database */
|
||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||
|
||||
/* No node record found - exit gracefully */
|
||||
if(node_info.node_id == NODE_NOT_FOUND)
|
||||
{
|
||||
log_err(_("No metadata record found for this node - terminating\n"));
|
||||
log_notice(_("HINT: was this node registered with 'repmgr (master|standby) register'?\n"));
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_debug("node id is %i, upstream is %i\n", node_info.node_id, node_info.upstream_node_id);
|
||||
|
||||
/*
|
||||
@@ -335,9 +342,9 @@ main(int argc, char **argv)
|
||||
log_info(_("starting continuous master connection check\n"));
|
||||
|
||||
/*
|
||||
* Check that master is still alive.
|
||||
* XXX We should also check that the
|
||||
* standby servers are sending info
|
||||
* Check that master is still alive.
|
||||
* XXX We should also check that the
|
||||
* standby servers are sending info
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -637,9 +644,9 @@ witness_monitor(void)
|
||||
*/
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_monitor "
|
||||
" (master_node, standby_node, "
|
||||
" (primary_node, standby_node, "
|
||||
" last_monitor_time, last_apply_time, "
|
||||
" last_wal_master_location, last_wal_standby_location, "
|
||||
" last_wal_primary_location, last_wal_standby_location, "
|
||||
" replication_lag, apply_lag )"
|
||||
" VALUES(%d, %d, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
|
||||
@@ -993,9 +1000,9 @@ standby_monitor(void)
|
||||
*/
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_monitor "
|
||||
" (master_node, standby_node, "
|
||||
" (primary_node, standby_node, "
|
||||
" last_monitor_time, last_apply_time, "
|
||||
" last_wal_master_location, last_wal_standby_location, "
|
||||
" last_wal_primary_location, last_wal_standby_location, "
|
||||
" replication_lag, apply_lag ) "
|
||||
" VALUES(%d, %d, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||
@@ -1434,25 +1441,6 @@ do_master_failover(void)
|
||||
/* and reconnect to the local database */
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
|
||||
/* update node information to reflect new status */
|
||||
if(update_node_record_set_master(my_local_conn, node_info.node_id, failed_master.node_id) == false)
|
||||
{
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("unable to update node record for node %i (promoted to master following failure of node %i)"),
|
||||
node_info.node_id,
|
||||
failed_master.node_id);
|
||||
|
||||
log_err("%s\n", event_details.data);
|
||||
|
||||
create_event_record(NULL,
|
||||
&local_options,
|
||||
node_info.node_id,
|
||||
"repmgrd_failover_promote",
|
||||
false,
|
||||
event_details.data);
|
||||
|
||||
terminate(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
/* update internal record for this node */
|
||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||
@@ -1749,7 +1737,7 @@ set_local_node_failed(void)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int active_master_node_id = -1;
|
||||
int active_master_node_id = NODE_NOT_FOUND;
|
||||
char master_conninfo[MAXLEN];
|
||||
|
||||
if (!check_connection(master_conn, "master"))
|
||||
@@ -2089,7 +2077,7 @@ update_registration(void)
|
||||
|
||||
log_err("%s\n", errmsg.data);
|
||||
|
||||
create_event_record(my_local_conn,
|
||||
create_event_record(master_conn,
|
||||
&local_options,
|
||||
local_options.node,
|
||||
"repmgrd_shutdown",
|
||||
@@ -2234,7 +2222,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
|
||||
t_node_info node_info = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||
t_node_info node_info = { NODE_NOT_FOUND, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||
|
||||
sprintf(sqlquery,
|
||||
"SELECT id, upstream_node_id, conninfo, type, slot_name, active "
|
||||
@@ -2260,7 +2248,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
||||
|
||||
log_err("%s\n", errmsg.data);
|
||||
|
||||
create_event_record(my_local_conn,
|
||||
create_event_record(NULL,
|
||||
&local_options,
|
||||
local_options.node,
|
||||
"repmgrd_shutdown",
|
||||
@@ -2274,7 +2262,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
||||
if (!PQntuples(res)) {
|
||||
log_warning(_("No record found record for node %i\n"), node_id);
|
||||
PQclear(res);
|
||||
node_info.node_id = -1;
|
||||
node_info.node_id = NODE_NOT_FOUND;
|
||||
return node_info;
|
||||
}
|
||||
|
||||
@@ -2313,93 +2301,6 @@ parse_node_type(const char *type)
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
update_node_record_set_master(PGconn *conn, int this_node_id, int old_master_node_id)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
log_debug(_("Setting failed node %i inactive; marking node %i as master\n"), old_master_node_id, this_node_id);
|
||||
|
||||
res = PQexec(conn, "BEGIN");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to begin transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
" SET active = FALSE "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
local_options.cluster_name,
|
||||
old_master_node_id);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set old master node %i as inactive: %s\n"),
|
||||
old_master_node_id,
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
PQexec(conn, "ROLLBACK");
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
" SET type = 'master', "
|
||||
" upstream_node_id = NULL "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
local_options.cluster_name,
|
||||
this_node_id);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set current node %i as active master: %s\n"),
|
||||
this_node_id,
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
PQexec(conn, "ROLLBACK");
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
res = PQexec(conn, "COMMIT");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to set commit transaction: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user