Compare commits

..

25 Commits

Author SHA1 Message Date
Christian Kruse
069f9ff2ed version push 2014-03-17 14:26:56 +01:00
Christian Kruse
b8ade8e908 fixing some documentation errors 2014-03-10 15:51:55 +01:00
Christian Kruse
c0abb3be31 Merge branch 'master' into REL2_0_STABLE 2014-03-06 15:23:52 +01:00
Christian Kruse
fed5c77653 various improvements and bugfixes in the init script 2014-03-06 15:23:22 +01:00
Christian Kruse
8429b43edf Merge pull request #14 from wamonite/fix_follow_user
fix: store the master connection user name on standby follow
2014-03-06 15:20:02 +01:00
Warren Moore
7e55ce737d fix: store the master connection user name on standby follow 2014-03-05 16:49:56 +00:00
Christian Kruse
98c7635fb5 fixing more compiler warnings 2014-03-04 17:58:36 +01:00
Christian Kruse
90ecb2b107 fix: check return values of freopen()
Some compiles complain about not checking the return value of freopen(),
so we check it
2014-03-04 15:32:48 +01:00
Christian Kruse
50b9022a41 fix: don't use Windows newlines 2014-03-04 12:59:23 +01:00
Christian Kruse
150ccc0662 add option to avoid repmgrd started upon installation
Now repmgr.repmgrd.default has another option: REPMGRD_ENABLED. Valid
values are either yes or no.
2014-03-04 12:46:05 +01:00
Christian Kruse
0a71123920 Merge branch 'master' into REL2_0_STABLE 2014-03-03 09:25:08 +01:00
Christian Kruse
0ff14a2aa1 avoid compiler warnings 2014-02-21 13:47:29 +01:00
Christian Kruse
5215265694 fix: now CloseConnections() is much more safe 2014-02-18 17:06:36 +01:00
Christian Kruse
e45ac25348 fix: progname is const, do not free it
The leak is irrelevant
2014-02-18 16:45:35 +01:00
Christian Kruse
a1ce01f033 fix: fixed some leaks 2014-02-18 16:35:29 +01:00
Christian Kruse
516cde621a fix: strcpy() on overlapping memory regions is invalid 2014-02-18 15:42:20 +01:00
Christian Kruse
f0807923a3 fix: gettimeofday() expects two arguments 2014-02-18 15:33:56 +01:00
Christian Kruse
10ca8037f8 added some more log messages
Now we should be able to distinguish different events more easily
2014-02-18 14:10:12 +01:00
Christian Kruse
0dc46f0dc8 fix: set connection to NULL when finishing it
This will avoid CloseConnections() to try to close an already closed connection.
2014-02-18 13:42:49 +01:00
Christian Kruse
c3b58658ad fixing repmgr repl_status columns
repmgr repl_status had the column time_lag which was documented to be
the time a standby is behind master. In fact it only works like this
when viewed on the standby and not on the master: there it only was the
time of the last status update. We dropped that column and replaced it
by a new column „communication_time_lag“ which is the content of the
repl_status column on the master. On the standby we contain the time of
the last update in shared mem though refer always to the correct time
nonetheless where repl_status is queried. We also added a new column,
„replication_time_lag“, which refers to the apply delay.
2014-02-15 01:35:27 +01:00
Christian Kruse
18f1fed77f fixing wait_connection_availability()
wait_connection_availability() did take at least 2 seconds per call in
the old incarnation. Now we may finish a call without any sleep at all
when the result is already ready at the time called
2014-02-15 01:31:12 +01:00
Christian Kruse
d58fd080ca flush stderr after a log message appears
We had the problem that the log file appeared empty for a long time due
to file buffers. Thus we call fflush() after every log message so the
log file gets written out to disk quickly
2014-02-15 01:29:12 +01:00
Christian Kruse
c4ac2d3343 fixing PQexec() calls
fixing several calls where we did not check the result status but only
the return value; the query may fail nonetheless
2014-02-15 01:27:53 +01:00
Christian Kruse
a72c2296e9 Merge branch 'master' into REL2_0_STABLE 2014-02-11 09:28:40 +01:00
Christian Kruse
5ff1beeea7 do not enable autofailover by default
Autofailover is an experimental feature which should not be enabled by
default. The user has to be aware of what he is doing when enabling it.
2014-02-11 09:27:31 +01:00
14 changed files with 395 additions and 119 deletions

View File

@@ -625,18 +625,18 @@ Now restore to the original configuration by stopping
primary server, then bringing up "node2" as a standby with a valid primary server, then bringing up "node2" as a standby with a valid
``recovery.conf`` file. ``recovery.conf`` file.
Stop the "node2" server:: Stop the "node2" server and type the following on "node1" server::
repmgr -f /var/lib/pgsql/repmgr/repmgr.conf standby promote repmgr -f /var/lib/pgsql/repmgr/repmgr.conf standby promote
Now the original primary, "node1" is acting again as primary. Now the original primary, "node1", is acting again as primary.
Start the "node2" server and type this on "node1":: Start the "node2" server and type this on "node2"::
repmgr standby clone --force -h node2 -p 5432 -U postgres -R postgres --verbose repmgr standby clone --force -h node2 -p 5432 -U postgres -R postgres --verbose
Verify the roles have reversed by attempting to insert a record on "node" Verify the roles have reversed by attempting to insert a record on "node1"
and on "node1". and on "node2".
The servers are now again acting as primary on "node1" and standby on "node2". The servers are now again acting as primary on "node1" and standby on "node2".

View File

@@ -191,7 +191,9 @@ trim (char *s)
++s1; ++s1;
/* Copy finished string */ /* Copy finished string */
strcpy (s, s1); memmove (s, s1, s2 - s1);
s[s2 - s1 + 1] = '\0';
return s; return s;
} }

View File

@@ -18,6 +18,8 @@
*/ */
#include <unistd.h> #include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include "repmgr.h" #include "repmgr.h"
#include "strutil.h" #include "strutil.h"
@@ -426,34 +428,65 @@ getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
* return 1 if Ok; 0 if any error ocurred; -1 if timeout reached * return 1 if Ok; 0 if any error ocurred; -1 if timeout reached
*/ */
int int
wait_connection_availability(PGconn *conn, int timeout) wait_connection_availability(PGconn *conn, long long timeout)
{ {
PGresult *res; PGresult *res;
fd_set read_set;
int sock = PQsocket(conn);
struct timeval tmout, before, after;
struct timezone tz;
while(timeout-- >= 0) /* recalc to microseconds */
timeout *= 1000000;
while (timeout > 0)
{ {
if (PQconsumeInput(conn) == 0) if (PQconsumeInput(conn) == 0)
{ {
log_warning(_("wait_connection_availability: could not receive data from connection. %s\n"), log_warning(_("wait_connection_availability: could not receive data from connection. %s\n"),
PQerrorMessage(conn)); PQerrorMessage(conn));
return 0; return 0;
} }
if (PQisBusy(conn) == 0) if (PQisBusy(conn) == 0)
{ {
res = PQgetResult(conn); do {
if (res == NULL) res = PQgetResult(conn);
break; PQclear(res);
PQclear(res); } while(res != NULL);
break;
} }
sleep(1);
tmout.tv_sec = 0;
tmout.tv_usec = 250000;
FD_ZERO(&read_set);
FD_SET(sock, &read_set);
gettimeofday(&before, &tz);
if (select(sock, &read_set, NULL, NULL, &tmout) == -1)
{
log_warning(
_("wait_connection_availability: select() returned with error: %s"),
strerror(errno));
return -1;
}
gettimeofday(&after, &tz);
timeout -= (after.tv_sec * 1000000 + after.tv_usec) -
(before.tv_sec * 1000000 + before.tv_usec);
} }
if (timeout >= 0) if (timeout >= 0)
{
return 1; return 1;
else {
log_warning(_("wait_connection_availability: timeout reached"));
return -1;
} }
log_warning(_("wait_connection_availability: timeout reached"));
return -1;
} }

View File

@@ -39,6 +39,6 @@ const char *get_cluster_size(PGconn *conn);
PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
int *master_id, char *master_conninfo_out); int *master_id, char *master_conninfo_out);
int wait_connection_availability(PGconn *conn, int timeout); int wait_connection_availability(PGconn *conn, long long timeout);
bool CancelQuery(PGconn *conn, int timeout); bool CancelQuery(PGconn *conn, int timeout);
#endif #endif

View File

@@ -1,14 +1,18 @@
#!/bin/sh
# default settings for repmgrd. This file is source by /bin/sh from # default settings for repmgrd. This file is source by /bin/sh from
# /etc/init.d/repmgrd # /etc/init.d/repmgrd
# Options for repmgrd # disable repmgrd by default so it won't get started upon installation
REPMGRD_OPTS="" # valid values: yes/no
REPMGRD_ENABLED=no
# Options for repmgrd (required)
#REPMGRD_OPTS="--config_file /path/to/repmgr.conf"
# User to run repmgrd as
#REPMGRD_USER=postgres
# repmgrd binary # repmgrd binary
REPMGR_BIN="/usr/bin/repmgr" #REPMGR_BIN=/usr/bin/repmgr
# pid file # pid file
REPMGR_PIDFILE="/var/run/repmgrd.pid" #REPMGR_PIDFILE=/var/run/repmgrd.pid

View File

@@ -1,48 +1,101 @@
#!/bin/sh #!/bin/sh
### BEGIN INIT INFO ### BEGIN INIT INFO
# Provides: repmgrd # Provides: repmgrd
# Required-Start: $local_fs $remote_fs $network $syslog $postgresql # Required-Start: $local_fs $remote_fs $network $syslog postgresql
# Required-Stop: $local_fs $remote_fs $network $syslog $postgresql # Required-Stop: $local_fs $remote_fs $network $syslog postgresql
# Should-Start: $syslog $postgresql # Should-Start: $syslog postgresql
# Should-Start: $syslog $postgresql
# Default-Start: 2 3 4 5 # Default-Start: 2 3 4 5
# Default-Stop: 0 1 6 # Default-Stop: 0 1 6
# Short-Description: Start/stop repmgrd # Short-Description: Start/stop repmgrd
# Description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
### END INIT INFO ### END INIT INFO
set -e set -e
if test -f /etc/default/repmgrd; then DESC="PostgreSQL replication management and monitoring daemon"
. /etc/default/repmgrd NAME=repmgrd
fi
if [ -z "$REPMGRD_BIN" ]; then REPMGRD_ENABLED=no
REPMGRD_BIN="/usr/bin/repmgrd" REPMGRD_OPTS=
fi REPMGRD_USER=postgres
REPMGRD_BIN=/usr/bin/repmgrd
REPMGRD_PIDFILE=/var/run/repmgrd.pid
if [ -z "$REPMGRD_PIDFILE" ]; then # Read configuration variable file if it is present
REPMGRD_PIDFILE="/var/run/repmgrd.pid" [ -r /etc/default/$NAME ] && . /etc/default/$NAME
fi
test -x $REPMGRD_BIN || exit 0 test -x $REPMGRD_BIN || exit 0
case "$1" in case "$REPMGRD_ENABLED" in
start) [Yy]*)
start-stop-daemon --start --quiet --make-pidfile --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN $REPMGRD_OPTS break
;; ;;
stop)
start-stop-daemon --stop --oknodo --quiet --pidfile $REPMGRD_PIDFILE
;;
restart)
$0 stop && $0 start || exit 1
;;
*) *)
echo "Usage: $0 {start|stop|restart}" exit 0
exit 1 ;;
;; esac
# Define LSB log_* functions.
. /lib/lsb/init-functions
if [ -z "$REPMGRD_OPTS" ]
then
log_warning_msg "Not starting $NAME, REPMGRD_OPTS not set in /etc/default/$NAME"
exit 0
fi
do_start()
{
# Return
# 0 if daemon has been started
# 1 if daemon was already running
# other if daemon could not be started or a failure occured
start-stop-daemon --start --quiet --chuid $REPMGRD_USER --make-pidfile --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN -- $REPMGRD_OPTS
}
do_stop()
{
# Return
# 0 if daemon has been stopped
# 1 if daemon was already stopped
# other if daemon could not be stopped or a failure occurred
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN
}
case "$1" in
start)
log_daemon_msg "Starting $DESC" "$NAME"
do_start
case "$?" in
0) log_end_msg 0 ;;
1) log_progress_msg "already started"
log_end_msg 0 ;;
*) log_end_msg 1 ;;
esac
;;
stop)
log_daemon_msg "Stopping $DESC" "$NAME"
do_stop
case "$?" in
0) log_end_msg 0 ;;
1) log_progress_msg "already stopped"
log_end_msg 0 ;;
*) log_end_msg 1 ;;
esac
;;
restart|force-reload)
$0 stop
$0 start
;;
status)
status_of_proc -p $PIDFILE $DAEMON $NAME && exit 0 || exit $?
;;
*)
echo "Usage: $SCRIPTNAME {start|stop|restart|force-reload|status}" >&2
exit 3
;;
esac esac
exit 0 exit 0

11
log.c
View File

@@ -58,6 +58,8 @@ void stderr_log_with_level(const char *level_name, int level, const char *fmt, .
vfprintf(stderr, fmt1, ap); vfprintf(stderr, fmt1, ap);
va_end(ap); va_end(ap);
fflush(stderr);
} }
} }
@@ -142,7 +144,14 @@ bool logger_init(t_configuration_options *opts, const char* ident, const char* l
if (*opts->logfile) if (*opts->logfile)
{ {
freopen(opts->logfile, "a", stderr); FILE *fd;
fd = freopen(opts->logfile, "a", stderr);
if (fd == NULL)
{
fprintf(stderr, "error reopening stderr to '%s': %s",
opts->logfile, strerror(errno));
}
} }
return true; return true;

View File

@@ -575,13 +575,15 @@ do_master_register(void)
repmgr_schema, options.node); repmgr_schema, options.node);
log_debug(_("master register: %s\n"), sqlquery); log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_warning(_("Cannot delete node details, %s\n"), log_warning(_("Cannot delete node details, %s\n"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
} }
/* Ensure there isn't any other master already registered */ /* Ensure there isn't any other master already registered */
@@ -603,13 +605,15 @@ do_master_register(void)
options.conninfo, options.priority); options.conninfo, options.priority);
log_debug(_("master register: %s\n"), sqlquery); log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_warning(_("Cannot insert node details, %s\n"), log_warning(_("Cannot insert node details, %s\n"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
PQfinish(conn); PQfinish(conn);
log_notice(_("Master node correctly registered for cluster %s with id %d (conninfo: %s)\n"), log_notice(_("Master node correctly registered for cluster %s with id %d (conninfo: %s)\n"),
@@ -735,7 +739,8 @@ do_standby_register(void)
log_debug(_("standby register: %s\n"), sqlquery); log_debug(_("standby register: %s\n"), sqlquery);
if (!PQexec(master_conn, sqlquery)) res = PQexec(master_conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot delete node details, %s\n"), log_err(_("Cannot delete node details, %s\n"),
PQerrorMessage(master_conn)); PQerrorMessage(master_conn));
@@ -743,6 +748,7 @@ do_standby_register(void)
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
} }
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, name, conninfo, priority) " sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, name, conninfo, priority) "
@@ -1488,6 +1494,7 @@ do_standby_follow(void)
*/ */
strncpy(runtime_options.host, PQhost(master_conn), MAXLEN); strncpy(runtime_options.host, PQhost(master_conn), MAXLEN);
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN); strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
strncpy(runtime_options.username, PQuser(master_conn), MAXLEN);
PQfinish(master_conn); PQfinish(master_conn);
log_info(_("%s Changing standby's master\n"),progname); log_info(_("%s Changing standby's master\n"),progname);
@@ -1695,7 +1702,8 @@ do_witness_create(void)
repmgr_schema, options.node, options.cluster_name, options.node_name, options.conninfo, options.priority); repmgr_schema, options.node, options.cluster_name, options.node_name, options.conninfo, options.priority);
log_debug(_("witness create: %s"), sqlquery); log_debug(_("witness create: %s"), sqlquery);
if (!PQexec(masterconn, sqlquery)) res = PQexec(masterconn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot insert node details, %s\n"), PQerrorMessage(masterconn)); log_err(_("Cannot insert node details, %s\n"), PQerrorMessage(masterconn));
PQfinish(masterconn); PQfinish(masterconn);
@@ -2068,16 +2076,49 @@ static bool
create_schema(PGconn *conn) create_schema(PGconn *conn)
{ {
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
PGresult *res;
sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", repmgr_schema); sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", repmgr_schema);
log_debug(_("master register: %s\n"), sqlquery); log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot create the schema %s: %s\n"), log_err(_("Cannot create the schema %s: %s\n"),
repmgr_schema, PQerrorMessage(conn)); repmgr_schema, PQerrorMessage(conn));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
/* to avoid confusion of the time_lag field and provide a consistent UI we
* use these functions for providing the latest update timestamp */
sqlquery_snprintf(sqlquery,
"CREATE FUNCTION %s.repmgr_update_last_updated() RETURNS TIMESTAMP WITH TIME ZONE "
"AS '$libdir/repmgr_funcs', 'repmgr_update_last_updated' "
" LANGUAGE C STRICT", repmgr_schema);
res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{
fprintf(stderr, "Cannot create the function repmgr_update_last_updated: %s\n",
PQerrorMessage(conn));
return false;
}
PQclear(res);
sqlquery_snprintf(sqlquery,
"CREATE FUNCTION %s.repmgr_get_last_updated() RETURNS TIMESTAMP WITH TIME ZONE "
"AS '$libdir/repmgr_funcs', 'repmgr_get_last_updated' "
"LANGUAGE C STRICT", repmgr_schema);
res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{
fprintf(stderr, "Cannot create the function repmgr_get_last_updated: %s\n",
PQerrorMessage(conn));
return false;
}
PQclear(res);
/* ... the tables */ /* ... the tables */
sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( " sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( "
@@ -2088,86 +2129,101 @@ create_schema(PGconn *conn)
" priority integer not null, " " priority integer not null, "
" witness boolean not null default false)", repmgr_schema); " witness boolean not null default false)", repmgr_schema);
log_debug(_("master register: %s\n"), sqlquery); log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot create the table %s.repl_nodes: %s\n"), log_err(_("Cannot create the table %s.repl_nodes: %s\n"),
repmgr_schema, PQerrorMessage(conn)); repmgr_schema, PQerrorMessage(conn));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( " sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( "
" primary_node INTEGER NOT NULL, " " primary_node INTEGER NOT NULL, "
" standby_node INTEGER NOT NULL, " " standby_node INTEGER NOT NULL, "
" last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, "
" last_apply_time TIMESTAMP WITH TIME ZONE, "
" last_wal_primary_location TEXT NOT NULL, " " last_wal_primary_location TEXT NOT NULL, "
" last_wal_standby_location TEXT, " " last_wal_standby_location TEXT, "
" replication_lag BIGINT NOT NULL, " " replication_lag BIGINT NOT NULL, "
" apply_lag BIGINT NOT NULL) ", repmgr_schema); " apply_lag BIGINT NOT NULL) ", repmgr_schema);
log_debug(_("master register: %s\n"), sqlquery); log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot create the table %s.repl_monitor: %s\n"), log_err(_("Cannot create the table %s.repl_monitor: %s\n"),
repmgr_schema, PQerrorMessage(conn)); repmgr_schema, PQerrorMessage(conn));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
/* a view */ /* a view */
sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS " sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS "
" SELECT primary_node, standby_node, name AS standby_name, last_monitor_time, " " SELECT primary_node, standby_node, name AS standby_name, last_monitor_time, "
" last_wal_primary_location, last_wal_standby_location, " " last_wal_primary_location, last_wal_standby_location, "
" pg_size_pretty(replication_lag) replication_lag, " " pg_size_pretty(replication_lag) replication_lag, "
" age(now(), last_apply_time) AS replication_time_lag, "
" pg_size_pretty(apply_lag) apply_lag, " " pg_size_pretty(apply_lag) apply_lag, "
" age(now(), last_monitor_time) AS time_lag " " age(now(), CASE WHEN pg_is_in_recovery() THEN %s.repmgr_get_last_updated() ELSE last_monitor_time END) AS communication_time_lag "
" FROM %s.repl_monitor JOIN %s.repl_nodes ON standby_node = id " " FROM %s.repl_monitor JOIN %s.repl_nodes ON standby_node = id "
" WHERE (standby_node, last_monitor_time) IN (SELECT standby_node, MAX(last_monitor_time) " " WHERE (standby_node, last_monitor_time) IN (SELECT standby_node, MAX(last_monitor_time) "
" FROM %s.repl_monitor GROUP BY 1)", " FROM %s.repl_monitor GROUP BY 1)",
repmgr_schema, repmgr_schema, repmgr_schema, repmgr_schema); repmgr_schema, repmgr_schema, repmgr_schema, repmgr_schema, repmgr_schema);
log_debug(_("master register: %s\n"), sqlquery); log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery))
res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot create the view %s.repl_status: %s\n"), log_err(_("Cannot create the view %s.repl_status: %s\n"),
repmgr_schema, PQerrorMessage(conn)); repmgr_schema, PQerrorMessage(conn));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
/* an index to improve performance of the view */ /* an index to improve performance of the view */
sqlquery_snprintf(sqlquery, "CREATE INDEX idx_repl_status_sort " sqlquery_snprintf(sqlquery, "CREATE INDEX idx_repl_status_sort "
" ON %s.repl_monitor (last_monitor_time, standby_node) ", " ON %s.repl_monitor (last_monitor_time, standby_node) ",
repmgr_schema); repmgr_schema);
log_debug(_("master register: %s\n"), sqlquery); log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot indexing table %s.repl_monitor: %s\n"), log_err(_("Can't index table %s.repl_monitor: %s\n"),
repmgr_schema, PQerrorMessage(conn)); repmgr_schema, PQerrorMessage(conn));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res);
/* XXX Here we MUST try to load the repmgr_function.sql not hardcode it here */ /* XXX Here we MUST try to load the repmgr_function.sql not hardcode it here */
sqlquery_snprintf(sqlquery, sqlquery_snprintf(sqlquery,
"CREATE OR REPLACE FUNCTION %s.repmgr_update_standby_location(text) RETURNS boolean " "CREATE OR REPLACE FUNCTION %s.repmgr_update_standby_location(text) RETURNS boolean "
"AS '$libdir/repmgr_funcs', 'repmgr_update_standby_location' " "AS '$libdir/repmgr_funcs', 'repmgr_update_standby_location' "
"LANGUAGE C STRICT ", repmgr_schema); "LANGUAGE C STRICT ", repmgr_schema);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
fprintf(stderr, "Cannot create the function repmgr_update_standby_location: %s\n", fprintf(stderr, "Cannot create the function repmgr_update_standby_location: %s\n",
PQerrorMessage(conn)); PQerrorMessage(conn));
return false; return false;
} }
PQclear(res);
sqlquery_snprintf(sqlquery, sqlquery_snprintf(sqlquery,
"CREATE OR REPLACE FUNCTION %s.repmgr_get_last_standby_location() RETURNS text " "CREATE OR REPLACE FUNCTION %s.repmgr_get_last_standby_location() RETURNS text "
"AS '$libdir/repmgr_funcs', 'repmgr_get_last_standby_location' " "AS '$libdir/repmgr_funcs', 'repmgr_get_last_standby_location' "
"LANGUAGE C STRICT ", repmgr_schema); "LANGUAGE C STRICT ", repmgr_schema);
if (!PQexec(conn, sqlquery)) res = PQexec(conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
fprintf(stderr, "Cannot create the function repmgr_get_last_standby_location: %s\n", fprintf(stderr, "Cannot create the function repmgr_get_last_standby_location: %s\n",
PQerrorMessage(conn)); PQerrorMessage(conn));
return false; return false;
} }
PQclear(res);
return true; return true;
} }
@@ -2182,7 +2238,8 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn)
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", repmgr_schema); sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", repmgr_schema);
log_debug("copy_configuration: %s\n", sqlquery); log_debug("copy_configuration: %s\n", sqlquery);
if (!PQexec(witnessconn, sqlquery)) res = PQexec(witnessconn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
fprintf(stderr, "Cannot clean node details in the witness, %s\n", fprintf(stderr, "Cannot clean node details in the witness, %s\n",
PQerrorMessage(witnessconn)); PQerrorMessage(witnessconn));
@@ -2208,7 +2265,8 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn)
atoi(PQgetvalue(res, i, 3)), atoi(PQgetvalue(res, i, 3)),
PQgetvalue(res, i, 4)); PQgetvalue(res, i, 4));
if (!PQexec(witnessconn, sqlquery)) res = PQexec(witnessconn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
fprintf(stderr, "Cannot copy configuration to witness, %s\n", fprintf(stderr, "Cannot copy configuration to witness, %s\n",
PQerrorMessage(witnessconn)); PQerrorMessage(witnessconn));

View File

@@ -22,7 +22,7 @@ reconnect_attempts=6
reconnect_interval=10 reconnect_interval=10
# Autofailover options # Autofailover options
failover=automatic failover=manual
priority=-1 priority=-1
promote_command='repmgr standby promote -f /path/to/repmgr.conf' promote_command='repmgr standby promote -f /path/to/repmgr.conf'
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W' follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'

View File

@@ -144,17 +144,24 @@ static void terminate(int retval);
static void setup_event_handlers(void); static void setup_event_handlers(void);
#endif #endif
static void do_daemonize(); static void do_daemonize(void);
static void check_and_create_pid_file(const char *pid_file); static void check_and_create_pid_file(const char *pid_file);
#define CloseConnections() \ static void
if (PQisBusy(primaryConn) == 1) \ CloseConnections() {
(void) CancelQuery(primaryConn, local_options.master_response_timeout); \ if (primaryConn != NULL && PQisBusy(primaryConn) == 1)
if (myLocalConn != NULL) \ CancelQuery(primaryConn, local_options.master_response_timeout);
PQfinish(myLocalConn); \
if (primaryConn != NULL && primaryConn != myLocalConn) \ if (myLocalConn != NULL)
PQfinish(myLocalConn);
if (primaryConn != NULL && primaryConn != myLocalConn)
PQfinish(primaryConn); PQfinish(primaryConn);
primaryConn = NULL;
myLocalConn = NULL;
}
int int
main(int argc, char **argv) main(int argc, char **argv)
@@ -172,6 +179,7 @@ main(int argc, char **argv)
int optindex; int optindex;
int c, ret; int c, ret;
bool daemonize = false; bool daemonize = false;
FILE *fd;
char standby_version[MAXVERSIONSTR], *ret_ver; char standby_version[MAXVERSIONSTR], *ret_ver;
@@ -241,8 +249,19 @@ main(int argc, char **argv)
terminate(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
freopen("/dev/null", "r", stdin); fd = freopen("/dev/null", "r", stdin);
freopen("/dev/null", "w", stdout); if (fd == NULL)
{
fprintf(stderr, "error reopening stdin to '/dev/null': %s",
strerror(errno));
}
fd = freopen("/dev/null", "w", stdout);
if (fd == NULL)
{
fprintf(stderr, "error reopening stdout to '/dev/null': %s",
strerror(errno));
}
logger_init(&local_options, progname, local_options.loglevel, local_options.logfacility); logger_init(&local_options, progname, local_options.loglevel, local_options.logfacility);
if (verbose) if (verbose)
@@ -250,7 +269,13 @@ main(int argc, char **argv)
if (log_type == REPMGR_SYSLOG) if (log_type == REPMGR_SYSLOG)
{ {
freopen("/dev/null", "w", stderr); fd = freopen("/dev/null", "w", stderr);
if (fd == NULL)
{
fprintf(stderr, "error reopening stderr to '/dev/null': %s",
strerror(errno));
}
} }
snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name);
@@ -358,7 +383,14 @@ main(int argc, char **argv)
if (*local_options.logfile) if (*local_options.logfile)
{ {
freopen(local_options.logfile, "a", stderr); FILE *fd;
fd = freopen(local_options.logfile, "a", stderr);
if (fd == NULL)
{
fprintf(stderr, "error reopening stderr to '%s': %s",
local_options.logfile, strerror(errno));
}
} }
update_registration(); update_registration();
@@ -431,10 +463,6 @@ main(int argc, char **argv)
} while (true); } while (true);
/* Prevent a double-free */
if (primaryConn == myLocalConn)
myLocalConn = NULL;
/* close the connection to the database and cleanup */ /* close the connection to the database and cleanup */
CloseConnections(); CloseConnections();
@@ -530,6 +558,7 @@ StandbyMonitor(void)
char last_wal_primary_location[MAXLEN]; char last_wal_primary_location[MAXLEN];
char last_wal_standby_received[MAXLEN]; char last_wal_standby_received[MAXLEN];
char last_wal_standby_applied[MAXLEN]; char last_wal_standby_applied[MAXLEN];
char last_wal_standby_applied_timestamp[MAXLEN];
unsigned long long int lsn_primary; unsigned long long int lsn_primary;
unsigned long long int lsn_standby_received; unsigned long long int lsn_standby_received;
@@ -546,11 +575,15 @@ StandbyMonitor(void)
if (!CheckConnection(myLocalConn, "standby")) if (!CheckConnection(myLocalConn, "standby"))
{ {
log_err("Failed to connect to local node, exiting!\n");
terminate(1); terminate(1);
} }
if (PQstatus(primaryConn) != CONNECTION_OK) if (PQstatus(primaryConn) != CONNECTION_OK)
{ {
PQfinish(primaryConn);
primaryConn = NULL;
if (local_options.failover == MANUAL_FAILOVER) if (local_options.failover == MANUAL_FAILOVER)
{ {
log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n")); log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));
@@ -637,7 +670,7 @@ StandbyMonitor(void)
sqlquery_snprintf( sqlquery_snprintf(
sqlquery, sqlquery,
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
"pg_last_xlog_replay_location()"); "pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp()");
res = PQexec(myLocalConn, sqlquery); res = PQexec(myLocalConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -651,6 +684,7 @@ StandbyMonitor(void)
strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN); strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
strncpy(last_wal_standby_received , PQgetvalue(res, 0, 1), MAXLEN); strncpy(last_wal_standby_received , PQgetvalue(res, 0, 1), MAXLEN);
strncpy(last_wal_standby_applied , PQgetvalue(res, 0, 2), MAXLEN); strncpy(last_wal_standby_applied , PQgetvalue(res, 0, 2), MAXLEN);
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
PQclear(res); PQclear(res);
/* Get primary xlog info */ /* Get primary xlog info */
@@ -678,9 +712,10 @@ StandbyMonitor(void)
sqlquery_snprintf(sqlquery, sqlquery_snprintf(sqlquery,
"INSERT INTO %s.repl_monitor " "INSERT INTO %s.repl_monitor "
"VALUES(%d, %d, '%s'::timestamp with time zone, " "VALUES(%d, %d, '%s'::timestamp with time zone, "
" '%s', '%s', " " '%s'::timestamp with time zone, '%s', '%s', "
" %lld, %lld)", repmgr_schema, " %lld, %lld)", repmgr_schema,
primary_options.node, local_options.node, monitor_standby_timestamp, primary_options.node, local_options.node, monitor_standby_timestamp,
last_wal_standby_applied_timestamp,
last_wal_primary_location, last_wal_primary_location,
last_wal_standby_received, last_wal_standby_received,
(lsn_primary - lsn_standby_received), (lsn_primary - lsn_standby_received),
@@ -770,7 +805,12 @@ do_failover(void)
/* if we can't see the node just skip it */ /* if we can't see the node just skip it */
if (PQstatus(nodeConn) != CONNECTION_OK) if (PQstatus(nodeConn) != CONNECTION_OK)
{
if (nodeConn != NULL)
PQfinish(nodeConn);
continue; continue;
}
visible_nodes++; visible_nodes++;
nodes[i].is_visible = true; nodes[i].is_visible = true;
@@ -953,6 +993,7 @@ do_failover(void)
/* Close the connection to this server */ /* Close the connection to this server */
PQfinish(myLocalConn); PQfinish(myLocalConn);
myLocalConn = NULL;
/* /*
* determine which one is the best candidate to promote to primary * determine which one is the best candidate to promote to primary
@@ -1269,6 +1310,8 @@ terminate(int retval)
unlink(pid_file); unlink(pid_file);
} }
log_info("Terminating...\n");
exit(retval); exit(retval);
} }
@@ -1323,6 +1366,7 @@ do_daemonize()
{ {
char *ptr, path[MAXLEN]; char *ptr, path[MAXLEN];
pid_t pid = fork(); pid_t pid = fork();
int ret;
switch (pid) switch (pid)
{ {
@@ -1371,7 +1415,12 @@ do_daemonize()
*path = '/'; *path = '/';
} }
chdir(path); ret = chdir(path);
if (ret != 0)
{
log_err("Error changing directory to '%s': %s", path,
strerror(errno));
}
break; break;
@@ -1387,6 +1436,7 @@ check_and_create_pid_file(const char *pid_file)
FILE *fd; FILE *fd;
char buff[MAXLEN]; char buff[MAXLEN];
pid_t pid; pid_t pid;
size_t nread;
if (stat(pid_file, &st) != -1) if (stat(pid_file, &st) != -1)
{ {
@@ -1400,7 +1450,14 @@ check_and_create_pid_file(const char *pid_file)
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
fread(buff, MAXLEN - 1, 1, fd); nread = fread(buff, MAXLEN - 1, 1, fd);
if (nread == 0 && ferror(fd))
{
log_err("Error reading PID file '%s', giving up...\n", pid_file);
exit(ERR_BAD_CONFIG);
}
fclose(fd); fclose(fd);
pid = atoi(buff); pid = atoi(buff);

View File

@@ -15,6 +15,7 @@
#include "storage/shmem.h" #include "storage/shmem.h"
#include "storage/spin.h" #include "storage/spin.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/timestamp.h"
/* same definition as the one in xlog_internal.h */ /* same definition as the one in xlog_internal.h */
#define MAXFNAMELEN 64 #define MAXFNAMELEN 64
@@ -28,6 +29,7 @@ typedef struct repmgrSharedState
{ {
LWLockId lock; /* protects search/modification */ LWLockId lock; /* protects search/modification */
char location[MAXFNAMELEN]; /* last known xlog location */ char location[MAXFNAMELEN]; /* last known xlog location */
TimestampTz last_updated;
} repmgrSharedState; } repmgrSharedState;
/* Links to shared memory state */ /* Links to shared memory state */
@@ -49,6 +51,12 @@ Datum repmgr_get_last_standby_location(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(repmgr_update_standby_location); PG_FUNCTION_INFO_V1(repmgr_update_standby_location);
PG_FUNCTION_INFO_V1(repmgr_get_last_standby_location); PG_FUNCTION_INFO_V1(repmgr_get_last_standby_location);
Datum repmgr_update_last_updated(PG_FUNCTION_ARGS);
Datum repmgr_get_last_updated(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(repmgr_update_last_updated);
PG_FUNCTION_INFO_V1(repmgr_get_last_updated);
/* /*
* Module load callback * Module load callback
@@ -187,3 +195,38 @@ repmgr_update_standby_location(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(repmgr_set_standby_location(locationstr)); PG_RETURN_BOOL(repmgr_set_standby_location(locationstr));
} }
/* update and return last updated with current timestamp */
Datum
repmgr_update_last_updated(PG_FUNCTION_ARGS)
{
TimestampTz last_updated = GetCurrentTimestamp();
/* Safety check... */
if (!shared_state)
PG_RETURN_NULL();
LWLockAcquire(shared_state->lock, LW_SHARED);
shared_state->last_updated = last_updated;
LWLockRelease(shared_state->lock);
PG_RETURN_TIMESTAMPTZ(last_updated);
}
/* get last updated timestamp */
Datum
repmgr_get_last_updated(PG_FUNCTION_ARGS)
{
TimestampTz last_updated;
/* Safety check... */
if (!shared_state)
PG_RETURN_NULL();
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
last_updated = shared_state->last_updated;
LWLockRelease(shared_state->lock);
PG_RETURN_TIMESTAMPTZ(last_updated);
}

View File

@@ -1,6 +1,6 @@
/* /*
* repmgr_function.sql * repmgr_function.sql
* Copyright (c) 2ndQuadrant, 2010 * Copyright (c) 2ndQuadrant, 2010-2014
* *
*/ */
@@ -13,3 +13,11 @@ LANGUAGE C STRICT;
CREATE FUNCTION repmgr_get_last_standby_location() RETURNS text CREATE FUNCTION repmgr_get_last_standby_location() RETURNS text
AS 'MODULE_PATHNAME', 'repmgr_get_last_standby_location' AS 'MODULE_PATHNAME', 'repmgr_get_last_standby_location'
LANGUAGE C STRICT; LANGUAGE C STRICT;
CREATE FUNCTION repmgr_update_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
AS 'MODULE_PATHNAME', 'repmgr_update_last_updated'
LANGUAGE C STRICT;
CREATE FUNCTION repmgr_get_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
AS 'MODULE_PATHNAME', 'repmgr_get_last_updated'
LANGUAGE C STRICT;

View File

@@ -1,2 +1,11 @@
/*
* uninstall_repmgr_funcs.sql
* Copyright (c) 2ndQuadrant, 2010-2014
*
*/
DROP FUNCTION repmgr_update_standby_location(text); DROP FUNCTION repmgr_update_standby_location(text);
DROP FUNCTION repmgr_get_last_standby_location(); DROP FUNCTION repmgr_get_last_standby_location();
DROP FUNCTION repmgr_update_last_updated();
DROP FUNCTION repmgr_get_last_updated();

View File

@@ -1,5 +1,5 @@
#ifndef _VERSION_H_ #ifndef _VERSION_H_
#define _VERSION_H_ #define _VERSION_H_
#define REPMGR_VERSION "2.0RC1" #define REPMGR_VERSION "2.0"
#endif #endif