diff --git a/CREDITS b/CREDITS index 78e707b8..6f556936 100644 --- a/CREDITS +++ b/CREDITS @@ -10,3 +10,7 @@ Hannu Krosing Cédric Villemain Charles Duffy Daniel Farina +Shawn Ellis +Jay Taylor +Christian Kruse +Krzysztof Gajdemski diff --git a/HISTORY b/HISTORY index bfd36a17..68f84d1e 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,11 @@ -2.0beta 2012-07-27 +2.0beta2 2013-12-19 + Improve autofailover logic and algorithms (Jaime, Andres) + Ignore pg_log when cloning (Jaime) + Add timestamps to log line in stderr (Christian) + Correctly check wal_keep_segments (Jay Taylor) + Add a ssh_options parameter (Jay Taylor) + +2.0beta1 2012-07-27 Make CLONE command try to make an exact copy including $PGDATA location (Cedric) Add detection of master failure (Jaime) Add the notion of a witness server (Jaime) diff --git a/check_dir.c b/check_dir.c index 5ae4c096..f578b4e1 100644 --- a/check_dir.c +++ b/check_dir.c @@ -225,12 +225,12 @@ is_pg_dir(char *dir) struct stat sb; int r; - // test pgdata + /* test pgdata */ xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); if (stat(path, &sb) == 0) return true; - // test tablespace dir + /* test tablespace dir */ sprintf(path, "ls %s/PG_*/ -I*", dir); r = system(path); if (r == 0) @@ -256,7 +256,7 @@ create_pgdir(char *dir, bool force) { log_err(_("couldn't create directory \"%s\"...\n"), dir); - exit(ERR_BAD_CONFIG); + return false; } break; case 1: @@ -268,7 +268,7 @@ create_pgdir(char *dir, bool force) { log_err(_("could not change permissions of directory \"%s\": %s\n"), dir, strerror(errno)); - exit(ERR_BAD_CONFIG); + return false; } break; case 2: @@ -293,7 +293,7 @@ create_pgdir(char *dir, bool force) "If you are sure you want to clone here, " "please check there is no PostgreSQL server " "running and use the --force option\n")); - exit(ERR_BAD_CONFIG); + return false; } return false; @@ -301,7 +301,7 @@ create_pgdir(char *dir, bool force) /* Trouble accessing directory */ log_err(_("could not access directory \"%s\": %s\n"), dir, strerror(errno)); - exit(ERR_BAD_CONFIG); + return false; } return true; } diff --git a/config.c b/config.c index 7c4e4ca0..6f8f0362 100644 --- a/config.c +++ b/config.c @@ -41,6 +41,9 @@ parse_config(const char *config_file, t_configuration_options *options) memset(options->promote_command, 0, sizeof(options->promote_command)); memset(options->follow_command, 0, sizeof(options->follow_command)); memset(options->rsync_options, 0, sizeof(options->rsync_options)); + memset(options->ssh_options, 0, sizeof(options->ssh_options)); + memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); + memset(options->pgctl_options, 0, sizeof(options->pgctl_options)); /* if nothing has been provided defaults to 60 */ options->master_response_timeout = 60; @@ -78,6 +81,8 @@ parse_config(const char *config_file, t_configuration_options *options) strncpy (options->conninfo, value, MAXLEN); else if (strcmp(name, "rsync_options") == 0) strncpy (options->rsync_options, value, QUERY_STR_LEN); + else if (strcmp(name, "ssh_options") == 0) + strncpy (options->ssh_options, value, QUERY_STR_LEN); else if (strcmp(name, "loglevel") == 0) strncpy (options->loglevel, value, MAXLEN); else if (strcmp(name, "logfacility") == 0) @@ -111,6 +116,10 @@ parse_config(const char *config_file, t_configuration_options *options) options->reconnect_attempts = atoi(value); else if (strcmp(name, "reconnect_interval") == 0) options->reconnect_intvl = atoi(value); + else if (strcmp(name, "pg_bindir") == 0) + strncpy (options->pg_bindir, value, MAXLEN); + else if (strcmp(name, "pg_ctl_options") == 0) + strncpy (options->pgctl_options, value, MAXLEN); else log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value); } @@ -148,6 +157,12 @@ parse_config(const char *config_file, t_configuration_options *options) log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n")); exit(ERR_BAD_CONFIG); } + + if (*options->pg_bindir == '\0') + { + log_err(_("pg_bindir config value not found. Check the configuration file.\n")); + exit(ERR_BAD_CONFIG); + } } @@ -218,49 +233,49 @@ reload_configuration(char *config_file, t_configuration_options *orig_options) parse_config(config_file, &new_options); if (new_options.node == -1) { - log_warning(_("\nCannot load new configuration, will keep current one.\n")); + log_warning(_("Cannot load new configuration, will keep current one.\n")); return false; } if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0) { - log_warning(_("\nCannot change cluster name, will keep current configuration.\n")); + log_warning(_("Cannot change cluster name, will keep current configuration.\n")); return false; } if (new_options.node != orig_options->node) { - log_warning(_("\nCannot change node number, will keep current configuration.\n")); + log_warning(_("Cannot change node number, will keep current configuration.\n")); return false; } - if (new_options.node_name != orig_options->node_name) + if (strcmp(new_options.node_name, orig_options->node_name) != 0) { - log_warning(_("\nCannot change standby name, will keep current configuration.\n")); + log_warning(_("Cannot change standby name, will keep current configuration.\n")); return false; } if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER) { - log_warning(_("\nNew value for failover is not valid. Should be MANUAL or AUTOMATIC.\n")); + log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n")); return false; } if (new_options.master_response_timeout <= 0) { - log_warning(_("\nNew value for master_response_timeout is not valid. Should be greater than zero.\n")); + log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n")); return false; } if (new_options.reconnect_attempts < 0) { - log_warning(_("\nNew value for reconnect_attempts is not valid. Should be greater or equal than zero.\n")); + log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n")); return false; } if (new_options.reconnect_intvl < 0) { - log_warning(_("\nNew value for reconnect_interval is not valid. Should be greater or equal than zero.\n")); + log_warning(_("New value for reconnect_interval is not valid. Should be greater or equal than zero.\n")); return false; } @@ -268,7 +283,7 @@ reload_configuration(char *config_file, t_configuration_options *orig_options) conn = establishDBConnection(new_options.conninfo, false); if (!conn || (PQstatus(conn) != CONNECTION_OK)) { - log_warning(_("\nconninfo string is not valid, will keep current configuration.\n")); + log_warning(_("conninfo string is not valid, will keep current configuration.\n")); return false; } PQfinish(conn); @@ -283,6 +298,7 @@ reload_configuration(char *config_file, t_configuration_options *orig_options) strcpy(orig_options->promote_command, new_options.promote_command); strcpy(orig_options->follow_command, new_options.follow_command); strcpy(orig_options->rsync_options, new_options.rsync_options); + strcpy(orig_options->ssh_options, new_options.ssh_options); orig_options->master_response_timeout = new_options.master_response_timeout; orig_options->reconnect_attempts = new_options.reconnect_attempts; orig_options->reconnect_intvl = new_options.reconnect_intvl; diff --git a/config.h b/config.h index 4e4cdaf3..8c8bbea1 100644 --- a/config.h +++ b/config.h @@ -36,11 +36,16 @@ typedef struct char loglevel[MAXLEN]; char logfacility[MAXLEN]; char rsync_options[QUERY_STR_LEN]; + char ssh_options[QUERY_STR_LEN]; int master_response_timeout; int reconnect_attempts; int reconnect_intvl; + char pg_bindir[MAXLEN]; + char pgctl_options[MAXLEN]; } t_configuration_options; +#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "" } + void parse_config(const char *config_file, t_configuration_options *options); void parse_line(char *buff, char *name, char *value); char *trim(char *s); diff --git a/dbutils.c b/dbutils.c index 0e4c4749..51d768e2 100644 --- a/dbutils.c +++ b/dbutils.c @@ -138,7 +138,7 @@ is_pgup(PGconn *conn, int timeout) { if (twice) return false; - PQreset(conn); // reconnect + PQreset(conn); /* reconnect */ twice = true; } else @@ -164,10 +164,10 @@ is_pgup(PGconn *conn, int timeout) break; failed: - // we need to retry, because we might just have loose the connection once + /* we need to retry, because we might just have loose the connection once */ if (twice) return false; - PQreset(conn); // reconnect + PQreset(conn); /* reconnect */ twice = true; } } @@ -250,6 +250,40 @@ guc_setted(PGconn *conn, const char *parameter, const char *op, return true; } +/** + * Just like guc_setted except with an extra parameter containing the name of + * the pg datatype so that the comparison can be done properly. + */ +bool +guc_setted_typed(PGconn *conn, const char *parameter, const char *op, + const char *value, const char *datatype) +{ + PGresult *res; + char sqlquery[QUERY_STR_LEN]; + + sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " + " WHERE name = '%s' AND setting::%s %s '%s'::%s", + parameter, datatype, op, value, datatype); + + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("GUC setting check PQexec failed: %s"), + PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + exit(ERR_DB_QUERY); + } + if (PQntuples(res) == 0) + { + PQclear(res); + return false; + } + PQclear(res); + + return true; +} + const char * get_cluster_size(PGconn *conn) diff --git a/dbutils.h b/dbutils.h index 8d983048..69c1a73d 100644 --- a/dbutils.h +++ b/dbutils.h @@ -32,6 +32,9 @@ bool is_pgup(PGconn *conn, int timeout); char *pg_version(PGconn *conn, char* major_version); bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); +bool guc_setted_typed(PGconn *conn, const char *parameter, const char *op, + const char *value, const char *datatype); + const char *get_cluster_size(PGconn *conn); PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, int *master_id, char *master_conninfo_out); diff --git a/debian/DEBIAN/control b/debian/DEBIAN/control index 4cadb9e1..e96a799a 100644 --- a/debian/DEBIAN/control +++ b/debian/DEBIAN/control @@ -1,9 +1,9 @@ Package: repmgr-auto -Version: 1.0-1 +Version: 2.0beta2 Section: database Priority: optional Architecture: all -Depends: rsync, postgresql-9.0 -Maintainer: Greg Smith +Depends: rsync, postgresql-9.0 | postgresql-9.1 | postgresql-9.2 | postgresql-9.3 +Maintainer: Jaime Casanova Description: PostgreSQL replication setup, magament and monitoring has two main executables diff --git a/debian/repmgr.repmgrd.default b/debian/repmgr.repmgrd.default new file mode 100644 index 00000000..bd57bf6a --- /dev/null +++ b/debian/repmgr.repmgrd.default @@ -0,0 +1,14 @@ +#!/bin/sh +# default settings for repmgrd. This file is source by /bin/sh from +# /etc/init.d/repmgrd + +# Options for repmgrd +REPMGRD_OPTS="" + +# repmgrd binary +REPMGR_BIN="/usr/bin/repmgr" + +# pid file +REPMGR_PIDFILE="/var/run/repmgrd.pid" + + diff --git a/debian/repmgr.repmgrd.init b/debian/repmgr.repmgrd.init new file mode 100644 index 00000000..11bd2dee --- /dev/null +++ b/debian/repmgr.repmgrd.init @@ -0,0 +1,48 @@ +#!/bin/sh +### BEGIN INIT INFO +# Provides: repmgrd +# Required-Start: $local_fs $remote_fs $network $syslog $postgresql +# Required-Stop: $local_fs $remote_fs $network $syslog $postgresql +# Should-Start: $syslog $postgresql +# Should-Start: $syslog $postgresql +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Start/stop repmgrd +### END INIT INFO + +set -e + +if test -f /etc/default/repmgrd; then + . /etc/default/repmgrd +fi + +if [ -z "$REPMGRD_BIN" ]; then + REPMGRD_BIN="/usr/bin/repmgrd" +fi + +if [ -z "$REPMGRD_PIDFILE" ]; then + REPMGRD_PIDFILE="/var/run/repmgrd.pid" +fi + +test -x $REPMGRD_BIN || exit 0 + +case "$1" in + start) + start-stop-daemon --start --quiet --make-pidfile --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN $REPMGRD_OPTS + ;; + + stop) + start-stop-daemon --stop --oknodo --quiet --pidfile $REPMGRD_PIDFILE + ;; + + restart) + $0 stop && $0 start || exit 1 + ;; + + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 + ;; +esac + +exit 0 diff --git a/errcode.h b/errcode.h index 208643c3..9bb84906 100644 --- a/errcode.h +++ b/errcode.h @@ -35,5 +35,6 @@ #define ERR_STR_OVERFLOW 10 #define ERR_FAILOVER_FAIL 11 #define ERR_BAD_SSH 12 +#define ERR_SYS_FAILURE 13 #endif /* _ERRCODE_H_ */ diff --git a/log.c b/log.c index e56bd4f8..41e18f2c 100644 --- a/log.c +++ b/log.c @@ -25,9 +25,11 @@ #ifdef HAVE_SYSLOG #include -#include #endif +#include +#include + #include "log.h" #define DEFAULT_IDENT "repmgr" @@ -37,6 +39,29 @@ /* #define REPMGR_DEBUG */ +void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...) { + size_t len = strlen(fmt); + char fmt1[len + 150]; + time_t t; + struct tm *tm; + char buff[100]; + va_list ap; + + if(log_level >= level) { + time(&t); + tm = localtime(&t); + + va_start(ap, fmt); + + strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm); + snprintf(fmt1, len + 150, "%s [%s] %s", buff, level_name, fmt); + vfprintf(stderr, fmt1, ap); + + va_end(ap); + } +} + + static int detect_log_level(const char* level); static int detect_log_facility(const char* facility); diff --git a/log.h b/log.h index c7c2af3c..643b3f69 100644 --- a/log.h +++ b/log.h @@ -25,15 +25,17 @@ #define REPMGR_SYSLOG 1 #define REPMGR_STDERR 2 +void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...); + /* Standard error logging */ -#define stderr_log_debug(...) if (log_level >= LOG_DEBUG) fprintf(stderr, __VA_ARGS__) -#define stderr_log_info(...) if (log_level >= LOG_INFO) fprintf(stderr, __VA_ARGS__) -#define stderr_log_notice(...) if (log_level >= LOG_NOTICE) fprintf(stderr, __VA_ARGS__) -#define stderr_log_warning(...) if (log_level >= LOG_WARNING) fprintf(stderr, __VA_ARGS__) -#define stderr_log_err(...) if (log_level >= LOG_ERR) fprintf(stderr, __VA_ARGS__) -#define stderr_log_crit(...) if (log_level >= LOG_CRIT) fprintf(stderr, __VA_ARGS__) -#define stderr_log_alert(...) if (log_level >= LOG_ALERT) fprintf(stderr, __VA_ARGS__) -#define stderr_log_emerg(...) if (log_level >= LOG_EMERG) fprintf(stderr, __VA_ARGS__) +#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__) +#define stderr_log_info(...) stderr_log_with_level("INFO", LOG_INFO, __VA_ARGS__) +#define stderr_log_notice(...) stderr_log_with_level("NOTICE", LOG_NOTICE, __VA_ARGS__) +#define stderr_log_warning(...) stderr_log_with_level("WARNING", LOG_WARNING, __VA_ARGS__) +#define stderr_log_err(...) stderr_log_with_level("ERROR", LOG_ERR, __VA_ARGS__) +#define stderr_log_crit(...) stderr_log_with_level("CRITICAL", LOG_CRIT, __VA_ARGS__) +#define stderr_log_alert(...) stderr_log_with_level("ALERT", LOG_ALERT, __VA_ARGS__) +#define stderr_log_emerg(...) stderr_log_with_level("EMERGENCY", LOG_EMERG, __VA_ARGS__) #ifdef HAVE_SYSLOG diff --git a/repmgr.c b/repmgr.c index 6e6c1cb6..85847e86 100644 --- a/repmgr.c +++ b/repmgr.c @@ -85,8 +85,8 @@ bool need_a_node = true; bool require_password = false; /* Initialization of runtime options */ -t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }; -t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 }; +t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER; +t_configuration_options options = T_CONFIGURATION_OPTIONS_INITIALIZER; static char *server_mode = NULL; static char *server_cmd = NULL; @@ -396,7 +396,7 @@ do_cluster_show(void) if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_err(_("Cannot get node information, have you registered them?\n%s\n"), PQerrorMessage(conn)); + log_err(_("Can't get nodes information, have you registered them?\n%s\n"), PQerrorMessage(conn)); PQclear(res); PQfinish(conn); exit(ERR_BAD_CONFIG); @@ -565,6 +565,22 @@ do_master_register(void) PGconn *master_conn; int id; + if (runtime_options.force) + { + sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " + " WHERE id = %d", + repmgr_schema, options.node); + log_debug(_("master register: %s\n"), sqlquery); + + if (!PQexec(conn, sqlquery)) + { + log_warning(_("Cannot delete node details, %s\n"), + PQerrorMessage(conn)); + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + } + /* Ensure there isn't any other master already registered */ master_conn = getMasterConnection(conn, repmgr_schema, options.cluster_name, &id,NULL); @@ -577,21 +593,6 @@ do_master_register(void) } /* Now register the master */ - if (runtime_options.force) - { - sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " - " WHERE id = %d", - repmgr_schema, options.node); - log_debug(_("master register: %s\n"), sqlquery); - - if (!PQexec(conn, sqlquery)) - { - log_warning(_("Cannot delete node details, %s\n"), - PQerrorMessage(conn)); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - } sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, name, conninfo, priority) " "VALUES (%d, '%s', '%s', '%s', %d)", @@ -742,7 +743,8 @@ do_standby_register(void) options.conninfo, options.priority); log_debug(_("standby register: %s\n"), sqlquery); - if (!PQexec(master_conn, sqlquery)) + res = PQexec(master_conn, sqlquery); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) { log_err(_("Cannot insert node details, %s\n"), PQerrorMessage(master_conn)); @@ -767,7 +769,7 @@ do_standby_clone(void) PGresult *res; char sqlquery[QUERY_STR_LEN]; - int r = 0; + int r = 0, retval = SUCCESS; int i; bool flag_success = false; bool test_mode = false; @@ -842,7 +844,7 @@ do_standby_clone(void) log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); exit(ERR_BAD_CONFIG); } - if (!guc_setted(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments)) + if (!guc_setted_typed(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments, "integer")) { PQfinish(conn); log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option or edit the postgresql.conf of the PostgreSQL master.)\n"), progname, runtime_options.wal_keep_segments); @@ -1036,6 +1038,8 @@ do_standby_clone(void) { log_err(_("%s: couldn't use directory %s ...\nUse --force option to force\n"), progname, local_data_directory); + r = ERR_BAD_CONFIG; + retval = ERR_BAD_CONFIG; goto stop_backup; } @@ -1175,7 +1179,7 @@ stop_backup: log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - exit(ERR_STOP_BACKUP); + exit(retval); } last_wal_segment = PQgetvalue(res, 0, 0); @@ -1313,13 +1317,12 @@ do_standby_promote(void) rename(recovery_file_path, recovery_done_path); /* - * We assume the pg_ctl script is in the PATH. Restart and wait for - * the server to finish starting, so that the check below will - * find an active server rather than one starting up. This may + * Restart and wait for the server to finish starting, so that the check + * below will find an active server rather than one starting up. This may * hang for up the default timeout (60 seconds). */ - log_notice(_("%s: restarting server using pg_ctl\n"), progname); - maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir); + log_notice(_("%s: restarting server using %s/pg_ctl\n"), progname, options.pg_bindir); + maxlen_snprintf(script, "%s/pg_ctl %s -D %s -w -m fast restart", options.pg_bindir, options.pgctl_options, data_dir); r = system(script); if (r != 0) { @@ -1464,8 +1467,7 @@ do_standby_follow(void) exit(ERR_BAD_CONFIG); /* Finally, restart the service */ - /* We assume the pg_ctl script is in the PATH */ - maxlen_snprintf(script, "pg_ctl -w -D %s -m fast restart", data_dir); + maxlen_snprintf(script, "%s/pg_ctl %s -w -D %s -m fast restart", options.pg_bindir, options.pgctl_options, data_dir); r = system(script); if (r != 0) { @@ -1496,14 +1498,6 @@ do_witness_create(void) char master_hba_file[MAXLEN]; - /* Check this directory could be used as a PGDATA dir */ - if (!create_pgdir(runtime_options.dest_dir, runtime_options.force)) - { - log_err(_("witness create: couldn't create data directory (\"%s\") for witness"), - runtime_options.dest_dir); - exit(ERR_BAD_CONFIG); - } - /* Connection parameters for master only */ keywords[0] = "host"; values[0] = runtime_options.host; @@ -1545,6 +1539,15 @@ do_witness_create(void) exit(ERR_BAD_SSH); } + /* Check this directory could be used as a PGDATA dir */ + if (!create_pgdir(runtime_options.dest_dir, runtime_options.force)) + { + log_err(_("witness create: couldn't create data directory (\"%s\") for witness"), + runtime_options.dest_dir); + exit(ERR_BAD_CONFIG); + } + + /* * To create a witness server we need to: * 1) initialize the cluster @@ -1553,8 +1556,7 @@ do_witness_create(void) */ /* Create the cluster for witness */ - /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s init -o \"-W\"", runtime_options.dest_dir); + sprintf(script, "%s/pg_ctl %s -D %s init -o \"-W\"", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir); log_info("Initialize cluster for witness: %s.\n", script); r = system(script); @@ -1627,7 +1629,7 @@ do_witness_create(void) } /* start new instance */ - sprintf(script, "pg_ctl -w -D %s start", runtime_options.dest_dir); + sprintf(script, "%s/pg_ctl %s -w -D %s start", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir); log_info(_("Start cluster for witness: %s"), script); r = system(script); if (r != 0) @@ -1786,9 +1788,9 @@ test_ssh_connection(char *host, char *remote_user) /* Check if we have ssh connectivity to host before trying to rsync */ if (!remote_user[0]) - maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s", host, TRUEBIN_PATH); + maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s %s", options.ssh_options, host, TRUEBIN_PATH); else - maxlen_snprintf(script, "ssh -o Batchmode=yes %s -l %s %s", host, remote_user, TRUEBIN_PATH); + maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s", options.ssh_options, host, remote_user, TRUEBIN_PATH); log_debug(_("command is: %s"), script); r = system(script); @@ -1827,7 +1829,7 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, if (is_directory) { - strcat(rsync_flags, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); + strcat(rsync_flags, " --exclude=pg_xlog* --exclude=pg_log* --exclude=pg_control --exclude=*.pid"); maxlen_snprintf(script, "rsync %s %s:%s/* %s", rsync_flags, host_string, remote_path, local_path); } diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 9daaf3f4..c9465cd1 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -11,7 +11,8 @@ node_name=standby2 # Connection information conninfo='host=192.168.204.104' -rsync_options=--archive --checksum --compress --progress --rsh=ssh +rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\"" +ssh_options=-o "StrictHostKeyChecking no" # How many seconds we wait for master response before declaring master failure master_response_timeout=60 @@ -33,3 +34,11 @@ loglevel=NOTICE # Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER # Default: STDERR logfacility=STDERR + +# path to pg_ctl executable +pg_bindir=/usr/bin/ + +# +# you may add command line arguments for pg_ctl +# +# pg_ctl_options='-s' diff --git a/repmgr.h b/repmgr.h index 295a4645..a2a0ac8e 100644 --- a/repmgr.h +++ b/repmgr.h @@ -69,6 +69,8 @@ typedef struct int keep_history; } t_runtime_options; +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 } + #define SLEEP_MONITOR 2 #endif diff --git a/repmgrd.c b/repmgrd.c index a68532f9..b70a3807 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -22,6 +22,9 @@ #include +#include +#include + #include #include #include @@ -45,6 +48,27 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0}; #endif +#if PG_VERSION_NUM >= 90300 + #define XLAssign(a, b) \ + a = b + + #define XLAssignValue(a, xlogid, xrecoff) \ + a = xrecoff + + #define XLByteLT(a, b) \ + (a < b) + +#else + #define XLAssign(a, b) \ + a.xlogid = b.xlogid; \ + a.xrecoff = b.xrecoff + + #define XLAssignValue(a, uxlogid, uxrecoff) \ + a.xlogid = uxlogid; \ + a.xrecoff = uxrecoff +#endif + + /* * Struct to keep info about the nodes, used in the voting process in * do_failover() @@ -52,8 +76,10 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0}; typedef struct nodeInfo { int nodeId; + char conninfostr[MAXLEN]; XLogRecPtr xlog_location; bool is_ready; + bool is_visible; bool is_witness; } nodeInfo; @@ -79,11 +105,15 @@ bool verbose = false; bool monitoring_history = false; char repmgr_schema[MAXLEN]; +bool failover_done = false; + +char *pid_file = NULL; + /* * should initialize with {0} to be ANSI complaint ? but this raises * error with gcc -Wall */ -t_configuration_options config = {}; +t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER; static void help(const char* progname); static void usage(void); @@ -107,7 +137,10 @@ static volatile sig_atomic_t got_SIGHUP = false; static void handle_sighup(SIGNAL_ARGS); static void handle_sigint(SIGNAL_ARGS); + +#ifndef WIN32 static void setup_event_handlers(void); +#endif #define CloseConnections() \ if (PQisBusy(primaryConn) == 1) \ @@ -126,11 +159,14 @@ main(int argc, char **argv) {"config", required_argument, NULL, 'f'}, {"verbose", no_argument, NULL, 'v'}, {"monitoring-history", no_argument, NULL, 'm'}, + {"daemonize", no_argument, NULL, 'd'}, + {"pid-file", required_argument, NULL, 'p'}, {NULL, 0, NULL, 0} }; int optindex; int c; + bool daemonize = false; char standby_version[MAXVERSIONSTR]; @@ -150,7 +186,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "f:v:m", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "f:v:mdp:", long_options, &optindex)) != -1) { switch (c) { @@ -163,13 +199,67 @@ main(int argc, char **argv) case 'm': monitoring_history = true; break; + case 'd': + daemonize = true; + break; + case 'p': + pid_file = optarg; + break; default: usage(); exit(ERR_BAD_CONFIG); } } + if (daemonize) + { + pid_t pid = fork(); + switch (pid) + { + case -1: + log_err("Error in fork(): %s\n", strerror(errno)); + exit(ERR_SYS_FAILURE); + break; + + case 0: /* child process */ + pid = setsid(); + if (pid == (pid_t)-1) + { + log_err("Error in setsid(): %s\n", strerror(errno)); + exit(ERR_SYS_FAILURE); + } + break; + + default: /* parent process */ + exit(0); + } + } + + if (pid_file) + { + struct stat st; + FILE *fd; + + if (stat(pid_file, &st) != -1) + { + log_err("PID file %s exists. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file); + exit(ERR_BAD_CONFIG); + } + + fd = fopen(pid_file, "w"); + if (fd == NULL) + { + log_err("Could not open PID file %s!\n", pid_file); + exit(ERR_BAD_CONFIG); + } + + fprintf(fd, "%d", getpid()); + fclose(fd); + } + + #ifndef WIN32 setup_event_handlers(); + #endif /* * Read the configuration file: repmgr.conf @@ -201,63 +291,37 @@ main(int argc, char **argv) exit(ERR_BAD_CONFIG); } + /* - * Set my server mode, establish a connection to primary - * and start monitor - */ - if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node)) - myLocalMode = WITNESS_MODE; - else if (is_standby(myLocalConn)) - myLocalMode = STANDBY_MODE; - else /* is the master */ - myLocalMode = PRIMARY_MODE; - - switch (myLocalMode) + * MAIN LOOP + * This loops cicles once per failover and at startup + * Requisites: + * - myLocalConn needs to be already setted with an active connection + * - no master connection + */ + do { - case PRIMARY_MODE: - primary_options.node = local_options.node; - strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); - primaryConn = myLocalConn; - - checkClusterConfiguration(myLocalConn, primaryConn); - checkNodeConfiguration(local_options.conninfo); - - if (reload_configuration(config_file, &local_options)) - { - PQfinish(myLocalConn); - myLocalConn = establishDBConnection(local_options.conninfo, true); - primaryConn = myLocalConn; - update_registration(); - } - - log_info(_("%s Starting continuous primary connection check\n"), progname); - /* Check that primary is still alive, and standbies are sending info */ /* - * Every SLEEP_MONITOR seconds, do master checks - * XXX - * Check that standbies are sending info - */ - for (;;) - { - if (CheckPrimaryConnection()) - { - /* - CheckActiveStandbiesConnections(); - CheckInactiveStandbies(); - */ - sleep(SLEEP_MONITOR); - } - else - { - /* XXX - * May we do something more verbose ? - */ - exit (1); - } + * Set my server mode, establish a connection to primary + * and start monitor + */ + if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node)) + myLocalMode = WITNESS_MODE; + else if (is_standby(myLocalConn)) + myLocalMode = STANDBY_MODE; + else /* is the master */ + myLocalMode = PRIMARY_MODE; + + switch (myLocalMode) + { + case PRIMARY_MODE: + primary_options.node = local_options.node; + strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); + primaryConn = myLocalConn; + + checkClusterConfiguration(myLocalConn, primaryConn); + checkNodeConfiguration(local_options.conninfo); - if (got_SIGHUP) - { - /* if we can reload, then could need to change myLocalConn */ if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); @@ -265,70 +329,112 @@ main(int argc, char **argv) primaryConn = myLocalConn; update_registration(); } - got_SIGHUP = false; - } - } - break; - case WITNESS_MODE: - case STANDBY_MODE: - /* I need the id of the primary as well as a connection to it */ - log_info(_("%s Connecting to primary for cluster '%s'\n"), - progname, local_options.cluster_name); - primaryConn = getMasterConnection(myLocalConn, repmgr_schema, - local_options.cluster_name, - &primary_options.node, NULL); - if (primaryConn == NULL) - { - CloseConnections(); - exit(ERR_BAD_CONFIG); - } - checkClusterConfiguration(myLocalConn, primaryConn); - checkNodeConfiguration(local_options.conninfo); + log_info(_("%s Starting continuous primary connection check\n"), progname); - if (reload_configuration(config_file, &local_options)) - { - PQfinish(myLocalConn); - myLocalConn = establishDBConnection(local_options.conninfo, true); - update_registration(); - } + /* Check that primary is still alive, and standbies are sending info */ - /* - * Every SLEEP_MONITOR seconds, do checks - */ - if (myLocalMode == WITNESS_MODE) - { - log_info(_("%s Starting continuous witness node monitoring\n"), progname); - } - else if (myLocalMode == STANDBY_MODE) - { - log_info(_("%s Starting continuous standby node monitoring\n"), progname); - } + /* + * Every SLEEP_MONITOR seconds, do master checks + * XXX + * Check that standbies are sending info + */ + do + { + if (CheckPrimaryConnection()) + { + /* + CheckActiveStandbiesConnections(); + CheckInactiveStandbies(); + */ + sleep(SLEEP_MONITOR); + } + else + { + /* XXX + * May we do something more verbose ? + */ + exit(1); + } - for (;;) - { - if (myLocalMode == WITNESS_MODE) - WitnessMonitor(); - else if (myLocalMode == STANDBY_MODE) - StandbyMonitor(); - sleep(SLEEP_MONITOR); + if (got_SIGHUP) + { + /* if we can reload, then could need to change myLocalConn */ + if (reload_configuration(config_file, &local_options)) + { + PQfinish(myLocalConn); + myLocalConn = establishDBConnection(local_options.conninfo, true); + primaryConn = myLocalConn; + update_registration(); + } + got_SIGHUP = false; + } + } while (!failover_done); + break; + case WITNESS_MODE: + case STANDBY_MODE: + /* I need the id of the primary as well as a connection to it */ + log_info(_("%s Connecting to primary for cluster '%s'\n"), + progname, local_options.cluster_name); + primaryConn = getMasterConnection(myLocalConn, repmgr_schema, + local_options.cluster_name, + &primary_options.node, NULL); + if (primaryConn == NULL) + { + CloseConnections(); + exit(ERR_BAD_CONFIG); + } + + checkClusterConfiguration(myLocalConn, primaryConn); + checkNodeConfiguration(local_options.conninfo); - if (got_SIGHUP) - { - /* if we can reload, then could need to change myLocalConn */ if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); myLocalConn = establishDBConnection(local_options.conninfo, true); update_registration(); } - got_SIGHUP = false; - } + + /* + * Every SLEEP_MONITOR seconds, do checks + */ + if (myLocalMode == WITNESS_MODE) + { + log_info(_("%s Starting continuous witness node monitoring\n"), progname); + } + else if (myLocalMode == STANDBY_MODE) + { + log_info(_("%s Starting continuous standby node monitoring\n"), progname); + } + + do + { + if (myLocalMode == WITNESS_MODE) + WitnessMonitor(); + else if (myLocalMode == STANDBY_MODE) + StandbyMonitor(); + sleep(SLEEP_MONITOR); + + if (got_SIGHUP) + { + /* if we can reload, then could need to change myLocalConn */ + if (reload_configuration(config_file, &local_options)) + { + PQfinish(myLocalConn); + myLocalConn = establishDBConnection(local_options.conninfo, true); + update_registration(); + } + got_SIGHUP = false; + } + } while (!failover_done); + break; + default: + log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node); } - break; - default: - log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node); - } + + failover_done = false; + + } while (true); /* Prevent a double-free */ if (primaryConn == myLocalConn) @@ -356,7 +462,7 @@ WitnessMonitor(void) * Check if the master is still available, if after 5 minutes of retries * we cannot reconnect, return false. */ - CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds + CheckPrimaryConnection(); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */ if (PQstatus(primaryConn) != CONNECTION_OK) { @@ -441,7 +547,7 @@ StandbyMonitor(void) * Check if the master is still available, if after 5 minutes of retries * we cannot reconnect, try to get a new master. */ - CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds + CheckPrimaryConnection(); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */ if (PQstatus(primaryConn) != CONNECTION_OK) { @@ -479,6 +585,7 @@ StandbyMonitor(void) * a new primaryConn */ do_failover(); + return; } } @@ -570,23 +677,23 @@ StandbyMonitor(void) static void do_failover(void) { - PGresult *res1; - PGresult *res2; + PGresult *res; char sqlquery[8192]; int total_nodes = 0; int visible_nodes = 0; + int ready_nodes = 0; + bool find_best = false; bool witness = false; int i; int r; - int node; - char nodeConninfo[MAXLEN]; + uint32 uxlogid; + uint32 uxrecoff; + XLogRecPtr xlog_recptr; - unsigned int uxlogid; - unsigned int uxrecoff; char last_wal_standby_applied[MAXLEN]; PGconn *nodeConn = NULL; @@ -596,108 +703,62 @@ do_failover(void) * which seems to be large enough for most scenarios */ nodeInfo nodes[50]; + /* initialize to keep compiler quiet */ - nodeInfo best_candidate = {-1, InvalidXLogRecPtr, false, false}; - - /* first we get info about this node, and update shared memory */ - sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); - res1 = PQexec(myLocalConn, sqlquery); - if (PQresultStatus(res1) != PGRES_TUPLES_OK) - { - log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn)); - PQclear(res1); - sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); - update_shared_memory(last_wal_standby_applied); - exit(ERR_DB_QUERY); - } - - /* write last location in shared memory */ - update_shared_memory(PQgetvalue(res1, 0, 0)); - - /* - * we sleep the monitor time + one second - * we bet it should be enough for other repmgrd to update their own data - */ - sleep(SLEEP_MONITOR + 1); + nodeInfo best_candidate = {-1, "", InvalidXLogRecPtr, false, false, false}; /* get a list of standby nodes, including myself */ sprintf(sqlquery, "SELECT id, conninfo, witness " " FROM %s.repl_nodes " - " WHERE id <> %d " - " AND cluster = '%s' " + " WHERE cluster = '%s' " " ORDER BY priority, id ", - repmgr_schema, primary_options.node, local_options.cluster_name); + repmgr_schema, local_options.cluster_name); - res1 = PQexec(myLocalConn, sqlquery); - if (PQresultStatus(res1) != PGRES_TUPLES_OK) + res = PQexec(myLocalConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_err(_("Can't get nodes info: %s\n"), PQerrorMessage(myLocalConn)); - PQclear(res1); + log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(myLocalConn)); + PQclear(res); PQfinish(myLocalConn); exit(ERR_DB_QUERY); } - log_debug(_("%s: there are %d nodes registered"), progname, PQntuples(res1)); - /* ask for the locations */ - for (i = 0; i < PQntuples(res1); i++) + /* + * total nodes that are registered + */ + total_nodes = PQntuples(res); + log_debug(_("%s: there are %d nodes registered\n"), progname, total_nodes); + + /* Build an array with the nodes and indicate which ones are visible and ready */ + for (i = 0; i < total_nodes; i++) { - node = atoi(PQgetvalue(res1, i, 0)); + nodes[i].nodeId = atoi(PQgetvalue(res, i, 0)); + strncpy(nodes[i].conninfostr, PQgetvalue(res, i, 1), MAXLEN); + nodes[i].is_witness = (strcmp(PQgetvalue(res, i, 2), "t") == 0) ? true : false; + /* Initialize on false so if we can't reach this node we know that later */ + nodes[i].is_visible = false; nodes[i].is_ready = false; - strncpy(nodeConninfo, PQgetvalue(res1, i, 1), MAXLEN); - witness = (strcmp(PQgetvalue(res1, i, 2), "t") == 0) ? true : false; - log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s"), progname, node, nodeConninfo, (witness) ? "true" : "false"); + XLAssignValue(nodes[i].xlog_location, 0, 0); + + log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"), + progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? "true" : "false"); + + nodeConn = establishDBConnection(nodes[i].conninfostr, false); - nodeConn = establishDBConnection(nodeConninfo, false); /* if we can't see the node just skip it */ if (PQstatus(nodeConn) != CONNECTION_OK) continue; - /* the witness will always show 0/0 so avoid a useless query */ - if (!witness) - { - sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); - res2 = PQexec(nodeConn, sqlquery); - if (PQresultStatus(res2) != PGRES_TUPLES_OK) - { - log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn)); - log_info(_("Connection details: %s\n"), nodeConninfo); - PQclear(res2); - PQfinish(nodeConn); - continue; - } - - if (sscanf(PQgetvalue(res2, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) - log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res2, 0, 0)); - - PQclear(res2); - } - else - { - uxlogid = 0; - uxrecoff = 0; - } - visible_nodes++; - - nodes[i].nodeId = node; - nodes[i].xlog_location.xlogid = uxlogid; - nodes[i].xlog_location.xrecoff = uxrecoff; - nodes[i].is_ready = true; - nodes[i].is_witness = witness; + nodes[i].is_visible = true; PQfinish(nodeConn); } - PQclear(res1); - /* Close the connection to this server */ - PQfinish(myLocalConn); + PQclear(res); - /* - * total nodes that are registered, include master which is a node but was - * not counted because it's not a standby - */ - total_nodes = i + 1; + log_debug(_("Total nodes counted: registered=%d, visible=%d\n"), total_nodes, visible_nodes); /* * am i on the group that should keep alive? @@ -711,24 +772,169 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } + /* Query all the nodes to determine which ones are ready */ + for (i = 0; i < total_nodes; i++) + { + /* if the node is not visible, skip it */ + if (!nodes[i].is_visible) + continue; + + if (nodes[i].is_witness) + continue; + + nodeConn = establishDBConnection(nodes[i].conninfostr, false); + /* XXX + * This shouldn't happen, if this happens it means this is a major problem + * maybe network outages? anyway, is better for a human to react + */ + if (PQstatus(nodeConn) != CONNECTION_OK) + { + log_err(_("It seems new problems are arising, manual intervention is needed\n")); + exit(ERR_FAILOVER_FAIL); + } + + sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); + res = PQexec(nodeConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn)); + log_info(_("Connection details: %s\n"), nodes[i].conninfostr); + PQclear(res); + PQfinish(nodeConn); + exit(ERR_FAILOVER_FAIL); + } + + if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) + log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); + + log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", + nodes[i].nodeId, uxlogid, uxlogid, uxrecoff, uxrecoff); + + /* If position is 0/0, error */ + if (uxlogid == 0 && uxrecoff == 0) + { + PQclear(res); + PQfinish(nodeConn); + log_info(_("InvalidXLogRecPtr detected in a standby\n")); + exit(ERR_FAILOVER_FAIL); + } + + XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff); + + PQclear(res); + PQfinish(nodeConn); + } + + /* last we get info about this node, and update shared memory */ + sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); + res = PQexec(myLocalConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn)); + PQfinish(myLocalConn); + PQclear(res); + sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); + update_shared_memory(last_wal_standby_applied); + exit(ERR_DB_QUERY); + } + + /* write last location in shared memory */ + update_shared_memory(PQgetvalue(res, 0, 0)); + PQclear(res); + + for (i = 0; i < total_nodes; i++) + { + while (!nodes[i].is_ready) + { + /* + * the witness will always be masked as ready if it's still + * not marked that way and avoid a useless query + */ + if (nodes[i].is_witness) + { + if (!nodes[i].is_ready) + { + nodes[i].is_ready = true; + ready_nodes++; + } + break; + } + + /* if the node is not visible, skip it */ + if (!nodes[i].is_visible) + break; + + /* if the node is ready there is nothing to check, skip it too */ + if (nodes[i].is_ready) + break; + + nodeConn = establishDBConnection(nodes[i].conninfostr, false); + /* XXX + * This shouldn't happen, if this happens it means this is a major problem + * maybe network outages? anyway, is better for a human to react + */ + if (PQstatus(nodeConn) != CONNECTION_OK) + { + /* XXX */ + log_info(_("At this point, it could be some race conditions that are acceptable, assume the node is restarting and starting failover procedure\n")); + break; + } + + sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); + res = PQexec(nodeConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(nodeConn)); + PQclear(res); + PQfinish(nodeConn); + exit(ERR_DB_QUERY); + } + + if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) + log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); + + PQclear(res); + PQfinish(nodeConn); + /* If position is 0/0, keep checking */ + if (uxlogid == 0 && uxrecoff == 0) + continue; + + XLAssignValue(xlog_recptr, uxlogid, uxrecoff); + + if (XLByteLT(nodes[i].xlog_location, xlog_recptr)) + { + XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff); + } + + log_debug("Last XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", + nodes[i].nodeId, uxlogid, uxlogid, + uxrecoff, uxrecoff); + + ready_nodes++; + nodes[i].is_ready = true; + } + } + + /* Close the connection to this server */ + PQfinish(myLocalConn); + /* * determine which one is the best candidate to promote to primary */ - for (i = 0; i < total_nodes - 1; i++) + for (i = 0; i < total_nodes; i++) { /* witness is never a good candidate */ if (nodes[i].is_witness) continue; - if (!nodes[i].is_ready) + if (!nodes[i].is_ready || !nodes[i].is_visible) continue; if (!find_best) { /* start with the first ready node, and then move on to the next one */ best_candidate.nodeId = nodes[i].nodeId; - best_candidate.xlog_location.xlogid = nodes[i].xlog_location.xlogid; - best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff; + XLAssign(best_candidate.xlog_location, nodes[i].xlog_location); best_candidate.is_ready = nodes[i].is_ready; best_candidate.is_witness = nodes[i].is_witness; find_best = true; @@ -743,8 +949,7 @@ do_failover(void) if (XLByteLT(best_candidate.xlog_location, nodes[i].xlog_location)) { best_candidate.nodeId = nodes[i].nodeId; - best_candidate.xlog_location.xlogid = nodes[i].xlog_location.xlogid; - best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff; + XLAssign(best_candidate.xlog_location, nodes[i].xlog_location); best_candidate.is_ready = nodes[i].is_ready; best_candidate.is_witness = nodes[i].is_witness; } @@ -759,6 +964,9 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } + /* wait */ + sleep(5); + if (verbose) log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"), progname); @@ -772,6 +980,9 @@ do_failover(void) } else if (find_best) { + /* wait */ + sleep(10); + if (verbose) log_info(_("%s: Node %d is the best candidate to be the new primary, we should follow it...\n"), progname, best_candidate.nodeId); @@ -793,6 +1004,9 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } + /* to force it to re-calculate mode and master node */ + failover_done = true; + /* and reconnect to the local database */ myLocalConn = establishDBConnection(local_options.conninfo, true); } @@ -972,6 +1186,8 @@ void help(const char *progname) printf(_(" --verbose output verbose activity information\n")); printf(_(" --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n")); printf(_(" -f, --config_file=PATH configuration file\n")); + printf(_(" -d, --daemonize detach process from foreground\n")); + printf(_(" -p, --pid-file=PATH write a PID file\n")); printf(_("\n%s monitors a cluster of servers.\n"), progname); } @@ -981,6 +1197,13 @@ static void handle_sigint(SIGNAL_ARGS) { CloseConnections(); + logger_shutdown(); + + if (pid_file) + { + remove(pid_file); + } + exit(1); } @@ -996,6 +1219,7 @@ setup_event_handlers(void) { pqsignal(SIGHUP, handle_sighup); pqsignal(SIGINT, handle_sigint); + pqsignal(SIGTERM, handle_sigint); } #endif diff --git a/version.h b/version.h index 97c2c265..95c94382 100644 --- a/version.h +++ b/version.h @@ -1,4 +1,5 @@ #ifndef _VERSION_H_ #define _VERSION_H_ + #define REPMGR_VERSION "2.1dev" #endif