From d43c6334da83376f2459bf88197190245a8b7755 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sat, 21 Jul 2012 12:06:33 -0500 Subject: [PATCH 01/51] Prepare HISTORY and release notes for release --- HISTORY | 9 ++++++--- version.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/HISTORY b/HISTORY index 6acdcddc..0f783a1d 100644 --- a/HISTORY +++ b/HISTORY @@ -31,12 +31,15 @@ 1.1.0 2011-03-09 Make options -U, -R and -p not mandatory (Jaime) -1.1.1 2011-XX-XX +1.1.1 2012-04-18 Add --ignore-rsync-warning (Cédric) Add strnlen for compatibility with OS X (Greg) Improve performance of the repl_status view (Jaime) + Remove last argument from log_err (Jaime, Reported by Jeroen Dekkers) + Complete documentation about possible error conditions (Jaime) + Document how to clean history (Jaime) -1.2.0 2012-06-15 +1.2.0 2012-07-27 Test ssh connection before trying to rsync (Cédric) Add CLUSTER SHOW command (Carlo) Add CLUSTER CLEANUP command (Jaime) @@ -44,7 +47,7 @@ Teach repmgr how to get tablespace's location in different pg version (Jaime) Improve version message (Carlo) -2.0.0 2012-XX-XX +2.0beta 2012-07-27 Make CLONE command try to make an exact copy including $PGDATA location (Cedric) Add detection of master failure (Jaime) Add the notion of a witness server (Jaime) diff --git a/version.h b/version.h index f5fbf579..4e168bb1 100644 --- a/version.h +++ b/version.h @@ -1,4 +1,4 @@ #ifndef _VERSION_H_ #define _VERSION_H_ -#define REPMGR_VERSION "1.2.0" +#define REPMGR_VERSION "2.0beta1" #endif From 664e1a8321aa4b9d61b7fc2b416bf5dca9819a38 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sat, 21 Jul 2012 17:49:38 -0500 Subject: [PATCH 02/51] Now that we can have no monitoring we need to check all nodes at failover not only those in repl_monitor --- repmgrd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index aae9b169..8bc47d38 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -608,11 +608,10 @@ 
do_failover(void) /* get a list of standby nodes, including myself */ sprintf(sqlquery, "SELECT id, conninfo " " FROM %s.repl_nodes " - " WHERE id IN (SELECT standby_node FROM %s.repl_status) " - " AND id <> %d " + " WHERE id <> %d " " AND cluster = '%s' " " ORDER BY priority ", - repmgr_schema, repmgr_schema, primary_options.node, local_options.cluster_name); + repmgr_schema, primary_options.node, local_options.cluster_name); res1 = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res1) != PGRES_TUPLES_OK) From 740208da1c0701f7a8820b18b149e696b8474377 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Fri, 27 Jul 2012 11:15:50 -0500 Subject: [PATCH 03/51] Fix typos in RELEASE NOTES --- HISTORY | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HISTORY b/HISTORY index 0f783a1d..202d4529 100644 --- a/HISTORY +++ b/HISTORY @@ -52,6 +52,6 @@ Add detection of master failure (Jaime) Add the notion of a witness server (Jaime) Add autofailover capabilities (Jaime) - Add a configuration parameter to indicate the script ti execute on failover or follow (Jaime) - Make the monitoring optional and turned off by default, in can be turned on with --monitoring-history switch (Jaime) + Add a configuration parameter to indicate the script to execute on failover or follow (Jaime) + Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime) Add tunables to specify number of retries to reconnect to master and the time between them (Jaime) From 3edd87a04137c68796ed45b5691de1f4c7b90096 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Fri, 27 Jul 2012 11:20:56 -0500 Subject: [PATCH 04/51] Fix tabs in HISTORY --- HISTORY | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/HISTORY b/HISTORY index 202d4529..c3a7d8b6 100644 --- a/HISTORY +++ b/HISTORY @@ -49,9 +49,9 @@ 2.0beta 2012-07-27 Make CLONE command try to make an exact copy including $PGDATA location (Cedric) - Add detection of 
master failure (Jaime) - Add the notion of a witness server (Jaime) + Add detection of master failure (Jaime) + Add the notion of a witness server (Jaime) Add autofailover capabilities (Jaime) - Add a configuration parameter to indicate the script to execute on failover or follow (Jaime) - Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime) - Add tunables to specify number of retries to reconnect to master and the time between them (Jaime) + Add a configuration parameter to indicate the script to execute on failover or follow (Jaime) + Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime) + Add tunables to specify number of retries to reconnect to master and the time between them (Jaime) From 56d2ae4e81fd1e5de4f592ab20181692eef6b78d Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Fri, 27 Jul 2012 11:26:18 -0500 Subject: [PATCH 05/51] Fix HISTORY to show from newest to oldest --- HISTORY | 58 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/HISTORY b/HISTORY index c3a7d8b6..bfd36a17 100644 --- a/HISTORY +++ b/HISTORY @@ -1,5 +1,30 @@ -1.0.0 2010-12-05 - First public release +2.0beta 2012-07-27 + Make CLONE command try to make an exact copy including $PGDATA location (Cedric) + Add detection of master failure (Jaime) + Add the notion of a witness server (Jaime) + Add autofailover capabilities (Jaime) + Add a configuration parameter to indicate the script to execute on failover or follow (Jaime) + Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime) + Add tunables to specify number of retries to reconnect to master and the time between them (Jaime) + +1.2.0 2012-07-27 + Test ssh connection before trying to rsync (Cédric) + Add CLUSTER SHOW command (Carlo) + Add CLUSTER CLEANUP command (Jaime) + Add 
function write_primary_conninfo (Marco) + Teach repmgr how to get tablespace's location in different pg version (Jaime) + Improve version message (Carlo) + +1.1.1 2012-04-18 + Add --ignore-rsync-warning (Cédric) + Add strnlen for compatibility with OS X (Greg) + Improve performance of the repl_status view (Jaime) + Remove last argument from log_err (Jaime, Reported by Jeroen Dekkers) + Complete documentation about possible error conditions (Jaime) + Document how to clean history (Jaime) + +1.1.0 2011-03-09 + Make options -U, -R and -p not mandatory (Jaime) 1.1.0b1 2011-02-24 Fix missing "--force" option in help (Greg Smith) @@ -28,30 +53,5 @@ Map old verbose flag into a useful setting for the new logger (Greg) Document repmgrd startup restrictions and log info about them (Greg) -1.1.0 2011-03-09 - Make options -U, -R and -p not mandatory (Jaime) - -1.1.1 2012-04-18 - Add --ignore-rsync-warning (Cédric) - Add strnlen for compatibility with OS X (Greg) - Improve performance of the repl_status view (Jaime) - Remove last argument from log_err (Jaime, Reported by Jeroen Dekkers) - Complete documentation about possible error conditions (Jaime) - Document how to clean history (Jaime) - -1.2.0 2012-07-27 - Test ssh connection before trying to rsync (Cédric) - Add CLUSTER SHOW command (Carlo) - Add CLUSTER CLEANUP command (Jaime) - Add function write_primary_conninfo (Marco) - Teach repmgr how to get tablespace's location in different pg version (Jaime) - Improve version message (Carlo) - -2.0beta 2012-07-27 - Make CLONE command try to make an exact copy including $PGDATA location (Cedric) - Add detection of master failure (Jaime) - Add the notion of a witness server (Jaime) - Add autofailover capabilities (Jaime) - Add a configuration parameter to indicate the script to execute on failover or follow (Jaime) - Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime) - Add tunables to specify number of retries to 
reconnect to master and the time between them (Jaime) +1.0.0 2010-12-05 + First public release From 57aa95f67441855b0ec3bf4754e412a656a4f053 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 30 Aug 2012 02:10:10 -0500 Subject: [PATCH 06/51] Fix documentation to always use -h syntax to refer to the node we want to clone or connect to, instead of relying on the fact that for some time putting that argument last worked. --- autofailover_quick_setup.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autofailover_quick_setup.rst b/autofailover_quick_setup.rst index d5b763ce..a7987ec1 100644 --- a/autofailover_quick_setup.rst +++ b/autofailover_quick_setup.rst @@ -112,7 +112,7 @@ Log in node2. Clone the node1 (the current Master):: su - postgres - repmgr -d repmgr -U repmgr standby clone node1 + repmgr -d repmgr -U repmgr -h node1 standby clone Start the PostgreSQL server:: @@ -173,7 +173,7 @@ Log in witness. Initialize the witness server:: su - postgres - repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create node1 + repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create It needs information to connect to the master to copy the configuration of the cluster, also it needs to know where it should initialize it's own $PGDATA. As part of the procees it also ask for the superuser password so it can connect when needed. 
From 95ec0450da5f55018b7d139048037a3d54161537 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 30 Aug 2012 02:11:48 -0500 Subject: [PATCH 07/51] When we have more command-line arguments than we should have we need to show that last value and we should use only optind for that instead of optind+1 --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 68e5db16..be5ca394 100644 --- a/repmgr.c +++ b/repmgr.c @@ -269,7 +269,7 @@ main(int argc, char **argv) break; default: log_err(_("%s: too many command-line arguments (first extra is \"%s\")\n"), - progname, argv[optind + 1]); + progname, argv[optind]); usage(); exit(ERR_BAD_CONFIG); } From 0a9107d76d9f3de389f74c6dd90d6707e3928357 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Tue, 11 Sep 2012 15:53:57 -0500 Subject: [PATCH 08/51] Improve sample of commands for promote and follow --- repmgr.conf.sample | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 9e8ad42b..6e81aa3f 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -23,8 +23,8 @@ reconnect_interval=10 # Autofailover options failover=automatic priority=-1 -promote_command='repmgr promote' -follow_command='repmgr follow' +promote_command='repmgr standby promote -f /path/to/repmgr.conf' +follow_command='repmgr standby follow -f /path/to/repmgr.conf' # Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG # Default: NOTICE From 499a501afd01d2c61a153913ae56c48c4cc2a5ac Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sat, 15 Sep 2012 17:32:38 -0500 Subject: [PATCH 09/51] Make repmgr compatible with FreeBSD. We need to add an #include and make it use a different path for the "true" binary. 
Maybe we need to make these changes for all BSD systems, but having no evidence of that I prefer to make them only for systems with __FreeBSD__ --- repmgr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c index be5ca394..eb9d254c 100644 --- a/repmgr.c +++ b/repmgr.c @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -1762,11 +1763,18 @@ test_ssh_connection(char *host, char *remote_user) char script[MAXLEN]; int r; +/* On some OS, true is located in a different place than in Linux */ +#ifdef __FreeBSD__ +#define TRUEBIN_PATH "/usr/bin/true" +#else +#define TRUEBIN_PATH "/bin/true" +#endif + /* Check if we have ssh connectivity to host before trying to rsync */ if (!remote_user[0]) - maxlen_snprintf(script, "ssh -o Batchmode=yes %s /bin/true", host); + maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s", host, TRUEBIN_PATH); else - maxlen_snprintf(script, "ssh -o Batchmode=yes %s -l %s /bin/true", host, remote_user); + maxlen_snprintf(script, "ssh -o Batchmode=yes %s -l %s %s", host, remote_user, TRUEBIN_PATH); log_debug(_("command is: %s"), script); r = system(script); From de883a4c843a3631732e4ae2f3e8acd73012249e Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sun, 16 Sep 2012 02:10:02 -0500 Subject: [PATCH 10/51] Keep compiler quiet. Noted when compiling in FreeBSD in which I get a warning for an uninitialized variable. Also, define InvalidXLogRecPtr. We don't really need it but using it makes the initialization future proof (considering that in 9.3 XLogRecPtr will change its structure). 
--- repmgrd.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index 8bc47d38..2c624858 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -35,6 +35,15 @@ #include "access/xlogdefs.h" #include "libpq/pqsignal.h" +/* + * we do not export InvalidXLogRecPtr so we need to define it + * but since 9.3 it will be defined in xlogdefs.h which we include + * so better to ask if it's defined to be future proof + */ +#ifndef InvalidXLogRecPtr +const XLogRecPtr InvalidXLogRecPtr = {0, 0}; +#endif + /* * Struct to keep info about the nodes, used in the voting process in * do_failover() @@ -582,7 +591,8 @@ do_failover(void) * which seems to be large enough for most scenarios */ nodeInfo nodes[50]; - nodeInfo best_candidate; + /* initialize to keep compiler quiet */ + nodeInfo best_candidate = {-1, InvalidXLogRecPtr, false }; /* first we get info about this node, and update shared memory */ sprintf(sqlquery, "SELECT pg_last_xlog_replay_location()"); From 2e19b3688b11145f44da1767bcaf4e86f1cc57c9 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sun, 16 Sep 2012 02:23:16 -0500 Subject: [PATCH 11/51] Add a comment --- repmgrd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/repmgrd.c b/repmgrd.c index 2c624858..e5187a3f 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -32,6 +32,7 @@ #include "strutil.h" #include "version.h" +/* PostgreSQL's headers needed to export some functionality */ #include "access/xlogdefs.h" #include "libpq/pqsignal.h" From cd1a84252e0f5141d063f4fe000472cb7c9f7174 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sun, 16 Sep 2012 02:38:28 -0500 Subject: [PATCH 12/51] Fix node decision logic when priorities are involved. Currently if two nodes with different priorities are equally good to be promoted the second one (with a lower priority, considering them in descending order) will win. 
Per report from Brailean Dumitru --- repmgrd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index e5187a3f..24d612a9 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -708,13 +708,13 @@ do_failover(void) find_best = true; } - /* we use the macros provided by xlogdefs.h to compare XLogPtr */ + /* we use the macros provided by xlogdefs.h to compare XLogRecPtr */ /* * Nodes are retrieved ordered by priority, so if the current - * best candidate is lower or equal to the next node's wal location + * best candidate is lower than the next node's wal location * then assign next node as the new best candidate. */ - if (XLByteLE(best_candidate.xlog_location, nodes[i].xlog_location)) + if (XLByteLT(best_candidate.xlog_location, nodes[i].xlog_location)) { best_candidate.nodeId = nodes[i].nodeId; best_candidate.xlog_location.xlogid = nodes[i].xlog_location.xlogid; From 30e9d061728abfdcdc6ea6b94c0fab641f786a59 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sun, 11 Nov 2012 10:07:34 -0500 Subject: [PATCH 13/51] Add an option for STANDBY FOLLOW to wait for a master to appear. This is important for autofailover to do the right thing when standbys detect master death at different times. While this is a new option, it seems important for the autofailover to work properly so I will consider the lack of it a bug and will backpatch to 2.0 where autofailover was introduced. Per gripe from Alex Railean, about a standby not finding the new master because the new master hasn't finished promoting. 
--- repmgr.c | 25 +++++++++++++++++++------ repmgr.conf.sample | 2 +- repmgr.h | 1 + 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/repmgr.c b/repmgr.c index eb9d254c..2d4772c6 100644 --- a/repmgr.c +++ b/repmgr.c @@ -85,7 +85,7 @@ bool need_a_node = true; bool require_password = false; /* Initialization of runtime options */ -t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "", "", 0 }; +t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }; t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 }; static char *server_mode = NULL; @@ -107,6 +107,7 @@ main(int argc, char **argv) {"wal-keep-segments", required_argument, NULL, 'w'}, {"keep-history", required_argument, NULL, 'k'}, {"force", no_argument, NULL, 'F'}, + {"wait", no_argument, NULL, 'W'}, {"ignore-rsync-warning", no_argument, NULL, 'I'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} @@ -133,7 +134,7 @@ main(int argc, char **argv) } - while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:I:v", long_options, + while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:W:I:v", long_options, &optindex)) != -1) { switch (c) @@ -177,6 +178,9 @@ main(int argc, char **argv) case 'F': runtime_options.force = true; break; + case 'W': + runtime_options.wait_for_master = true; + break; case 'I': runtime_options.ignore_rsync_warn = true; break; @@ -1378,10 +1382,18 @@ do_standby_follow(void) exit(ERR_BAD_CONFIG); } - /* we also need to check if there is any master in the cluster */ - log_info(_("%s connecting to master database\n"), progname); - master_conn = getMasterConnection(conn, repmgr_schema, - options.cluster_name, &master_id,(char *) &master_conninfo); + /* + * we also need to check if there is any master in the cluster + * or wait for one to appear if we have set the wait option + */ + log_info(_("%s 
discovering new master...\n"), progname); + + do + { + master_conn = getMasterConnection(conn, repmgr_schema, + options.cluster_name, &master_id,(char *) &master_conninfo); + } while (master_conn == NULL && runtime_options.wait_for_master); + if (master_conn == NULL) { log_err(_("There isn't a master to follow in this cluster\n")); @@ -1700,6 +1712,7 @@ help(const char *progname) printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n")); printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n")); printf(_(" -F, --force force potentially dangerous operations to happen\n")); + printf(_(" -W, --wait wait for a master to appear")); printf(_("\n%s performs some tasks like clone a node, promote it "), progname); printf(_("or making follow another node and then exits.\n")); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 6e81aa3f..9daaf3f4 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -24,7 +24,7 @@ reconnect_interval=10 failover=automatic priority=-1 promote_command='repmgr standby promote -f /path/to/repmgr.conf' -follow_command='repmgr standby follow -f /path/to/repmgr.conf' +follow_command='repmgr standby follow -f /path/to/repmgr.conf -W' # Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG # Default: NOTICE diff --git a/repmgr.h b/repmgr.h index dcab7d46..295a4645 100644 --- a/repmgr.h +++ b/repmgr.h @@ -59,6 +59,7 @@ typedef struct char wal_keep_segments[MAXLEN]; bool verbose; bool force; + bool wait_for_master; bool ignore_rsync_warn; char masterport[MAXLEN]; From 088ca29fe3267a480684e3eb482db522c10d5817 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Mon, 3 Dec 2012 09:18:08 -0500 Subject: [PATCH 14/51] To select new master it needs to know which standby has received more xlog records from master, so each standby should use pg_last_xlog_receive_location() to report its position. 
This solves a possible situation in which a standby that is considered as new master when promoted is no longer the best option. --- repmgrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index 24d612a9..96e5ea81 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -596,7 +596,7 @@ do_failover(void) nodeInfo best_candidate = {-1, InvalidXLogRecPtr, false }; /* first we get info about this node, and update shared memory */ - sprintf(sqlquery, "SELECT pg_last_xlog_replay_location()"); + sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); res1 = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res1) != PGRES_TUPLES_OK) { From 93a999adc70f751d61f54a64a27e07552267b057 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Tue, 11 Dec 2012 11:49:07 -0500 Subject: [PATCH 15/51] Formatting code using astyle --- check_dir.c | 6 ++-- dbutils.c | 12 +++---- dbutils.h | 2 +- repmgr.c | 102 +++++++++++++++++++++++++++------------------------- repmgrd.c | 22 ++++++------ 5 files changed, 75 insertions(+), 69 deletions(-) diff --git a/check_dir.c b/check_dir.c index c3a552b4..5ae4c096 100644 --- a/check_dir.c +++ b/check_dir.c @@ -127,10 +127,10 @@ mkdir_p(char *path, mode_t omode) { struct stat sb; mode_t numask, - oumask; + oumask; int first, - last, - retval; + last, + retval; char *p; p = path; diff --git a/dbutils.c b/dbutils.c index d2936c41..8ed67c2e 100644 --- a/dbutils.c +++ b/dbutils.c @@ -153,8 +153,8 @@ is_pgup(PGconn *conn, int timeout) sqlquery_snprintf(sqlquery, "SELECT 1"); if (PQsendQuery(conn, sqlquery) == 0) { - log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"), - PQerrorMessage(conn)); + log_warning(_("PQsendQuery: Query could not be sent to primary. 
%s\n"), + PQerrorMessage(conn)); goto failed; } if (wait_connection_availability(conn, timeout) != 1) @@ -396,7 +396,7 @@ getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, /* * wait until current query finishes ignoring any results, this could be an async command - * or a cancelation of a query + * or a cancelation of a query * return 1 if Ok; 0 if any error ocurred; -1 if timeout reached */ int @@ -408,11 +408,11 @@ wait_connection_availability(PGconn *conn, int timeout) { if (PQconsumeInput(conn) == 0) { - log_warning(_("PQconsumeInput: Query could not be sent to primary. %s\n"), - PQerrorMessage(conn)); + log_warning(_("PQconsumeInput: Query could not be sent to primary. %s\n"), + PQerrorMessage(conn)); return 0; } - + if (PQisBusy(conn) == 0) { res = PQgetResult(conn); diff --git a/dbutils.h b/dbutils.h index c6355758..90a6d390 100644 --- a/dbutils.h +++ b/dbutils.h @@ -31,7 +31,7 @@ bool is_witness(PGconn *conn, char *schema, char *cluster, int node_id); bool is_pgup(PGconn *conn, int timeout); char *pg_version(PGconn *conn, char* major_version); bool guc_setted(PGconn *conn, const char *parameter, const char *op, - const char *value); + const char *value); const char *get_cluster_size(PGconn *conn); PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, int *master_id, char *master_conninfo_out); diff --git a/repmgr.c b/repmgr.c index 2d4772c6..a48ff9a8 100644 --- a/repmgr.c +++ b/repmgr.c @@ -56,7 +56,7 @@ static bool create_recovery_file(const char *data_dir); static int test_ssh_connection(char *host, char *remote_user); static int copy_remote_files(char *host, char *remote_user, char *remote_path, - char *local_path, bool is_directory); + char *local_path, bool is_directory); static bool check_parameters_for_action(const int action); static bool create_schema(PGconn *conn); static bool copy_configuration(PGconn *masterconn, PGconn *witnessconn); @@ -105,7 +105,7 @@ main(int argc, char **argv) {"config-file", 
required_argument, NULL, 'f'}, {"remote-user", required_argument, NULL, 'R'}, {"wal-keep-segments", required_argument, NULL, 'w'}, - {"keep-history", required_argument, NULL, 'k'}, + {"keep-history", required_argument, NULL, 'k'}, {"force", no_argument, NULL, 'F'}, {"wait", no_argument, NULL, 'W'}, {"ignore-rsync-warning", no_argument, NULL, 'I'}, @@ -173,7 +173,7 @@ main(int argc, char **argv) if (atoi(optarg) > 0) runtime_options.keep_history = atoi(optarg); else - runtime_options.keep_history = 0; + runtime_options.keep_history = 0; break; case 'F': runtime_options.force = true; @@ -425,7 +425,7 @@ do_cluster_show(void) PQclear(res); } - static void +static void do_cluster_cleanup(void) { int master_id; @@ -434,14 +434,14 @@ do_cluster_cleanup(void) PGresult *res; char sqlquery[QUERY_STR_LEN]; - /* We need to connect to check configuration */ - log_info(_("%s connecting to database\n"), progname); - conn = establishDBConnection(options.conninfo, true); + /* We need to connect to check configuration */ + log_info(_("%s connecting to database\n"), progname); + conn = establishDBConnection(options.conninfo, true); /* check if there is a master in this cluster */ log_info(_("%s connecting to master database\n"), progname); master_conn = getMasterConnection(conn, repmgr_schema, options.cluster_name, - &master_id, NULL); + &master_id, NULL); if (!master_conn) { log_err(_("cluster cleanup: cannot connect to master\n")); @@ -453,8 +453,8 @@ do_cluster_cleanup(void) if (runtime_options.keep_history > 0) { sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_monitor " - " WHERE age(now(), last_monitor_time) >= '%d days'::interval;", - repmgr_schema, runtime_options.keep_history); + " WHERE age(now(), last_monitor_time) >= '%d days'::interval;", + repmgr_schema, runtime_options.keep_history); } else { @@ -566,7 +566,7 @@ do_master_register(void) int id; /* Ensure there isn't any other master already registered */ - master_conn = getMasterConnection(conn, repmgr_schema, + 
master_conn = getMasterConnection(conn, repmgr_schema, options.cluster_name, &id,NULL); if (master_conn != NULL) { @@ -595,8 +595,8 @@ do_master_register(void) sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, name, conninfo, priority) " "VALUES (%d, '%s', '%s', '%s', %d)", - repmgr_schema, options.node, options.cluster_name, options.node_name, - options.conninfo, options.priority); + repmgr_schema, options.node, options.cluster_name, options.node_name, + options.conninfo, options.priority); log_debug(_("master register: %s\n"), sqlquery); if (!PQexec(conn, sqlquery)) @@ -738,8 +738,8 @@ do_standby_register(void) sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, name, conninfo, priority) " "VALUES (%d, '%s', '%s', '%s', %d)", - repmgr_schema, options.node, options.cluster_name, options.node_name, - options.conninfo, options.priority); + repmgr_schema, options.node, options.cluster_name, options.node_name, + options.conninfo, options.priority); log_debug(_("standby register: %s\n"), sqlquery); if (!PQexec(master_conn, sqlquery)) @@ -925,7 +925,7 @@ do_standby_clone(void) PQfinish(conn); exit(ERR_BAD_CONFIG); } - + /* We need all 5 parameters, and they can be retrieved only by superusers */ if (PQntuples(res) != 5) { @@ -988,8 +988,8 @@ do_standby_clone(void) log_notice(_("Starting backup...\n")); - /* - * in pg 9.1 default is to wait for a sync standby to ack, + /* + * in pg 9.1 default is to wait for a sync standby to ack, * avoid that by turning off sync rep for this session */ sqlquery_snprintf(sqlquery, "SET synchronous_commit TO OFF"); @@ -1382,17 +1382,18 @@ do_standby_follow(void) exit(ERR_BAD_CONFIG); } - /* - * we also need to check if there is any master in the cluster + /* + * we also need to check if there is any master in the cluster * or wait for one to appear if we have set the wait option */ log_info(_("%s discovering new master...\n"), progname); do { - master_conn = getMasterConnection(conn, repmgr_schema, - 
options.cluster_name, &master_id,(char *) &master_conninfo); - } while (master_conn == NULL && runtime_options.wait_for_master); + master_conn = getMasterConnection(conn, repmgr_schema, + options.cluster_name, &master_id,(char *) &master_conninfo); + } + while (master_conn == NULL && runtime_options.wait_for_master); if (master_conn == NULL) { @@ -1499,7 +1500,7 @@ do_witness_create(void) if (!create_pgdir(runtime_options.dest_dir, runtime_options.force)) { log_err(_("witness create: couldn't create data directory (\"%s\") for witness"), - runtime_options.dest_dir); + runtime_options.dest_dir); exit(ERR_BAD_CONFIG); } @@ -1595,8 +1596,8 @@ do_witness_create(void) /* Get the pg_hba.conf full path */ sqlquery_snprintf(sqlquery, "SELECT name, setting " - " FROM pg_settings " - " WHERE name IN ('hba_file')"); + " FROM pg_settings " + " WHERE name IN ('hba_file')"); log_debug(_("witness create: %s"), sqlquery); res = PQexec(masterconn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -1710,7 +1711,7 @@ help(const char *progname) printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n")); - printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n")); + printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n")); printf(_(" -F, --force force potentially dangerous operations to happen\n")); printf(_(" -W, --wait wait for a master to appear")); @@ -1724,7 +1725,7 @@ help(const char *progname) printf(_("new master in the event of a failover\n")); printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); printf(_(" cluster show - print node informations\n")); - printf(_(" cluster cleanup - cleans monitor's history\n")); + printf(_(" cluster cleanup - cleans monitor's history\n")); } @@ 
-1776,7 +1777,7 @@ test_ssh_connection(char *host, char *remote_user) char script[MAXLEN]; int r; -/* On some OS, true is located in a different place than in Linux */ + /* On some OS, true is located in a different place than in Linux */ #ifdef __FreeBSD__ #define TRUEBIN_PATH "/usr/bin/true" #else @@ -2025,7 +2026,7 @@ create_schema(PGconn *conn) sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( " " id integer primary key, " " cluster text not null, " - " name text not null, " + " name text not null, " " conninfo text not null, " " priority integer not null, " " witness boolean not null default false)", repmgr_schema); @@ -2058,8 +2059,8 @@ create_schema(PGconn *conn) /* a view */ sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS " " SELECT primary_node, standby_node, name AS standby_name, last_monitor_time, " - " last_wal_primary_location, last_wal_standby_location, " - " pg_size_pretty(replication_lag) replication_lag, " + " last_wal_primary_location, last_wal_standby_location, " + " pg_size_pretty(replication_lag) replication_lag, " " pg_size_pretty(apply_lag) apply_lag, " " age(now(), last_monitor_time) AS time_lag " " FROM %s.repl_monitor JOIN %s.repl_nodes ON standby_node = id " @@ -2077,8 +2078,8 @@ create_schema(PGconn *conn) /* an index to improve performance of the view */ sqlquery_snprintf(sqlquery, "CREATE INDEX idx_repl_status_sort " - " ON %s.repl_monitor (last_monitor_time, standby_node) ", - repmgr_schema); + " ON %s.repl_monitor (last_monitor_time, standby_node) ", + repmgr_schema); log_debug(_("master register: %s\n"), sqlquery); if (!PQexec(conn, sqlquery)) { @@ -2090,9 +2091,9 @@ create_schema(PGconn *conn) /* XXX Here we MUST try to load the repmgr_function.sql not hardcode it here */ sqlquery_snprintf(sqlquery, - "CREATE OR REPLACE FUNCTION %s.repmgr_update_standby_location(text) RETURNS boolean " - "AS '$libdir/repmgr_funcs', 'repmgr_update_standby_location' " - "LANGUAGE C STRICT ", repmgr_schema); + "CREATE OR REPLACE 
FUNCTION %s.repmgr_update_standby_location(text) RETURNS boolean " + "AS '$libdir/repmgr_funcs', 'repmgr_update_standby_location' " + "LANGUAGE C STRICT ", repmgr_schema); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the function repmgr_update_standby_location: %s\n", @@ -2101,9 +2102,9 @@ create_schema(PGconn *conn) } sqlquery_snprintf(sqlquery, - "CREATE OR REPLACE FUNCTION %s.repmgr_get_last_standby_location() RETURNS text " - "AS '$libdir/repmgr_funcs', 'repmgr_get_last_standby_location' " - "LANGUAGE C STRICT ", repmgr_schema); + "CREATE OR REPLACE FUNCTION %s.repmgr_get_last_standby_location() RETURNS text " + "AS '$libdir/repmgr_funcs', 'repmgr_get_last_standby_location' " + "LANGUAGE C STRICT ", repmgr_schema); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the function repmgr_get_last_standby_location: %s\n", @@ -2175,30 +2176,35 @@ write_primary_conninfo(char* line) /* Environment variable for password (UGLY, please use .pgpass!) */ const char *password = getenv("PGPASSWORD"); - if (password != NULL) { + if (password != NULL) + { maxlen_snprintf(password_buf, " password=%s", password); } - else if (require_password) { + else if (require_password) + { log_err(_("%s: PGPASSWORD not set, but having one is required\n"), - progname); + progname); exit(ERR_BAD_PASSWORD); } - if (runtime_options.host[0]) { + if (runtime_options.host[0]) + { maxlen_snprintf(host_buf, " host=%s", runtime_options.host); } - if (runtime_options.username[0]) { + if (runtime_options.username[0]) + { maxlen_snprintf(user_buf, " user=%s", runtime_options.username); } - if (options.node_name[0]) { + if (options.node_name[0]) + { maxlen_snprintf(appname_buf, " application_name=%s", options.node_name); } maxlen_snprintf(conn_buf, "port=%s%s%s%s%s", - (runtime_options.masterport[0]) ? runtime_options.masterport : "5432", host_buf, user_buf, password_buf, - appname_buf); + (runtime_options.masterport[0]) ? 
runtime_options.masterport : "5432", host_buf, user_buf, password_buf, + appname_buf); maxlen_snprintf(line, "primary_conninfo = '%s'", conn_buf); diff --git a/repmgrd.c b/repmgrd.c index 96e5ea81..b444df4e 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -36,12 +36,12 @@ #include "access/xlogdefs.h" #include "libpq/pqsignal.h" -/* - * we do not export InvalidXLogRecPtr so we need to define it +/* + * we do not export InvalidXLogRecPtr so we need to define it * but since 9.3 it will be defined in xlogdefs.h which we include * so better to ask if it's defined to be future proof */ -#ifndef InvalidXLogRecPtr +#ifndef InvalidXLogRecPtr const XLogRecPtr InvalidXLogRecPtr = {0, 0}; #endif @@ -273,7 +273,7 @@ main(int argc, char **argv) /* I need the id of the primary as well as a connection to it */ log_info(_("%s Connecting to primary for cluster '%s'\n"), progname, local_options.cluster_name); - primaryConn = getMasterConnection(myLocalConn, repmgr_schema, + primaryConn = getMasterConnection(myLocalConn, repmgr_schema, local_options.cluster_name, &primary_options.node, NULL); if (primaryConn == NULL) @@ -448,7 +448,7 @@ StandbyMonitor(void) log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n")); for (connection_retries = 0; connection_retries < 6; connection_retries++) { - primaryConn = getMasterConnection(myLocalConn, repmgr_schema, + primaryConn = getMasterConnection(myLocalConn, repmgr_schema, local_options.cluster_name, &primary_options.node, NULL); if (PQstatus(primaryConn) == CONNECTION_OK) { @@ -780,9 +780,9 @@ CheckPrimaryConnection(void) { if (!is_pgup(primaryConn, local_options.master_response_timeout)) { - log_warning(_("%s: Connection to master has been lost, trying to recover... %i seconds before failover decision\n"), - progname, - (local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries))); + log_warning(_("%s: Connection to master has been lost, trying to recover... 
%i seconds before failover decision\n"), + progname, + (local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries))); /* wait local_options.reconnect_intvl seconds between retries */ sleep(local_options.reconnect_intvl); } @@ -889,7 +889,7 @@ checkNodeConfiguration(char *conninfo) "VALUES (%d, '%s', '%s', '%s', 0, 'f')", repmgr_schema, local_options.node, local_options.cluster_name, - local_options.node_name, + local_options.node_name, local_options.conninfo); if (!PQexec(primaryConn, sqlquery)) @@ -900,7 +900,7 @@ checkNodeConfiguration(char *conninfo) exit(ERR_BAD_CONFIG); } } - else + else { PQclear(res); } @@ -973,7 +973,7 @@ update_shared_memory(char *last_wal_standby_applied) PGresult *res; sprintf(sqlquery, "SELECT %s.repmgr_update_standby_location('%s')", - repmgr_schema, last_wal_standby_applied); + repmgr_schema, last_wal_standby_applied); /* If an error happens, just inform about that and continue */ res = PQexec(myLocalConn, sqlquery); From 2a5d4314812bff7a0ebce2b0af84eee7e0e652d6 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Wed, 19 Dec 2012 11:45:58 -0500 Subject: [PATCH 16/51] Fix a problem that caused a standby to promote itself without going to voting procedure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is because of a race condition inside CheckPrimaryConnection(). This has independently reported by Alex Railean and Dumitru, and Frank Jördens. Analyzed and fixed by Cédric Villemain. 
The fix have been verified to work by Frank --- repmgrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index b444df4e..84283fa9 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -792,7 +792,7 @@ CheckPrimaryConnection(void) { log_info(_("%s: Connection to master has been restored.\n"), progname); } - break; + return true; } } if (!is_pgup(primaryConn, local_options.master_response_timeout)) From 4191b77e7043d0d1ad432abd30c9c8db608e0e2e Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Fri, 11 Jan 2013 03:42:08 -0500 Subject: [PATCH 17/51] If the node is a witness don't bother asking its position, it always will be 0/0. We just need to check that we can connect to it to determine if we are in the majority. --- repmgrd.c | 59 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 84283fa9..5cd18e2a 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -54,6 +54,7 @@ typedef struct nodeInfo int nodeId; XLogRecPtr xlog_location; bool is_ready; + bool is_witness; } nodeInfo; @@ -574,6 +575,7 @@ do_failover(void) int total_nodes = 0; int visible_nodes = 0; bool find_best = false; + bool witness = false; int i; int r; @@ -593,7 +595,7 @@ do_failover(void) */ nodeInfo nodes[50]; /* initialize to keep compiler quiet */ - nodeInfo best_candidate = {-1, InvalidXLogRecPtr, false }; + nodeInfo best_candidate = {-1, InvalidXLogRecPtr, false, false}; /* first we get info about this node, and update shared memory */ sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); @@ -617,7 +619,7 @@ do_failover(void) sleep(SLEEP_MONITOR + 1); /* get a list of standby nodes, including myself */ - sprintf(sqlquery, "SELECT id, conninfo " + sprintf(sqlquery, "SELECT id, conninfo, witness " " FROM %s.repl_nodes " " WHERE id <> %d " " AND cluster = '%s' " @@ -633,6 +635,7 @@ do_failover(void) exit(ERR_DB_QUERY); } + log_debug(_("%s: there are %d nodes registered"), progname, 
PQntuples(res1)); /* ask for the locations */ for (i = 0; i < PQntuples(res1); i++) { @@ -640,33 +643,48 @@ do_failover(void) /* Initialize on false so if we can't reach this node we know that later */ nodes[i].is_ready = false; strncpy(nodeConninfo, PQgetvalue(res1, i, 1), MAXLEN); + witness = (strcmp(PQgetvalue(res1, i, 2), "t") == 0) ? true : false; + + log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s"), progname, node, nodeConninfo, (witness) ? "true" : "false"); + nodeConn = establishDBConnection(nodeConninfo, false); /* if we can't see the node just skip it */ if (PQstatus(nodeConn) != CONNECTION_OK) continue; - sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); - res2 = PQexec(nodeConn, sqlquery); - if (PQresultStatus(res2) != PGRES_TUPLES_OK) + /* the witness will always show 0/0 so avoid a useless query */ + if (!witness) { - log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn)); - log_info(_("Connection details: %s\n"), nodeConninfo); + sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); + res2 = PQexec(nodeConn, sqlquery); + if (PQresultStatus(res2) != PGRES_TUPLES_OK) + { + log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn)); + log_info(_("Connection details: %s\n"), nodeConninfo); + PQclear(res2); + PQfinish(nodeConn); + continue; + } + + if (sscanf(PQgetvalue(res2, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) + log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res2, 0, 0)); + PQclear(res2); - PQfinish(nodeConn); - continue; + } + else + { + uxlogid = 0; + uxrecoff = 0; } visible_nodes++; - if (sscanf(PQgetvalue(res2, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) - log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res2, 0, 0)); - nodes[i].nodeId = node; nodes[i].xlog_location.xlogid = uxlogid; nodes[i].xlog_location.xrecoff = uxrecoff; nodes[i].is_ready = 
true; + nodes[i].is_witness = witness; - PQclear(res2); PQfinish(nodeConn); } PQclear(res1); @@ -696,15 +714,21 @@ do_failover(void) */ for (i = 0; i < total_nodes - 1; i++) { + /* witness is never a good candidate */ + if (nodes[i].is_witness) + continue; + if (!nodes[i].is_ready) continue; - else if (!find_best) + + if (!find_best) { /* start with the first ready node, and then move on to the next one */ best_candidate.nodeId = nodes[i].nodeId; best_candidate.xlog_location.xlogid = nodes[i].xlog_location.xlogid; best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff; best_candidate.is_ready = nodes[i].is_ready; + best_candidate.is_witness = nodes[i].is_witness; find_best = true; } @@ -720,12 +744,19 @@ do_failover(void) best_candidate.xlog_location.xlogid = nodes[i].xlog_location.xlogid; best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff; best_candidate.is_ready = nodes[i].is_ready; + best_candidate.is_witness = nodes[i].is_witness; } } /* once we know who is the best candidate, promote it */ if (find_best && (best_candidate.nodeId == local_options.node)) { + if (best_candidate.is_witness) + { + log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"), progname); + exit(ERR_FAILOVER_FAIL); + } + if (verbose) log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"), progname); From 49a2531930fdf42ec76416b692086c6274ab5982 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sun, 13 Jan 2013 16:32:56 -0500 Subject: [PATCH 18/51] Options -F -W -I -v doesn't accept arguments, which means that on getopt_long shouldn't be marked with the colon (:) character. 
This has been wrong since day one, so backpatching all the way until 1.1 --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index a48ff9a8..5ed4c7d2 100644 --- a/repmgr.c +++ b/repmgr.c @@ -134,7 +134,7 @@ main(int argc, char **argv) } - while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:W:I:v", long_options, + while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:FWIv", long_options, &optindex)) != -1) { switch (c) From b0b44a157f2b546f9fc1f299a53d80f76cbd4a49 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Wed, 10 Jul 2013 09:53:45 -0500 Subject: [PATCH 19/51] If PQcancel() fails, consider it as if the master is failing. Because PQcancel() establish a new synchronous connection to the database, if it fails it means something wrong has happenned with master. So instead of just ignore the failure, CancelQuery() now reports a failure condition so we can detect master's death in that situation. This is very important specially when only postmaster crashes but other children/backend connections are still there. Because the children connection won't fail and CancelQuery() failure is our only indication of something wrong happenning. Currently we just ignore the PQcancel() failure which leads us to a situation in which we just loop forever trying to cancel the async query. 
Reported by: Martin Euser Problem analyzed and bug spotted by: Andres Freund Patch by: Jaime Casanova --- dbutils.c | 28 ++++++++++++++++++++++------ dbutils.h | 2 +- repmgrd.c | 8 +++++--- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/dbutils.c b/dbutils.c index 8ed67c2e..ca7ac4bc 100644 --- a/dbutils.c +++ b/dbutils.c @@ -146,7 +146,8 @@ is_pgup(PGconn *conn, int timeout) /* * Send a SELECT 1 just to check if the connection is OK */ - CancelQuery(conn, timeout); + if (!CancelQuery(conn, timeout)) + goto failed; if (wait_connection_availability(conn, timeout) != 1) goto failed; @@ -429,18 +430,33 @@ wait_connection_availability(PGconn *conn, int timeout) } -void +bool CancelQuery(PGconn *conn, int timeout) { char errbuf[ERRBUFF_SIZE]; PGcancel *pgcancel; - wait_connection_availability(conn, timeout); + if (wait_connection_availability(conn, timeout) != 1) + return false; pgcancel = PQgetCancel(conn); - if (!pgcancel || PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0) - log_warning(_("Can't stop current query: %s\n"), errbuf); + if (pgcancel != NULL) + { + /* + * PQcancel can only return 0 if socket()/connect()/send() + * fails, in any of those cases we can assume something + * bad happened to the connection + */ + if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0) + { + log_warning(_("Can't stop current query: %s\n"), errbuf); + PQfreeCancel(pgcancel); + return false; + } - PQfreeCancel(pgcancel); + PQfreeCancel(pgcancel); + } + + return true; } diff --git a/dbutils.h b/dbutils.h index 90a6d390..8d983048 100644 --- a/dbutils.h +++ b/dbutils.h @@ -37,5 +37,5 @@ PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, int *master_id, char *master_conninfo_out); int wait_connection_availability(PGconn *conn, int timeout); -void CancelQuery(PGconn *conn, int timeout); +bool CancelQuery(PGconn *conn, int timeout); #endif diff --git a/repmgrd.c b/repmgrd.c index 5cd18e2a..a15cbef2 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -111,7 
+111,7 @@ static void setup_event_handlers(void); #define CloseConnections() \ if (PQisBusy(primaryConn) == 1) \ - CancelQuery(primaryConn, local_options.master_response_timeout); \ + (void) CancelQuery(primaryConn, local_options.master_response_timeout); \ if (myLocalConn != NULL) \ PQfinish(myLocalConn); \ if (primaryConn != NULL && primaryConn != myLocalConn) \ @@ -376,7 +376,8 @@ WitnessMonitor(void) * Cancel any query that is still being executed, * so i can insert the current record */ - CancelQuery(primaryConn, local_options.master_response_timeout); + if (!CancelQuery(primaryConn, local_options.master_response_timeout)) + return; if (wait_connection_availability(primaryConn, local_options.master_response_timeout) != 1) return; @@ -497,7 +498,8 @@ StandbyMonitor(void) * Cancel any query that is still being executed, * so i can insert the current record */ - CancelQuery(primaryConn, local_options.master_response_timeout); + if (!CancelQuery(primaryConn, local_options.master_response_timeout)) + return; if (wait_connection_availability(primaryConn, local_options.master_response_timeout) != 1) return; From 2bc8044fdabcd520cf5cc94ba90005bfad8d5072 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Wed, 10 Jul 2013 19:25:58 -0500 Subject: [PATCH 20/51] Improve messages in wait_connection_availability, so we know what error makes the failover procedure to start By gripe from Andres Freund --- dbutils.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbutils.c b/dbutils.c index ca7ac4bc..0aef3964 100644 --- a/dbutils.c +++ b/dbutils.c @@ -409,7 +409,7 @@ wait_connection_availability(PGconn *conn, int timeout) { if (PQconsumeInput(conn) == 0) { - log_warning(_("PQconsumeInput: Query could not be sent to primary. %s\n"), + log_warning(_("wait_connection_availability: could not receive data from master. 
%s\n"), PQerrorMessage(conn)); return 0; } @@ -425,8 +425,10 @@ wait_connection_availability(PGconn *conn, int timeout) } if (timeout >= 0) return 1; - else + else { + log_warning(_("wait_connection_availability: timeout reached"); return -1; + } } From 2e7acf03c4f2a45339be89e2cc97264e535ca362 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Fri, 12 Jul 2013 08:01:01 -0500 Subject: [PATCH 21/51] If PQgetCancel() returns NULL we should also return false. Noted by Andres Freund. --- dbutils.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/dbutils.c b/dbutils.c index 0aef3964..96ba5530 100644 --- a/dbutils.c +++ b/dbutils.c @@ -443,22 +443,22 @@ CancelQuery(PGconn *conn, int timeout) pgcancel = PQgetCancel(conn); - if (pgcancel != NULL) - { - /* - * PQcancel can only return 0 if socket()/connect()/send() - * fails, in any of those cases we can assume something - * bad happened to the connection - */ - if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0) - { - log_warning(_("Can't stop current query: %s\n"), errbuf); - PQfreeCancel(pgcancel); - return false; - } + if (pgcancel == NULL) + return false; + /* + * PQcancel can only return 0 if socket()/connect()/send() + * fails, in any of those cases we can assume something + * bad happened to the connection + */ + if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0) + { + log_warning(_("Can't stop current query: %s\n"), errbuf); PQfreeCancel(pgcancel); + return false; } + PQfreeCancel(pgcancel); + return true; } From ad3630e7a93d2be3113d2605dbfb74ce41ec5da0 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sat, 13 Jul 2013 12:37:15 -0500 Subject: [PATCH 22/51] Add a missing ')'. This is a typo introduced in commit 2bc8044fdabcd520cf5cc94ba90005bfad8d5072 Per complaint from Carlos Chapi when compiling for a customer. 
--- dbutils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbutils.c b/dbutils.c index 96ba5530..0e4c4749 100644 --- a/dbutils.c +++ b/dbutils.c @@ -426,7 +426,7 @@ wait_connection_availability(PGconn *conn, int timeout) if (timeout >= 0) return 1; else { - log_warning(_("wait_connection_availability: timeout reached"); + log_warning(_("wait_connection_availability: timeout reached")); return -1; } } From 3b66a31ac9a65b8983618677b6a56f2eaedd9e9a Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Fri, 26 Jul 2013 00:52:31 -0500 Subject: [PATCH 23/51] In a failover situation get the nodes in a well defined order. When deciding which node will be the new master, we should get the nodes in a well defined order otherwise two standbys could process nodes with the same priority in different order and end up with a two master situation. --- repmgrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index a15cbef2..a68532f9 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -625,7 +625,7 @@ do_failover(void) " FROM %s.repl_nodes " " WHERE id <> %d " " AND cluster = '%s' " - " ORDER BY priority ", + " ORDER BY priority, id ", repmgr_schema, primary_options.node, local_options.cluster_name); res1 = PQexec(myLocalConn, sqlquery); From 1afaa3a26fb9cb4bd781ebf413ab5f6c72aec2e5 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 22 Aug 2013 14:50:29 -0500 Subject: [PATCH 24/51] Rearrange the logic in do_failover() for further improvements. Specially, make this a more coordinated process by making all nodes waiting for the others before going to the next step. This is one step further in following Andres Freund advices but there is still a lot to do in order to complete that, specially it could be needed to add more fields to repl_nodes and to the shm area. 
--- repmgrd.c | 280 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 194 insertions(+), 86 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index a68532f9..1f60162a 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -52,8 +52,10 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0}; typedef struct nodeInfo { int nodeId; + char conninfostr[MAXLEN]; XLogRecPtr xlog_location; bool is_ready; + bool is_visible; bool is_witness; } nodeInfo; @@ -570,23 +572,22 @@ StandbyMonitor(void) static void do_failover(void) { - PGresult *res1; - PGresult *res2; + PGresult *res; char sqlquery[8192]; int total_nodes = 0; int visible_nodes = 0; + int ready_nodes = 0; + bool find_best = false; - bool witness = false; int i; int r; - int node; - char nodeConninfo[MAXLEN]; + uint32 uxlogid; + uint32 uxrecoff; + XLogRecPtr xlog_recptr; - unsigned int uxlogid; - unsigned int uxrecoff; char last_wal_standby_applied[MAXLEN]; PGconn *nodeConn = NULL; @@ -596,108 +597,60 @@ do_failover(void) * which seems to be large enough for most scenarios */ nodeInfo nodes[50]; + /* initialize to keep compiler quiet */ - nodeInfo best_candidate = {-1, InvalidXLogRecPtr, false, false}; - - /* first we get info about this node, and update shared memory */ - sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); - res1 = PQexec(myLocalConn, sqlquery); - if (PQresultStatus(res1) != PGRES_TUPLES_OK) - { - log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn)); - PQclear(res1); - sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); - update_shared_memory(last_wal_standby_applied); - exit(ERR_DB_QUERY); - } - - /* write last location in shared memory */ - update_shared_memory(PQgetvalue(res1, 0, 0)); - - /* - * we sleep the monitor time + one second - * we bet it should be enough for other repmgrd to update their own data - */ - sleep(SLEEP_MONITOR + 1); + nodeInfo best_candidate = {-1, "", InvalidXLogRecPtr, false, false, 
false}; /* get a list of standby nodes, including myself */ sprintf(sqlquery, "SELECT id, conninfo, witness " " FROM %s.repl_nodes " - " WHERE id <> %d " - " AND cluster = '%s' " + " WHERE cluster = '%s' " " ORDER BY priority, id ", - repmgr_schema, primary_options.node, local_options.cluster_name); + repmgr_schema, local_options.cluster_name); - res1 = PQexec(myLocalConn, sqlquery); - if (PQresultStatus(res1) != PGRES_TUPLES_OK) + res = PQexec(myLocalConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_err(_("Can't get nodes info: %s\n"), PQerrorMessage(myLocalConn)); - PQclear(res1); + log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(myLocalConn)); + PQclear(res); PQfinish(myLocalConn); exit(ERR_DB_QUERY); } - log_debug(_("%s: there are %d nodes registered"), progname, PQntuples(res1)); - /* ask for the locations */ - for (i = 0; i < PQntuples(res1); i++) + /* + * total nodes that are registered + */ + total_nodes = PQntuples(res); + log_debug(_("%s: there are %d nodes registered\n"), progname, total_nodes); + + /* Build an array with the nodes and indicate which ones are visible and ready */ + for (i = 0; i < total_nodes; i++) { - node = atoi(PQgetvalue(res1, i, 0)); + nodes[i].nodeId = atoi(PQgetvalue(res, i, 0)); + strncpy(nodes[i].conninfostr, PQgetvalue(res, i, 1), MAXLEN); + nodes[i].is_witness = (strcmp(PQgetvalue(res, i, 2), "t") == 0) ? true : false; + /* Initialize on false so if we can't reach this node we know that later */ + nodes[i].is_visible = false; nodes[i].is_ready = false; - strncpy(nodeConninfo, PQgetvalue(res1, i, 1), MAXLEN); - witness = (strcmp(PQgetvalue(res1, i, 2), "t") == 0) ? true : false; + nodes[i].xlog_location.xlogid = 0; + nodes[i].xlog_location.xrecoff = 0; - log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s"), progname, node, nodeConninfo, (witness) ? 
"true" : "false"); + log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"), + progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? "true" : "false"); - nodeConn = establishDBConnection(nodeConninfo, false); + nodeConn = establishDBConnection(nodes[i].conninfostr, false); /* if we can't see the node just skip it */ if (PQstatus(nodeConn) != CONNECTION_OK) continue; - /* the witness will always show 0/0 so avoid a useless query */ - if (!witness) - { - sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); - res2 = PQexec(nodeConn, sqlquery); - if (PQresultStatus(res2) != PGRES_TUPLES_OK) - { - log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn)); - log_info(_("Connection details: %s\n"), nodeConninfo); - PQclear(res2); - PQfinish(nodeConn); - continue; - } - - if (sscanf(PQgetvalue(res2, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) - log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res2, 0, 0)); - - PQclear(res2); - } - else - { - uxlogid = 0; - uxrecoff = 0; - } - visible_nodes++; - - nodes[i].nodeId = node; - nodes[i].xlog_location.xlogid = uxlogid; - nodes[i].xlog_location.xrecoff = uxrecoff; - nodes[i].is_ready = true; - nodes[i].is_witness = witness; - + nodes[i].is_visible = true; PQfinish(nodeConn); } - PQclear(res1); - /* Close the connection to this server */ - PQfinish(myLocalConn); + PQclear(res); - /* - * total nodes that are registered, include master which is a node but was - * not counted because it's not a standby - */ - total_nodes = i + 1; + log_debug(_("Total nodes counted: registered=%d, visible=%d\n"), total_nodes, visible_nodes); /* * am i on the group that should keep alive? 
@@ -711,16 +664,165 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } + /* Query all the nodes to determine which ones are ready */ + for (i = 0; i < total_nodes; i++) + { + /* if the node is not visible, skip it */ + if (!nodes[i].is_visible) + continue; + + if (nodes[i].is_witness) + continue; + + nodeConn = establishDBConnection(nodes[i].conninfostr, false); + /* XXX + * This shouldn't happen, if this happens it means this is a major problem + * maybe network outages? anyway, is better for a human to react + */ + if (PQstatus(nodeConn) != CONNECTION_OK) + { + log_err(_("It seems new problems are arising, manual intervention is needed\n")); + exit(ERR_FAILOVER_FAIL); + } + + sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); + res = PQexec(nodeConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn)); + log_info(_("Connection details: %s\n"), nodes[i].conninfostr); + PQclear(res); + PQfinish(nodeConn); + exit(ERR_FAILOVER_FAIL); + } + + if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) + log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); + + log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", + nodes[i].nodeId, uxlogid, uxlogid, uxrecoff, uxrecoff); + + /* If position is 0/0, error */ + if (uxlogid == 0 && uxrecoff == 0) + { + PQclear(res); + PQfinish(nodeConn); + log_info(_("InvalidXLogRecPtr detected in a standby\n")); + exit(ERR_FAILOVER_FAIL); + } + + nodes[i].xlog_location.xlogid = uxlogid; + nodes[i].xlog_location.xrecoff = uxrecoff; + + PQclear(res); + PQfinish(nodeConn); + } + + /* last we get info about this node, and update shared memory */ + sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); + res = PQexec(myLocalConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("PQexec failed: %s.\nReport an invalid value to not be 
considered as new primary and exit.\n"), PQerrorMessage(myLocalConn)); + PQfinish(myLocalConn); + PQclear(res); + sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); + update_shared_memory(last_wal_standby_applied); + exit(ERR_DB_QUERY); + } + + /* write last location in shared memory */ + update_shared_memory(PQgetvalue(res, 0, 0)); + PQclear(res); + + for (i = 0; i < total_nodes; i++) + { + while (!nodes[i].is_ready) + { + /* + * the witness will always be masked as ready if it's still + * not marked that way and avoid a useless query + */ + if (nodes[i].is_witness) + { + if (!nodes[i].is_ready) + { + nodes[i].is_ready = true; + ready_nodes++; + } + break; + } + + /* if the node is not visible, skip it */ + if (!nodes[i].is_visible) + break; + + /* if the node is ready there is nothing to check, skip it too */ + if (nodes[i].is_ready) + break; + + nodeConn = establishDBConnection(nodes[i].conninfostr, false); + /* XXX + * This shouldn't happen, if this happens it means this is a major problem + * maybe network outages? 
anyway, is better for a human to react + */ + if (PQstatus(nodeConn) != CONNECTION_OK) + { + /* XXX */ + log_info(_("At this point, it could be some race conditions that are acceptable, assume the node is restarting and starting failover procedure\n")); + break; + } + + sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); + res = PQexec(nodeConn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(nodeConn)); + PQclear(res); + PQfinish(nodeConn); + exit(ERR_DB_QUERY); + } + + if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) + log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); + + PQclear(res); + PQfinish(nodeConn); + /* If position is 0/0, keep checking */ + if (uxlogid == 0 && uxrecoff == 0) + continue; + + xlog_recptr.xlogid = uxlogid; + xlog_recptr.xrecoff = uxrecoff; + + if (XLByteLT(nodes[i].xlog_location, xlog_recptr)) + { + nodes[i].xlog_location.xlogid = uxlogid; + nodes[i].xlog_location.xrecoff = uxrecoff; + } + + log_debug("Last XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", + nodes[i].nodeId, nodes[i].xlog_location.xlogid, nodes[i].xlog_location.xlogid, + nodes[i].xlog_location.xrecoff, nodes[i].xlog_location.xrecoff); + + ready_nodes++; + nodes[i].is_ready = true; + } + } + + /* Close the connection to this server */ + PQfinish(myLocalConn); + /* * determine which one is the best candidate to promote to primary */ - for (i = 0; i < total_nodes - 1; i++) + for (i = 0; i < total_nodes; i++) { /* witness is never a good candidate */ if (nodes[i].is_witness) continue; - if (!nodes[i].is_ready) + if (!nodes[i].is_ready || !nodes[i].is_visible) continue; if (!find_best) @@ -759,6 +861,9 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } + /* wait */ + sleep(5); + if (verbose) log_info(_("%s: This node is the best candidate 
to be the new primary, promoting...\n"), progname); @@ -772,6 +877,9 @@ do_failover(void) } else if (find_best) { + /* wait */ + sleep(10); + if (verbose) log_info(_("%s: Node %d is the best candidate to be the new primary, we should follow it...\n"), progname, best_candidate.nodeId); From d99024ba1123a3f44fce198d9285659fe9ee0c7d Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Mon, 9 Sep 2013 11:10:20 -0500 Subject: [PATCH 25/51] Make repmgrd survive to the failover To do this it needs to reconnect to the new master --- repmgrd.c | 239 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 131 insertions(+), 108 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 1f60162a..b55e9342 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -81,6 +81,8 @@ bool verbose = false; bool monitoring_history = false; char repmgr_schema[MAXLEN]; +bool failover_done = false; + /* * should initialize with {0} to be ANSI complaint ? but this raises * error with gcc -Wall @@ -203,63 +205,37 @@ main(int argc, char **argv) exit(ERR_BAD_CONFIG); } + /* - * Set my server mode, establish a connection to primary - * and start monitor - */ - if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node)) - myLocalMode = WITNESS_MODE; - else if (is_standby(myLocalConn)) - myLocalMode = STANDBY_MODE; - else /* is the master */ - myLocalMode = PRIMARY_MODE; - - switch (myLocalMode) + * MAIN LOOP + * This loops cicles once per failover and at startup + * Requisites: + * - myLocalConn needs to be already setted with an active connection + * - no master connection + */ + do { - case PRIMARY_MODE: - primary_options.node = local_options.node; - strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); - primaryConn = myLocalConn; - - checkClusterConfiguration(myLocalConn, primaryConn); - checkNodeConfiguration(local_options.conninfo); - - if (reload_configuration(config_file, &local_options)) - { - PQfinish(myLocalConn); - myLocalConn = 
establishDBConnection(local_options.conninfo, true); - primaryConn = myLocalConn; - update_registration(); - } - - log_info(_("%s Starting continuous primary connection check\n"), progname); - /* Check that primary is still alive, and standbies are sending info */ /* - * Every SLEEP_MONITOR seconds, do master checks - * XXX - * Check that standbies are sending info - */ - for (;;) + * Set my server mode, establish a connection to primary + * and start monitor + */ + if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node)) + myLocalMode = WITNESS_MODE; + else if (is_standby(myLocalConn)) + myLocalMode = STANDBY_MODE; + else /* is the master */ + myLocalMode = PRIMARY_MODE; + + switch (myLocalMode) { - if (CheckPrimaryConnection()) - { - /* - CheckActiveStandbiesConnections(); - CheckInactiveStandbies(); - */ - sleep(SLEEP_MONITOR); - } - else - { - /* XXX - * May we do something more verbose ? - */ - exit (1); - } - - if (got_SIGHUP) - { - /* if we can reload, then could need to change myLocalConn */ + case PRIMARY_MODE: + primary_options.node = local_options.node; + strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); + primaryConn = myLocalConn; + + checkClusterConfiguration(myLocalConn, primaryConn); + checkNodeConfiguration(local_options.conninfo); + if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); @@ -267,70 +243,112 @@ main(int argc, char **argv) primaryConn = myLocalConn; update_registration(); } - got_SIGHUP = false; - } - } - break; - case WITNESS_MODE: - case STANDBY_MODE: - /* I need the id of the primary as well as a connection to it */ - log_info(_("%s Connecting to primary for cluster '%s'\n"), - progname, local_options.cluster_name); - primaryConn = getMasterConnection(myLocalConn, repmgr_schema, - local_options.cluster_name, - &primary_options.node, NULL); - if (primaryConn == NULL) - { - CloseConnections(); - exit(ERR_BAD_CONFIG); - } + + log_info(_("%s Starting 
continuous primary connection check\n"), progname); + + /* Check that primary is still alive, and standbies are sending info */ + + /* + * Every SLEEP_MONITOR seconds, do master checks + * XXX + * Check that standbies are sending info + */ + do + { + if (CheckPrimaryConnection()) + { + /* + CheckActiveStandbiesConnections(); + CheckInactiveStandbies(); + */ + sleep(SLEEP_MONITOR); + } + else + { + /* XXX + * May we do something more verbose ? + */ + exit(1); + } + + if (got_SIGHUP) + { + /* if we can reload, then could need to change myLocalConn */ + if (reload_configuration(config_file, &local_options)) + { + PQfinish(myLocalConn); + myLocalConn = establishDBConnection(local_options.conninfo, true); + primaryConn = myLocalConn; + update_registration(); + } + got_SIGHUP = false; + } + } while (!failover_done); + break; + case WITNESS_MODE: + case STANDBY_MODE: + /* I need the id of the primary as well as a connection to it */ + log_info(_("%s Connecting to primary for cluster '%s'\n"), + progname, local_options.cluster_name); + primaryConn = getMasterConnection(myLocalConn, repmgr_schema, + local_options.cluster_name, + &primary_options.node, NULL); + if (primaryConn == NULL) + { + CloseConnections(); + exit(ERR_BAD_CONFIG); + } - checkClusterConfiguration(myLocalConn, primaryConn); - checkNodeConfiguration(local_options.conninfo); + checkClusterConfiguration(myLocalConn, primaryConn); + checkNodeConfiguration(local_options.conninfo); - if (reload_configuration(config_file, &local_options)) - { - PQfinish(myLocalConn); - myLocalConn = establishDBConnection(local_options.conninfo, true); - update_registration(); - } - - /* - * Every SLEEP_MONITOR seconds, do checks - */ - if (myLocalMode == WITNESS_MODE) - { - log_info(_("%s Starting continuous witness node monitoring\n"), progname); - } - else if (myLocalMode == STANDBY_MODE) - { - log_info(_("%s Starting continuous standby node monitoring\n"), progname); - } - - for (;;) - { - if (myLocalMode == WITNESS_MODE) - 
WitnessMonitor(); - else if (myLocalMode == STANDBY_MODE) - StandbyMonitor(); - sleep(SLEEP_MONITOR); - - if (got_SIGHUP) - { - /* if we can reload, then could need to change myLocalConn */ if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); myLocalConn = establishDBConnection(local_options.conninfo, true); update_registration(); } - got_SIGHUP = false; - } + + /* + * Every SLEEP_MONITOR seconds, do checks + */ + if (myLocalMode == WITNESS_MODE) + { + log_info(_("%s Starting continuous witness node monitoring\n"), progname); + } + else if (myLocalMode == STANDBY_MODE) + { + log_info(_("%s Starting continuous standby node monitoring\n"), progname); + } + + do + { + if (myLocalMode == WITNESS_MODE) + WitnessMonitor(); + else if (myLocalMode == STANDBY_MODE) + StandbyMonitor(); + sleep(SLEEP_MONITOR); + + if (got_SIGHUP) + { + /* if we can reload, then could need to change myLocalConn */ + if (reload_configuration(config_file, &local_options)) + { + PQfinish(myLocalConn); + myLocalConn = establishDBConnection(local_options.conninfo, true); + update_registration(); + } + got_SIGHUP = false; + } + } while (!failover_done); + break; + default: + log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node); } - break; - default: - log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node); - } + + failover_done = false; + + } while (true); /* Prevent a double-free */ if (primaryConn == myLocalConn) @@ -481,6 +499,7 @@ StandbyMonitor(void) * a new primaryConn */ do_failover(); + return; } } @@ -901,6 +920,9 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } + /* to force it to re-calculate mode and master node */ + failover_done = true; + /* and reconnect to the local database */ myLocalConn = establishDBConnection(local_options.conninfo, true); } @@ -1089,6 +1111,7 @@ static void handle_sigint(SIGNAL_ARGS) { CloseConnections(); + logger_shutdown(); exit(1); } From b4107726273ada63f9e70366a573c8af5d09ce51 Mon 
Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 29 Aug 2013 15:27:37 -0500 Subject: [PATCH 26/51] Rework algorithm to coordinate voting Make this by waiting for all nodes to finish a step, before starting a new one. So everyone starts promoting or following in a coordinated fashion. Also make a few fixes. From 3c8df59eb93f923e0a99f5bcf717d63550bc292f Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 14 Nov 2013 00:43:35 -0500 Subject: [PATCH 27/51] Make repmgr compile in 9.3. Patch provided by Shawn Ellis with some fixes by me. --- CREDITS | 1 + repmgrd.c | 42 ++++++++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/CREDITS b/CREDITS index 78e707b8..753e80fd 100644 --- a/CREDITS +++ b/CREDITS @@ -10,3 +10,4 @@ Hannu Krosing Cédric Villemain Charles Duffy Daniel Farina +Shawn Ellis diff --git a/repmgrd.c b/repmgrd.c index b55e9342..bad8ebc5 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -45,6 +45,26 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0}; #endif +#if PG_VERSION_NUM >= 90300 + #define XLAssign(a, b) \ + a = b + + #define XLAssignValue(a, xlogid, xrecoff) \ + a = xrecoff + + #define XLByteLT(a, b) \ + (a < b) + +#else + #define XLAssign(a, b) \ + a.xlogid = b.xlogid; \ + a.xrecoff = b.xrecoff + + #define XLAssignValue(a, uxlogid, uxrecoff) \ + a.xlogid = uxlogid; \ + a.xrecoff = uxrecoff +#endif + /* * Struct to keep info about the nodes, used in the voting process in * do_failover() @@ -652,8 +672,7 @@ do_failover(void) /* Initialize on false so if we can't reach this node we know that later */ nodes[i].is_visible = false; nodes[i].is_ready = false; - nodes[i].xlog_location.xlogid = 0; - nodes[i].xlog_location.xrecoff = 0; + XLAssignValue(nodes[i].xlog_location, 0, 0); log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"), progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? 
"true" : "false"); @@ -730,8 +749,7 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } - nodes[i].xlog_location.xlogid = uxlogid; - nodes[i].xlog_location.xrecoff = uxrecoff; + XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff); PQclear(res); PQfinish(nodeConn); @@ -811,18 +829,16 @@ do_failover(void) if (uxlogid == 0 && uxrecoff == 0) continue; - xlog_recptr.xlogid = uxlogid; - xlog_recptr.xrecoff = uxrecoff; + XLAssignValue(xlog_recptr, uxlogid, uxrecoff); if (XLByteLT(nodes[i].xlog_location, xlog_recptr)) { - nodes[i].xlog_location.xlogid = uxlogid; - nodes[i].xlog_location.xrecoff = uxrecoff; + XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff); } log_debug("Last XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", - nodes[i].nodeId, nodes[i].xlog_location.xlogid, nodes[i].xlog_location.xlogid, - nodes[i].xlog_location.xrecoff, nodes[i].xlog_location.xrecoff); + nodes[i].nodeId, uxlogid, uxlogid, + uxrecoff, uxrecoff); ready_nodes++; nodes[i].is_ready = true; @@ -848,8 +864,7 @@ do_failover(void) { /* start with the first ready node, and then move on to the next one */ best_candidate.nodeId = nodes[i].nodeId; - best_candidate.xlog_location.xlogid = nodes[i].xlog_location.xlogid; - best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff; + XLAssign(best_candidate.xlog_location, nodes[i].xlog_location); best_candidate.is_ready = nodes[i].is_ready; best_candidate.is_witness = nodes[i].is_witness; find_best = true; @@ -864,8 +879,7 @@ do_failover(void) if (XLByteLT(best_candidate.xlog_location, nodes[i].xlog_location)) { best_candidate.nodeId = nodes[i].nodeId; - best_candidate.xlog_location.xlogid = nodes[i].xlog_location.xlogid; - best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff; + XLAssign(best_candidate.xlog_location, nodes[i].xlog_location); best_candidate.is_ready = nodes[i].is_ready; best_candidate.is_witness = nodes[i].is_witness; } From 43af00aa12de5d8d9e5a695198202d0e12495a48 Mon Sep 17 00:00:00 2001 
From: Jaime Casanova Date: Wed, 4 Dec 2013 01:23:48 -0500 Subject: [PATCH 28/51] Ignore pg_log when cloning, just like we ignore pg_xlog --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 5ed4c7d2..aa27713f 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1827,7 +1827,7 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, if (is_directory) { - strcat(rsync_flags, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); + strcat(rsync_flags, " --exclude=pg_xlog* --exclude=pg_log* --exclude=pg_control --exclude=*.pid"); maxlen_snprintf(script, "rsync %s %s:%s/* %s", rsync_flags, host_string, remote_path, local_path); } From 8b370dc58163956e8c42ab74ecdaec7de28cd11d Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sat, 7 Dec 2013 13:25:46 -0500 Subject: [PATCH 29/51] Fix some typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by Krzysztof Gajdemski --- repmgr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/repmgr.c b/repmgr.c index aa27713f..a20c159b 100644 --- a/repmgr.c +++ b/repmgr.c @@ -396,7 +396,7 @@ do_cluster_show(void) if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_err(_("Can't get nodes informations, have you regitered them?\n%s\n"), PQerrorMessage(conn)); + log_err(_("Can't get nodes information, have you registered them?\n%s\n"), PQerrorMessage(conn)); PQclear(res); PQfinish(conn); exit(ERR_BAD_CONFIG); @@ -952,7 +952,7 @@ do_standby_clone(void) } PQclear(res); - log_info(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); + log_info(_("Successfully connected to primary. 
Current installation size is %s\n"), get_cluster_size(conn)); /* * XXX master_xlog_directory should be discovered from master configuration @@ -1535,7 +1535,7 @@ do_witness_create(void) exit(ERR_BAD_CONFIG); } - log_info(_("Succesfully connected to primary.\n")); + log_info(_("Successfully connected to primary.\n")); r = test_ssh_connection(runtime_options.host, runtime_options.remote_user); if (r != 0) @@ -1672,7 +1672,7 @@ do_witness_create(void) PQfinish(masterconn); PQfinish(witnessconn); - log_notice(_("Configuration has been succesfully copied to the witness\n")); + log_notice(_("Configuration has been successfully copied to the witness\n")); } @@ -1724,7 +1724,7 @@ help(const char *progname) printf(_(" standby promote - allows manual promotion of a specific standby into a ")); printf(_("new master in the event of a failover\n")); printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); - printf(_(" cluster show - print node informations\n")); + printf(_(" cluster show - print node information\n")); printf(_(" cluster cleanup - cleans monitor's history\n")); } From 493133986d0a9e9a25a35c833aa4c7e85ca70c7c Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 19 Dec 2013 00:06:01 -0500 Subject: [PATCH 30/51] Add timestamps to log line in stderr Patch by Christian Kruse --- log.c | 27 ++++++++++++++++++++++++++- log.h | 18 ++++++++++-------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/log.c b/log.c index e56bd4f8..41e18f2c 100644 --- a/log.c +++ b/log.c @@ -25,9 +25,11 @@ #ifdef HAVE_SYSLOG #include -#include #endif +#include +#include + #include "log.h" #define DEFAULT_IDENT "repmgr" @@ -37,6 +39,29 @@ /* #define REPMGR_DEBUG */ +void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...) 
{ + size_t len = strlen(fmt); + char fmt1[len + 150]; + time_t t; + struct tm *tm; + char buff[100]; + va_list ap; + + if(log_level >= level) { + time(&t); + tm = localtime(&t); + + va_start(ap, fmt); + + strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm); + snprintf(fmt1, len + 150, "%s [%s] %s", buff, level_name, fmt); + vfprintf(stderr, fmt1, ap); + + va_end(ap); + } +} + + static int detect_log_level(const char* level); static int detect_log_facility(const char* facility); diff --git a/log.h b/log.h index c7c2af3c..643b3f69 100644 --- a/log.h +++ b/log.h @@ -25,15 +25,17 @@ #define REPMGR_SYSLOG 1 #define REPMGR_STDERR 2 +void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...); + /* Standard error logging */ -#define stderr_log_debug(...) if (log_level >= LOG_DEBUG) fprintf(stderr, __VA_ARGS__) -#define stderr_log_info(...) if (log_level >= LOG_INFO) fprintf(stderr, __VA_ARGS__) -#define stderr_log_notice(...) if (log_level >= LOG_NOTICE) fprintf(stderr, __VA_ARGS__) -#define stderr_log_warning(...) if (log_level >= LOG_WARNING) fprintf(stderr, __VA_ARGS__) -#define stderr_log_err(...) if (log_level >= LOG_ERR) fprintf(stderr, __VA_ARGS__) -#define stderr_log_crit(...) if (log_level >= LOG_CRIT) fprintf(stderr, __VA_ARGS__) -#define stderr_log_alert(...) if (log_level >= LOG_ALERT) fprintf(stderr, __VA_ARGS__) -#define stderr_log_emerg(...) if (log_level >= LOG_EMERG) fprintf(stderr, __VA_ARGS__) +#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__) +#define stderr_log_info(...) stderr_log_with_level("INFO", LOG_INFO, __VA_ARGS__) +#define stderr_log_notice(...) stderr_log_with_level("NOTICE", LOG_NOTICE, __VA_ARGS__) +#define stderr_log_warning(...) stderr_log_with_level("WARNING", LOG_WARNING, __VA_ARGS__) +#define stderr_log_err(...) stderr_log_with_level("ERROR", LOG_ERR, __VA_ARGS__) +#define stderr_log_crit(...) stderr_log_with_level("CRITICAL", LOG_CRIT, __VA_ARGS__) +#define stderr_log_alert(...) 
stderr_log_with_level("ALERT", LOG_ALERT, __VA_ARGS__) +#define stderr_log_emerg(...) stderr_log_with_level("EMERGENCY", LOG_EMERG, __VA_ARGS__) #ifdef HAVE_SYSLOG From a1f4285e2bc78f776b3756400b757ddecb50395d Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 19 Dec 2013 00:55:56 -0500 Subject: [PATCH 31/51] Add guc_setted_typed() function to allow wal_keep_segmeents to be checked as an integer instead of text Patch by Jay Taylor --- dbutils.c | 34 ++++++++++++++++++++++++++++++++++ dbutils.h | 2 ++ repmgr.c | 2 +- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/dbutils.c b/dbutils.c index 0e4c4749..9157a6e9 100644 --- a/dbutils.c +++ b/dbutils.c @@ -250,6 +250,40 @@ guc_setted(PGconn *conn, const char *parameter, const char *op, return true; } +/** + * Just like guc_setted except with an extra parameter containing the name of + * the pg datatype so that the comparison can be done properly. + */ +bool +guc_setted_typed(PGconn *conn, const char *parameter, const char *op, + const char *value, const char *datatype) +{ + PGresult *res; + char sqlquery[QUERY_STR_LEN]; + + sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " + " WHERE name = '%s' AND setting::%s %s '%s'::%s", + parameter, datatype, op, value, datatype); + + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("GUC setting check PQexec failed: %s"), + PQerrorMessage(conn)); + PQclear(res); + PQfinish(conn); + exit(ERR_DB_QUERY); + } + if (PQntuples(res) == 0) + { + PQclear(res); + return false; + } + PQclear(res); + + return true; +} + const char * get_cluster_size(PGconn *conn) diff --git a/dbutils.h b/dbutils.h index 8d983048..4ce7f0a6 100644 --- a/dbutils.h +++ b/dbutils.h @@ -32,6 +32,8 @@ bool is_pgup(PGconn *conn, int timeout); char *pg_version(PGconn *conn, char* major_version); bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); +bool guc_setted(PGconn *conn, const char *parameter, const char 
*op, + const char *value, const char *datatype); const char *get_cluster_size(PGconn *conn); PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, int *master_id, char *master_conninfo_out); diff --git a/repmgr.c b/repmgr.c index a20c159b..111b4e5e 100644 --- a/repmgr.c +++ b/repmgr.c @@ -842,7 +842,7 @@ do_standby_clone(void) log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); exit(ERR_BAD_CONFIG); } - if (!guc_setted(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments)) + if (!guc_setted_typed(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments, "integer")) { PQfinish(conn); log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option or edit the postgresql.conf of the PostgreSQL master.)\n"), progname, runtime_options.wal_keep_segments); From 5e04ab6eaeaed8ea01bc806694dde533c0f943c8 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 19 Dec 2013 00:58:10 -0500 Subject: [PATCH 32/51] Add a ssh_options parameter to allow ssh checking to consider non-default values (ie: a different port) Patch by Jay Taylor --- config.c | 4 ++++ config.h | 1 + repmgr.c | 6 +++--- repmgr.conf.sample | 3 ++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/config.c b/config.c index 7c4e4ca0..03b2e1c2 100644 --- a/config.c +++ b/config.c @@ -41,6 +41,7 @@ parse_config(const char *config_file, t_configuration_options *options) memset(options->promote_command, 0, sizeof(options->promote_command)); memset(options->follow_command, 0, sizeof(options->follow_command)); memset(options->rsync_options, 0, sizeof(options->rsync_options)); + memset(options->ssh_options, 0, sizeof(options->ssh_options)); /* if nothing has been provided defaults to 60 */ options->master_response_timeout = 60; @@ -78,6 +79,8 @@ parse_config(const char *config_file, t_configuration_options *options) strncpy (options->conninfo, value, MAXLEN); else if (strcmp(name, 
"rsync_options") == 0) strncpy (options->rsync_options, value, QUERY_STR_LEN); + else if (strcmp(name, "ssh_options") == 0) + strncpy (options->ssh_options, value, QUERY_STR_LEN); else if (strcmp(name, "loglevel") == 0) strncpy (options->loglevel, value, MAXLEN); else if (strcmp(name, "logfacility") == 0) @@ -283,6 +286,7 @@ reload_configuration(char *config_file, t_configuration_options *orig_options) strcpy(orig_options->promote_command, new_options.promote_command); strcpy(orig_options->follow_command, new_options.follow_command); strcpy(orig_options->rsync_options, new_options.rsync_options); + strcpy(orig_options->ssh_options, new_options.ssh_options); orig_options->master_response_timeout = new_options.master_response_timeout; orig_options->reconnect_attempts = new_options.reconnect_attempts; orig_options->reconnect_intvl = new_options.reconnect_intvl; diff --git a/config.h b/config.h index 4e4cdaf3..9e4d3c67 100644 --- a/config.h +++ b/config.h @@ -36,6 +36,7 @@ typedef struct char loglevel[MAXLEN]; char logfacility[MAXLEN]; char rsync_options[QUERY_STR_LEN]; + char ssh_options[QUERY_STR_LEN]; int master_response_timeout; int reconnect_attempts; int reconnect_intvl; diff --git a/repmgr.c b/repmgr.c index 111b4e5e..ea97358e 100644 --- a/repmgr.c +++ b/repmgr.c @@ -86,7 +86,7 @@ bool require_password = false; /* Initialization of runtime options */ t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }; -t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 }; +t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1 }; static char *server_mode = NULL; static char *server_cmd = NULL; @@ -1786,9 +1786,9 @@ test_ssh_connection(char *host, char *remote_user) /* Check if we have ssh connectivity to host before trying to rsync */ if (!remote_user[0]) - maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s", 
host, TRUEBIN_PATH); + maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s %s", options.ssh_options, host, TRUEBIN_PATH); else - maxlen_snprintf(script, "ssh -o Batchmode=yes %s -l %s %s", host, remote_user, TRUEBIN_PATH); + maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s", options.ssh_options, host, remote_user, TRUEBIN_PATH); log_debug(_("command is: %s"), script); r = system(script); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 9daaf3f4..86378b46 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -11,7 +11,8 @@ node_name=standby2 # Connection information conninfo='host=192.168.204.104' -rsync_options=--archive --checksum --compress --progress --rsh=ssh +rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\"" +ssh_options=-o "StrictHostKeyChecking no" # How many seconds we wait for master response before declaring master failure master_response_timeout=60 From 7f796e2d156d50e569b382997f82de25a208a35d Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 19 Dec 2013 01:40:00 -0500 Subject: [PATCH 33/51] Update history and credit files --- CREDITS | 3 +++ HISTORY | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 753e80fd..6f556936 100644 --- a/CREDITS +++ b/CREDITS @@ -11,3 +11,6 @@ Cédric Villemain Charles Duffy Daniel Farina Shawn Ellis +Jay Taylor +Christian Kruse +Krzysztof Gajdemski diff --git a/HISTORY b/HISTORY index bfd36a17..68f84d1e 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,11 @@ -2.0beta 2012-07-27 +2.0beta2 2013-12-19 + Improve autofailover logic and algorithms (Jaime, Andres) + Ignore pg_log when cloning (Jaime) + Add timestamps to log line in stderr (Christian) + Correctly check wal_keep_segments (Jay Taylor) + Add a ssh_options parameter (Jay Taylor) + +2.0beta1 2012-07-27 Make CLONE command try to make an exact copy including $PGDATA location (Cedric) Add detection of master failure (Jaime) Add the notion of a 
witness server (Jaime) From 8e7b4878380ee4431fad5269b2a20da54c0f40c1 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 19 Dec 2013 01:41:24 -0500 Subject: [PATCH 34/51] Update debian control file --- debian/DEBIAN/control | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debian/DEBIAN/control b/debian/DEBIAN/control index 4cadb9e1..e96a799a 100644 --- a/debian/DEBIAN/control +++ b/debian/DEBIAN/control @@ -1,9 +1,9 @@ Package: repmgr-auto -Version: 1.0-1 +Version: 2.0beta2 Section: database Priority: optional Architecture: all -Depends: rsync, postgresql-9.0 -Maintainer: Greg Smith +Depends: rsync, postgresql-9.0 | postgresql-9.1 | postgresql-9.2 | postgresql-9.3 +Maintainer: Jaime Casanova Description: PostgreSQL replication setup, magament and monitoring has two main executables From 6693b99288dfba321aaea628735e73654224b699 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 19 Dec 2013 01:43:12 -0500 Subject: [PATCH 35/51] Files to create the debian package Patch by: Christian Kruse --- debian/repmgr.repmgrd.default | 14 ++++++++++ debian/repmgr.repmgrd.init | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 debian/repmgr.repmgrd.default create mode 100644 debian/repmgr.repmgrd.init diff --git a/debian/repmgr.repmgrd.default b/debian/repmgr.repmgrd.default new file mode 100644 index 00000000..bd57bf6a --- /dev/null +++ b/debian/repmgr.repmgrd.default @@ -0,0 +1,14 @@ +#!/bin/sh +# default settings for repmgrd. 
This file is source by /bin/sh from +# /etc/init.d/repmgrd + +# Options for repmgrd +REPMGRD_OPTS="" + +# repmgrd binary +REPMGR_BIN="/usr/bin/repmgr" + +# pid file +REPMGR_PIDFILE="/var/run/repmgrd.pid" + + diff --git a/debian/repmgr.repmgrd.init b/debian/repmgr.repmgrd.init new file mode 100644 index 00000000..11bd2dee --- /dev/null +++ b/debian/repmgr.repmgrd.init @@ -0,0 +1,48 @@ +#!/bin/sh +### BEGIN INIT INFO +# Provides: repmgrd +# Required-Start: $local_fs $remote_fs $network $syslog $postgresql +# Required-Stop: $local_fs $remote_fs $network $syslog $postgresql +# Should-Start: $syslog $postgresql +# Should-Start: $syslog $postgresql +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Start/stop repmgrd +### END INIT INFO + +set -e + +if test -f /etc/default/repmgrd; then + . /etc/default/repmgrd +fi + +if [ -z "$REPMGRD_BIN" ]; then + REPMGRD_BIN="/usr/bin/repmgrd" +fi + +if [ -z "$REPMGRD_PIDFILE" ]; then + REPMGRD_PIDFILE="/var/run/repmgrd.pid" +fi + +test -x $REPMGRD_BIN || exit 0 + +case "$1" in + start) + start-stop-daemon --start --quiet --make-pidfile --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN $REPMGRD_OPTS + ;; + + stop) + start-stop-daemon --stop --oknodo --quiet --pidfile $REPMGRD_PIDFILE + ;; + + restart) + $0 stop && $0 start || exit 1 + ;; + + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 + ;; +esac + +exit 0 From 92092484201c888d2bc0f5bafd23e3900b3b14e4 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Thu, 19 Dec 2013 11:09:08 -0500 Subject: [PATCH 36/51] Fix oversight in the header of guc_setted_typed() --- dbutils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbutils.h b/dbutils.h index 4ce7f0a6..b31a25a9 100644 --- a/dbutils.h +++ b/dbutils.h @@ -32,7 +32,7 @@ bool is_pgup(PGconn *conn, int timeout); char *pg_version(PGconn *conn, char* major_version); bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); -bool guc_setted(PGconn *conn, const char 
*parameter, const char *op, +bool guc_setted_typed(PGconn *conn, const char *parameter, const char *op, const char *value, const char *datatype); const char *get_cluster_size(PGconn *conn); PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, From b0cd2b5e43ebf5631c5699932d4b438470ccad1a Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Tue, 7 Jan 2014 14:01:46 +0100 Subject: [PATCH 37/51] fix: do not exit() in create_pgdir() This could leave the database in a locked state (pg_start_backup()). And since all calls to create_pgdir() handle the return value correctly we simply replace the exit() by a return false --- check_dir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/check_dir.c b/check_dir.c index 5ae4c096..11aa56bb 100644 --- a/check_dir.c +++ b/check_dir.c @@ -256,7 +256,7 @@ create_pgdir(char *dir, bool force) { log_err(_("couldn't create directory \"%s\"...\n"), dir); - exit(ERR_BAD_CONFIG); + return false; } break; case 1: @@ -268,7 +268,7 @@ create_pgdir(char *dir, bool force) { log_err(_("could not change permissions of directory \"%s\": %s\n"), dir, strerror(errno)); - exit(ERR_BAD_CONFIG); + return false; } break; case 2: @@ -293,7 +293,7 @@ create_pgdir(char *dir, bool force) "If you are sure you want to clone here, " "please check there is no PostgreSQL server " "running and use the --force option\n")); - exit(ERR_BAD_CONFIG); + return false; } return false; @@ -301,7 +301,7 @@ create_pgdir(char *dir, bool force) /* Trouble accessing directory */ log_err(_("could not access directory \"%s\": %s\n"), dir, strerror(errno)); - exit(ERR_BAD_CONFIG); + return false; } return true; } From 9e2f276fcfcf66956109342f65fb9d654b0980c8 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Tue, 7 Jan 2014 14:02:29 +0100 Subject: [PATCH 38/51] fix: do not exit after pg_start_backup() w/o pg_stop_backup() --- repmgr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c 
index ea97358e..b70ffa48 100644 --- a/repmgr.c +++ b/repmgr.c @@ -767,7 +767,7 @@ do_standby_clone(void) PGresult *res; char sqlquery[QUERY_STR_LEN]; - int r = 0; + int r = 0, retval = 0; int i; bool flag_success = false; bool test_mode = false; @@ -1036,6 +1036,8 @@ do_standby_clone(void) { log_err(_("%s: couldn't use directory %s ...\nUse --force option to force\n"), progname, local_data_directory); + r = ERR_BAD_CONFIG; + retval = ERR_BAD_CONFIG; goto stop_backup; } @@ -1175,7 +1177,7 @@ stop_backup: log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - exit(ERR_STOP_BACKUP); + exit(retval == 0 ? ERR_STOP_BACKUP : retval); } last_wal_segment = PQgetvalue(res, 0, 0); From a97065113d950daeb09bad730e92fb76a10c70f5 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Tue, 7 Jan 2014 14:16:58 +0100 Subject: [PATCH 39/51] fix: remove own node earlier if force is set We have to remove our own node before we check for a new master if force is set; else master register would fail on the second time since there already is a master (ourselves), even if we specify -F --- repmgr.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/repmgr.c b/repmgr.c index b70ffa48..10b756f9 100644 --- a/repmgr.c +++ b/repmgr.c @@ -565,6 +565,22 @@ do_master_register(void) PGconn *master_conn; int id; + if (runtime_options.force) + { + sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " + " WHERE id = %d", + repmgr_schema, options.node); + log_debug(_("master register: %s\n"), sqlquery); + + if (!PQexec(conn, sqlquery)) + { + log_warning(_("Cannot delete node details, %s\n"), + PQerrorMessage(conn)); + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + } + /* Ensure there isn't any other master already registered */ master_conn = getMasterConnection(conn, repmgr_schema, options.cluster_name, &id,NULL); @@ -577,21 +593,6 @@ do_master_register(void) } /* Now register the master */ - if 
(runtime_options.force) - { - sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " - " WHERE id = %d", - repmgr_schema, options.node); - log_debug(_("master register: %s\n"), sqlquery); - - if (!PQexec(conn, sqlquery)) - { - log_warning(_("Cannot delete node details, %s\n"), - PQerrorMessage(conn)); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - } sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, name, conninfo, priority) " "VALUES (%d, '%s', '%s', '%s', %d)", From 7428e92e106627848f8433f6d4208dbe65cd2b3f Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Tue, 7 Jan 2014 14:27:31 +0100 Subject: [PATCH 40/51] fix: correctly check the return value of PQexec() not only check if return value is not NULL but also check that the returned result is a PGRES_COMMAND_OK (e.g. the INSERT was successful) --- repmgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 10b756f9..de9f0865 100644 --- a/repmgr.c +++ b/repmgr.c @@ -743,7 +743,8 @@ do_standby_register(void) options.conninfo, options.priority); log_debug(_("standby register: %s\n"), sqlquery); - if (!PQexec(master_conn, sqlquery)) + res = PQexec(master_conn, sqlquery); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) { log_err(_("Cannot insert node details, %s\n"), PQerrorMessage(master_conn)); From d0f3cb59c79a8ea98da5b592443466aa85698c90 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Tue, 7 Jan 2014 14:42:55 +0100 Subject: [PATCH 41/51] fix: create data directory after sanity check --- repmgr.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/repmgr.c b/repmgr.c index de9f0865..15f6d043 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1500,14 +1500,6 @@ do_witness_create(void) char master_hba_file[MAXLEN]; - /* Check this directory could be used as a PGDATA dir */ - if (!create_pgdir(runtime_options.dest_dir, runtime_options.force)) - { - log_err(_("witness create: couldn't create data directory (\"%s\") for 
witness"), - runtime_options.dest_dir); - exit(ERR_BAD_CONFIG); - } - /* Connection parameters for master only */ keywords[0] = "host"; values[0] = runtime_options.host; @@ -1549,6 +1541,15 @@ do_witness_create(void) exit(ERR_BAD_SSH); } + /* Check this directory could be used as a PGDATA dir */ + if (!create_pgdir(runtime_options.dest_dir, runtime_options.force)) + { + log_err(_("witness create: couldn't create data directory (\"%s\") for witness"), + runtime_options.dest_dir); + exit(ERR_BAD_CONFIG); + } + + /* * To create a witness server we need to: * 1) initialize the cluster From 0068dd573a5e2b205ab704e5d4a0af317d85310f Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Tue, 7 Jan 2014 15:52:29 +0100 Subject: [PATCH 42/51] fix: do not compare pointers but the strings --- config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.c b/config.c index 03b2e1c2..0d39f1a5 100644 --- a/config.c +++ b/config.c @@ -237,7 +237,7 @@ reload_configuration(char *config_file, t_configuration_options *orig_options) return false; } - if (new_options.node_name != orig_options->node_name) + if (strcmp(new_options.node_name, orig_options->node_name) != 0) { log_warning(_("\nCannot change standby name, will keep current configuration.\n")); return false; From 9fe2d6886e810cc3e718683915bc20cf3adc2472 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Tue, 7 Jan 2014 16:42:06 +0100 Subject: [PATCH 43/51] white space cleanup --- repmgrd.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index bad8ebc5..506dadad 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -228,11 +228,11 @@ main(int argc, char **argv) /* * MAIN LOOP - * This loops cicles once per failover and at startup + * This loops cicles once per failover and at startup * Requisites: * - myLocalConn needs to be already setted with an active connection * - no master connection - */ + */ do { /* @@ -245,17 +245,17 @@ 
main(int argc, char **argv) myLocalMode = STANDBY_MODE; else /* is the master */ myLocalMode = PRIMARY_MODE; - + switch (myLocalMode) { case PRIMARY_MODE: primary_options.node = local_options.node; strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); primaryConn = myLocalConn; - + checkClusterConfiguration(myLocalConn, primaryConn); checkNodeConfiguration(local_options.conninfo); - + if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); @@ -263,11 +263,11 @@ main(int argc, char **argv) primaryConn = myLocalConn; update_registration(); } - + log_info(_("%s Starting continuous primary connection check\n"), progname); - + /* Check that primary is still alive, and standbies are sending info */ - + /* * Every SLEEP_MONITOR seconds, do master checks * XXX @@ -290,7 +290,7 @@ main(int argc, char **argv) */ exit(1); } - + if (got_SIGHUP) { /* if we can reload, then could need to change myLocalConn */ @@ -328,7 +328,7 @@ main(int argc, char **argv) myLocalConn = establishDBConnection(local_options.conninfo, true); update_registration(); } - + /* * Every SLEEP_MONITOR seconds, do checks */ @@ -340,7 +340,7 @@ main(int argc, char **argv) { log_info(_("%s Starting continuous standby node monitoring\n"), progname); } - + do { if (myLocalMode == WITNESS_MODE) @@ -348,7 +348,7 @@ main(int argc, char **argv) else if (myLocalMode == STANDBY_MODE) StandbyMonitor(); sleep(SLEEP_MONITOR); - + if (got_SIGHUP) { /* if we can reload, then could need to change myLocalConn */ @@ -674,7 +674,7 @@ do_failover(void) nodes[i].is_ready = false; XLAssignValue(nodes[i].xlog_location, 0, 0); - log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"), + log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"), progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? 
"true" : "false"); nodeConn = establishDBConnection(nodes[i].conninfostr, false); @@ -702,7 +702,7 @@ do_failover(void) exit(ERR_FAILOVER_FAIL); } - /* Query all the nodes to determine which ones are ready */ + /* Query all the nodes to determine which ones are ready */ for (i = 0; i < total_nodes; i++) { /* if the node is not visible, skip it */ @@ -713,9 +713,9 @@ do_failover(void) continue; nodeConn = establishDBConnection(nodes[i].conninfostr, false); - /* XXX + /* XXX * This shouldn't happen, if this happens it means this is a major problem - * maybe network outages? anyway, is better for a human to react + * maybe network outages? anyway, is better for a human to react */ if (PQstatus(nodeConn) != CONNECTION_OK) { @@ -737,7 +737,7 @@ do_failover(void) if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); - log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", + log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", nodes[i].nodeId, uxlogid, uxlogid, uxrecoff, uxrecoff); /* If position is 0/0, error */ @@ -766,7 +766,7 @@ do_failover(void) sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); update_shared_memory(last_wal_standby_applied); exit(ERR_DB_QUERY); - } + } /* write last location in shared memory */ update_shared_memory(PQgetvalue(res, 0, 0)); @@ -776,8 +776,8 @@ do_failover(void) { while (!nodes[i].is_ready) { - /* - * the witness will always be masked as ready if it's still + /* + * the witness will always be masked as ready if it's still * not marked that way and avoid a useless query */ if (nodes[i].is_witness) @@ -785,7 +785,7 @@ do_failover(void) if (!nodes[i].is_ready) { nodes[i].is_ready = true; - ready_nodes++; + ready_nodes++; } break; } @@ -799,9 +799,9 @@ do_failover(void) break; nodeConn = establishDBConnection(nodes[i].conninfostr, false); - /* XXX + /* XXX * This shouldn't happen, if this happens it 
means this is a major problem - * maybe network outages? anyway, is better for a human to react + * maybe network outages? anyway, is better for a human to react */ if (PQstatus(nodeConn) != CONNECTION_OK) { @@ -837,10 +837,10 @@ do_failover(void) } log_debug("Last XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", - nodes[i].nodeId, uxlogid, uxlogid, + nodes[i].nodeId, uxlogid, uxlogid, uxrecoff, uxrecoff); - ready_nodes++; + ready_nodes++; nodes[i].is_ready = true; } } From 920f925e4b58de30f0eeb205cf655dcb6e80ef95 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Wed, 8 Jan 2014 11:53:15 +0100 Subject: [PATCH 44/51] added a new cli option --daemonize This option forks the process and generates a new session. This effectively detaches it from the shell. Don't forget to redirect stderr or use syslog for logging! --- errcode.h | 1 + repmgrd.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/errcode.h b/errcode.h index 208643c3..9bb84906 100644 --- a/errcode.h +++ b/errcode.h @@ -35,5 +35,6 @@ #define ERR_STR_OVERFLOW 10 #define ERR_FAILOVER_FAIL 11 #define ERR_BAD_SSH 12 +#define ERR_SYS_FAILURE 13 #endif /* _ERRCODE_H_ */ diff --git a/repmgrd.c b/repmgrd.c index 506dadad..6d616387 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -150,11 +150,13 @@ main(int argc, char **argv) {"config", required_argument, NULL, 'f'}, {"verbose", no_argument, NULL, 'v'}, {"monitoring-history", no_argument, NULL, 'm'}, + {"daemonize", no_argument, NULL, 'd'}, {NULL, 0, NULL, 0} }; int optindex; int c; + bool daemonize = false; char standby_version[MAXVERSIONSTR]; @@ -174,7 +176,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "f:v:m", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "f:v:md", long_options, &optindex)) != -1) { switch (c) { @@ -187,12 +189,39 @@ main(int argc, char **argv) case 'm': monitoring_history = true; break; + case 'd': + daemonize = true; + break; default: 
usage(); exit(ERR_BAD_CONFIG); } } + if (daemonize) + { + pid_t pid = fork(); + switch (pid) + { + case -1: + log_err("Error in fork(): %s\n", strerror(errno)); + exit(ERR_SYS_FAILURE); + break; + + case 0: // child process + pid = setsid(); + if (pid == (pid_t)-1) + { + log_err("Error in setsid(): %s\n", strerror(errno)); + exit(ERR_SYS_FAILURE); + } + break; + + default: // parent process + exit(0); + } + } + setup_event_handlers(); /* @@ -1116,6 +1145,7 @@ void help(const char *progname) printf(_(" --verbose output verbose activity information\n")); printf(_(" --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n")); printf(_(" -f, --config_file=PATH configuration file\n")); + printf(_(" -d, --daemonize detach process from foreground\n")); printf(_("\n%s monitors a cluster of servers.\n"), progname); } From cbce29f009c2ae58ea27cb6c23b1fccc9c8b9bd9 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Wed, 8 Jan 2014 11:55:03 +0100 Subject: [PATCH 45/51] fixed typos --- config.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/config.c b/config.c index 0d39f1a5..3b6ffca6 100644 --- a/config.c +++ b/config.c @@ -221,49 +221,49 @@ reload_configuration(char *config_file, t_configuration_options *orig_options) parse_config(config_file, &new_options); if (new_options.node == -1) { - log_warning(_("\nCannot load new configuration, will keep current one.\n")); + log_warning(_("Cannot load new configuration, will keep current one.\n")); return false; } if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0) { - log_warning(_("\nCannot change cluster name, will keep current configuration.\n")); + log_warning(_("Cannot change cluster name, will keep current configuration.\n")); return false; } if (new_options.node != orig_options->node) { - log_warning(_("\nCannot change node number, will keep current configuration.\n")); + log_warning(_("Cannot change node number, will keep current 
configuration.\n")); return false; } if (strcmp(new_options.node_name, orig_options->node_name) != 0) { - log_warning(_("\nCannot change standby name, will keep current configuration.\n")); + log_warning(_("Cannot change standby name, will keep current configuration.\n")); return false; } if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER) { - log_warning(_("\nNew value for failover is not valid. Should be MANUAL or AUTOMATIC.\n")); + log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n")); return false; } if (new_options.master_response_timeout <= 0) { - log_warning(_("\nNew value for master_response_timeout is not valid. Should be greater than zero.\n")); + log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n")); return false; } if (new_options.reconnect_attempts < 0) { - log_warning(_("\nNew value for reconnect_attempts is not valid. Should be greater or equal than zero.\n")); + log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n")); return false; } if (new_options.reconnect_intvl < 0) { - log_warning(_("\nNew value for reconnect_interval is not valid. Should be greater or equal than zero.\n")); + log_warning(_("New value for reconnect_interval is not valid. 
Should be greater or equal than zero.\n")); return false; } @@ -271,7 +271,7 @@ reload_configuration(char *config_file, t_configuration_options *orig_options) conn = establishDBConnection(new_options.conninfo, false); if (!conn || (PQstatus(conn) != CONNECTION_OK)) { - log_warning(_("\nconninfo string is not valid, will keep current configuration.\n")); + log_warning(_("conninfo string is not valid, will keep current configuration.\n")); return false; } PQfinish(conn); From 634fdff303b8a054cf1da34d6728a169457c7f7c Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Thu, 9 Jan 2014 12:57:16 +0100 Subject: [PATCH 46/51] fix: do not call setup_event_handlers() on WIN32 If we put setup_event_handlers() in #ifdef WIN32, we have to do it for the call and the declaration, too --- repmgrd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/repmgrd.c b/repmgrd.c index 6d616387..7c24cb68 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -131,7 +131,10 @@ static volatile sig_atomic_t got_SIGHUP = false; static void handle_sighup(SIGNAL_ARGS); static void handle_sigint(SIGNAL_ARGS); + +#ifndef WIN32 static void setup_event_handlers(void); +#endif #define CloseConnections() \ if (PQisBusy(primaryConn) == 1) \ @@ -222,7 +225,9 @@ main(int argc, char **argv) } } + #ifndef WIN32 setup_event_handlers(); + #endif /* * Read the configuration file: repmgr.conf From 0e8ff1730e7122a82bb732b76b83202ae3bad816 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Thu, 9 Jan 2014 13:04:40 +0100 Subject: [PATCH 47/51] added handling of a PID file --- repmgrd.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index 7c24cb68..4c88457b 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -22,6 +22,9 @@ #include +#include +#include + #include #include #include @@ -103,6 +106,8 @@ char repmgr_schema[MAXLEN]; bool failover_done = false; +char *pid_file = NULL; + /* * should initialize with {0} to be ANSI complaint ? 
but this raises * error with gcc -Wall @@ -154,6 +159,7 @@ main(int argc, char **argv) {"verbose", no_argument, NULL, 'v'}, {"monitoring-history", no_argument, NULL, 'm'}, {"daemonize", no_argument, NULL, 'd'}, + {"pid-file", required_argument, NULL, 'p'}, {NULL, 0, NULL, 0} }; @@ -179,7 +185,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "f:v:md", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "f:v:mdp:", long_options, &optindex)) != -1) { switch (c) { @@ -195,6 +201,9 @@ main(int argc, char **argv) case 'd': daemonize = true; break; + case 'p': + pid_file = optarg; + break; default: usage(); exit(ERR_BAD_CONFIG); @@ -225,6 +234,28 @@ main(int argc, char **argv) } } + if (pid_file) + { + struct stat st; + FILE *fd; + + if (stat(pid_file, &st) != -1) + { + log_err("PID file %s exists. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file); + exit(ERR_BAD_CONFIG); + } + + fd = fopen(pid_file, "w"); + if (fd == NULL) + { + log_err("Could not open PID file %s!\n", pid_file); + exit(ERR_BAD_CONFIG); + } + + fprintf(fd, "%d", getpid()); + fclose(fd); + } + #ifndef WIN32 setup_event_handlers(); #endif @@ -1151,6 +1182,7 @@ void help(const char *progname) printf(_(" --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n")); printf(_(" -f, --config_file=PATH configuration file\n")); printf(_(" -d, --daemonize detach process from foreground\n")); + printf(_(" -p, --pid-file=PATH write a PID file\n")); printf(_("\n%s monitors a cluster of servers.\n"), progname); } @@ -1161,6 +1193,12 @@ handle_sigint(SIGNAL_ARGS) { CloseConnections(); logger_shutdown(); + + if (pid_file) + { + remove(pid_file); + } + exit(1); } @@ -1176,6 +1214,7 @@ setup_event_handlers(void) { pqsignal(SIGHUP, handle_sighup); pqsignal(SIGINT, handle_sigint); + pqsignal(SIGTERM, handle_sigint); } #endif From 9f26254ac3e36c81c04d02abb0df30d259fbf23a Mon Sep 17 00:00:00 2001 From: Christian 
Kruse Date: Thu, 9 Jan 2014 13:33:22 +0100 Subject: [PATCH 48/51] fix: added some missing initializers to avoid compiler warning --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 15f6d043..b6a9bc9e 100644 --- a/repmgr.c +++ b/repmgr.c @@ -86,7 +86,7 @@ bool require_password = false; /* Initialization of runtime options */ t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }; -t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1 }; +t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1 }; static char *server_mode = NULL; static char *server_cmd = NULL; From 6e3fe059d8bbcbb6b0c3bdcd8d446e1401e6e563 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Thu, 9 Jan 2014 14:12:37 +0100 Subject: [PATCH 49/51] added config options pg_bindir and pg_ctl_options --- config.c | 12 ++++++++++++ config.h | 2 ++ repmgr.c | 19 ++++++++----------- repmgr.conf.sample | 8 ++++++++ 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/config.c b/config.c index 3b6ffca6..6f8f0362 100644 --- a/config.c +++ b/config.c @@ -42,6 +42,8 @@ parse_config(const char *config_file, t_configuration_options *options) memset(options->follow_command, 0, sizeof(options->follow_command)); memset(options->rsync_options, 0, sizeof(options->rsync_options)); memset(options->ssh_options, 0, sizeof(options->ssh_options)); + memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); + memset(options->pgctl_options, 0, sizeof(options->pgctl_options)); /* if nothing has been provided defaults to 60 */ options->master_response_timeout = 60; @@ -114,6 +116,10 @@ parse_config(const char *config_file, t_configuration_options *options) options->reconnect_attempts = atoi(value); else if (strcmp(name, "reconnect_interval") == 0) options->reconnect_intvl = atoi(value); + else if 
(strcmp(name, "pg_bindir") == 0) + strncpy (options->pg_bindir, value, MAXLEN); + else if (strcmp(name, "pg_ctl_options") == 0) + strncpy (options->pgctl_options, value, MAXLEN); else log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value); } @@ -151,6 +157,12 @@ parse_config(const char *config_file, t_configuration_options *options) log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n")); exit(ERR_BAD_CONFIG); } + + if (*options->pg_bindir == '\0') + { + log_err(_("pg_bindir config value not found. Check the configuration file.\n")); + exit(ERR_BAD_CONFIG); + } } diff --git a/config.h b/config.h index 9e4d3c67..cd4e9a7d 100644 --- a/config.h +++ b/config.h @@ -40,6 +40,8 @@ typedef struct int master_response_timeout; int reconnect_attempts; int reconnect_intvl; + char pg_bindir[MAXLEN]; + char pgctl_options[MAXLEN]; } t_configuration_options; void parse_config(const char *config_file, t_configuration_options *options); diff --git a/repmgr.c b/repmgr.c index b6a9bc9e..aa7f1acc 100644 --- a/repmgr.c +++ b/repmgr.c @@ -86,7 +86,7 @@ bool require_password = false; /* Initialization of runtime options */ t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }; -t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1 }; +t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "" }; static char *server_mode = NULL; static char *server_cmd = NULL; @@ -1317,13 +1317,12 @@ do_standby_promote(void) rename(recovery_file_path, recovery_done_path); /* - * We assume the pg_ctl script is in the PATH. Restart and wait for - * the server to finish starting, so that the check below will - * find an active server rather than one starting up. 
This may + * Restart and wait for the server to finish starting, so that the check + * below will find an active server rather than one starting up. This may * hang for up the default timeout (60 seconds). */ - log_notice(_("%s: restarting server using pg_ctl\n"), progname); - maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir); + log_notice(_("%s: restarting server using %s/pg_ctl\n"), progname, options.pg_bindir); + maxlen_snprintf(script, "%s/pg_ctl %s -D %s -w -m fast restart", options.pg_bindir, options.pgctl_options, data_dir); r = system(script); if (r != 0) { @@ -1468,8 +1467,7 @@ do_standby_follow(void) exit(ERR_BAD_CONFIG); /* Finally, restart the service */ - /* We assume the pg_ctl script is in the PATH */ - maxlen_snprintf(script, "pg_ctl -w -D %s -m fast restart", data_dir); + maxlen_snprintf(script, "%s/pg_ctl %s -w -D %s -m fast restart", options.pg_bindir, options.pgctl_options, data_dir); r = system(script); if (r != 0) { @@ -1558,8 +1556,7 @@ do_witness_create(void) */ /* Create the cluster for witness */ - /* We assume the pg_ctl script is in the PATH */ - sprintf(script, "pg_ctl -D %s init -o \"-W\"", runtime_options.dest_dir); + sprintf(script, "%s/pg_ctl %s -D %s init -o \"-W\"", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir); log_info("Initialize cluster for witness: %s.\n", script); r = system(script); @@ -1632,7 +1629,7 @@ do_witness_create(void) } /* start new instance */ - sprintf(script, "pg_ctl -w -D %s start", runtime_options.dest_dir); + sprintf(script, "%s/pg_ctl %s -w -D %s start", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir); log_info(_("Start cluster for witness: %s"), script); r = system(script); if (r != 0) diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 86378b46..8a8de2d4 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -34,3 +34,11 @@ loglevel=NOTICE # Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, 
LOCAL1, ..., LOCAL7, USER # Default: STDERR logfacility=STDERR + +# path to pg_ctl executable +pg_bindir=/usr/bin/ + +# +# you may add command line arguments for pg_ctl +# +# pg_ctl_options='-s' \ No newline at end of file From 4c3d7f80ed2c252f541e58bbe8d00e96f224cd49 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Thu, 9 Jan 2014 14:29:14 +0100 Subject: [PATCH 50/51] now code compiles with -ansi -pedantic and has less warnings --- check_dir.c | 4 ++-- config.h | 2 ++ dbutils.c | 6 +++--- repmgr.c | 4 ++-- repmgr.h | 2 ++ repmgrd.c | 10 +++++----- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/check_dir.c b/check_dir.c index 11aa56bb..f578b4e1 100644 --- a/check_dir.c +++ b/check_dir.c @@ -225,12 +225,12 @@ is_pg_dir(char *dir) struct stat sb; int r; - // test pgdata + /* test pgdata */ xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); if (stat(path, &sb) == 0) return true; - // test tablespace dir + /* test tablespace dir */ sprintf(path, "ls %s/PG_*/ -I*", dir); r = system(path); if (r == 0) diff --git a/config.h b/config.h index cd4e9a7d..8c8bbea1 100644 --- a/config.h +++ b/config.h @@ -44,6 +44,8 @@ typedef struct char pgctl_options[MAXLEN]; } t_configuration_options; +#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "" } + void parse_config(const char *config_file, t_configuration_options *options); void parse_line(char *buff, char *name, char *value); char *trim(char *s); diff --git a/dbutils.c b/dbutils.c index 9157a6e9..51d768e2 100644 --- a/dbutils.c +++ b/dbutils.c @@ -138,7 +138,7 @@ is_pgup(PGconn *conn, int timeout) { if (twice) return false; - PQreset(conn); // reconnect + PQreset(conn); /* reconnect */ twice = true; } else @@ -164,10 +164,10 @@ is_pgup(PGconn *conn, int timeout) break; failed: - // we need to retry, because we might just have loose the connection once + /* we need to retry, because we might just have loose the connection once */ if (twice) 
return false; - PQreset(conn); // reconnect + PQreset(conn); /* reconnect */ twice = true; } } diff --git a/repmgr.c b/repmgr.c index aa7f1acc..40e964ad 100644 --- a/repmgr.c +++ b/repmgr.c @@ -85,8 +85,8 @@ bool need_a_node = true; bool require_password = false; /* Initialization of runtime options */ -t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }; -t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "" }; +t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER; +t_configuration_options options = T_CONFIGURATION_OPTIONS_INITIALIZER; static char *server_mode = NULL; static char *server_cmd = NULL; diff --git a/repmgr.h b/repmgr.h index 295a4645..a2a0ac8e 100644 --- a/repmgr.h +++ b/repmgr.h @@ -69,6 +69,8 @@ typedef struct int keep_history; } t_runtime_options; +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 } + #define SLEEP_MONITOR 2 #endif diff --git a/repmgrd.c b/repmgrd.c index 4c88457b..8b521fca 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -112,7 +112,7 @@ char *pid_file = NULL; * should initialize with {0} to be ANSI complaint ? but this raises * error with gcc -Wall */ -t_configuration_options config = {}; +t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER; static void help(const char* progname); static void usage(void); @@ -220,7 +220,7 @@ main(int argc, char **argv) exit(ERR_SYS_FAILURE); break; - case 0: // child process + case 0: /* child process */ pid = setsid(); if (pid == (pid_t)-1) { @@ -229,7 +229,7 @@ main(int argc, char **argv) } break; - default: // parent process + default: /* parent process */ exit(0); } } @@ -461,7 +461,7 @@ WitnessMonitor(void) * Check if the master is still available, if after 5 minutes of retries * we cannot reconnect, return false. 
*/ - CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds + CheckPrimaryConnection(); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */ if (PQstatus(primaryConn) != CONNECTION_OK) { @@ -546,7 +546,7 @@ StandbyMonitor(void) * Check if the master is still available, if after 5 minutes of retries * we cannot reconnect, try to get a new master. */ - CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds + CheckPrimaryConnection(); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */ if (PQstatus(primaryConn) != CONNECTION_OK) { From a0fdadd5d23a4af59b617a66c848b668dddf19b0 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Thu, 9 Jan 2014 15:35:44 +0100 Subject: [PATCH 51/51] this way it is much cleaner --- repmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c index 40e964ad..85847e86 100644 --- a/repmgr.c +++ b/repmgr.c @@ -769,7 +769,7 @@ do_standby_clone(void) PGresult *res; char sqlquery[QUERY_STR_LEN]; - int r = 0, retval = 0; + int r = 0, retval = SUCCESS; int i; bool flag_success = false; bool test_mode = false; @@ -1179,7 +1179,7 @@ stop_backup: log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn)); PQclear(res); PQfinish(conn); - exit(retval == 0 ? ERR_STOP_BACKUP : retval); + exit(retval); } last_wal_segment = PQgetvalue(res, 0, 0);