Add an option for STANDBY FOLLOW to wait for a master to appear.

This is important for autofailover to do the right thing when
standbys detect the master's death at different times.

While this is a new option, it seems important for autofailover
to work properly, so I will consider the lack of it a bug and
will backpatch to 2.0, where autofailover was introduced.

Per gripe from Alex Railean, about a standby not finding the new
master because the new master hasn't finished promoting.
This commit is contained in:
Jaime Casanova
2012-11-11 10:07:34 -05:00
parent bbdcffa813
commit d6bd5aa381
3 changed files with 21 additions and 7 deletions

View File

@@ -85,7 +85,7 @@ bool need_a_node = true;
bool require_password = false; bool require_password = false;
/* Initialization of runtime options */ /* Initialization of runtime options */
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "", "", 0 }; t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 };
t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 }; t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 };
static char *server_mode = NULL; static char *server_mode = NULL;
@@ -107,6 +107,7 @@ main(int argc, char **argv)
{"wal-keep-segments", required_argument, NULL, 'w'}, {"wal-keep-segments", required_argument, NULL, 'w'},
{"keep-history", required_argument, NULL, 'k'}, {"keep-history", required_argument, NULL, 'k'},
{"force", no_argument, NULL, 'F'}, {"force", no_argument, NULL, 'F'},
{"wait", no_argument, NULL, 'W'},
{"ignore-rsync-warning", no_argument, NULL, 'I'}, {"ignore-rsync-warning", no_argument, NULL, 'I'},
{"verbose", no_argument, NULL, 'v'}, {"verbose", no_argument, NULL, 'v'},
{NULL, 0, NULL, 0} {NULL, 0, NULL, 0}
@@ -133,7 +134,7 @@ main(int argc, char **argv)
} }
while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:I:v", long_options, while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:W:I:v", long_options,
&optindex)) != -1) &optindex)) != -1)
{ {
switch (c) switch (c)
@@ -177,6 +178,9 @@ main(int argc, char **argv)
case 'F': case 'F':
runtime_options.force = true; runtime_options.force = true;
break; break;
case 'W':
runtime_options.wait_for_master = true;
break;
case 'I': case 'I':
runtime_options.ignore_rsync_warn = true; runtime_options.ignore_rsync_warn = true;
break; break;
@@ -1378,10 +1382,18 @@ do_standby_follow(void)
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* we also need to check if there is any master in the cluster */ /*
log_info(_("%s connecting to master database\n"), progname); * we also need to check if there is any master in the cluster
master_conn = getMasterConnection(conn, repmgr_schema, * or wait for one to appear if we have set the wait option
options.cluster_name, &master_id,(char *) &master_conninfo); */
log_info(_("%s discovering new master...\n"), progname);
do
{
master_conn = getMasterConnection(conn, repmgr_schema,
options.cluster_name, &master_id,(char *) &master_conninfo);
} while (master_conn == NULL && runtime_options.wait_for_master);
if (master_conn == NULL) if (master_conn == NULL)
{ {
log_err(_("There isn't a master to follow in this cluster\n")); log_err(_("There isn't a master to follow in this cluster\n"));
@@ -1700,6 +1712,7 @@ help(const char *progname)
printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n")); printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n")); printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
printf(_(" -F, --force force potentially dangerous operations to happen\n")); printf(_(" -F, --force force potentially dangerous operations to happen\n"));
printf(_(" -W, --wait wait for a master to appear"));
printf(_("\n%s performs some tasks like clone a node, promote it "), progname); printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
printf(_("or making follow another node and then exits.\n")); printf(_("or making follow another node and then exits.\n"));

View File

@@ -24,7 +24,7 @@ reconnect_interval=10
failover=automatic failover=automatic
priority=-1 priority=-1
promote_command='repmgr standby promote -f /path/to/repmgr.conf' promote_command='repmgr standby promote -f /path/to/repmgr.conf'
follow_command='repmgr standby follow -f /path/to/repmgr.conf' follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG # Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
# Default: NOTICE # Default: NOTICE

View File

@@ -59,6 +59,7 @@ typedef struct
char wal_keep_segments[MAXLEN]; char wal_keep_segments[MAXLEN];
bool verbose; bool verbose;
bool force; bool force;
bool wait_for_master;
bool ignore_rsync_warn; bool ignore_rsync_warn;
char masterport[MAXLEN]; char masterport[MAXLEN];