Have repmgrd parse "standby follow --upstream-node-id=%n"

This commit is contained in:
Ian Barwick
2017-09-04 13:42:50 +09:00
parent 9a0f45d7d3
commit 78e6bdeebe
7 changed files with 105 additions and 15 deletions

View File

@@ -1250,12 +1250,18 @@ Additionally the following `repmgrd` options *must* be set in `repmgr.conf`
failover=automatic
promote_command='repmgr standby promote -f /etc/repmgr.conf --log-to-file'
follow_command='repmgr standby follow -f /etc/repmgr.conf --log-to-file'
follow_command='repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'
Note that the `--log-to-file` option will cause `repmgr`'s output to be logged to
the destination configured to receive log output for `repmgrd`.
See `repmgr.conf.sample` for further `repmgrd`-specific settings
The `follow_command` should provide the `--upstream-node-id=%n` option to
`repmgr standby follow`; the `%n` will be replaced by `repmgrd` with the ID
of the new primary. If this option is not provided and the original primary
comes back online after the new primary has been promoted, there is a risk that
`repmgr standby follow` will follow the original primary instead of the new one.
When `failover` is set to `automatic`, upon detecting failure of the current
primary, `repmgrd` will execute one of `promote_command` or `follow_command`,
depending on whether the current server is to become the new primary, or

View File

@@ -2977,6 +2977,14 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
{
switch (src_ptr[1])
{
case '%':
/* %%: replace with % */
if (dst_ptr < end_ptr)
{
src_ptr++;
*dst_ptr++ = *src_ptr;
}
break;
case 'n':
/* %n: node id */
src_ptr++;

View File

@@ -1431,7 +1431,13 @@ do_standby_follow(void)
if (runtime_options.upstream_node_id != NO_UPSTREAM_NODE)
{
// XXX check not self!
/* check not self! */
if (runtime_options.upstream_node_id == config_file_options.node_id)
{
log_error(_("provided \"--upstream-node-id\" %i is the current node!"),
runtime_options.upstream_node_id);
exit(ERR_BAD_CONFIG);
}
record_status = get_node_record(local_conn, runtime_options.upstream_node_id, &primary_node_record);
@@ -1445,10 +1451,11 @@ do_standby_follow(void)
for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
{
primary_conn = establish_db_connection(config_file_options.conninfo, true);
primary_conn = establish_db_connection(primary_node_record.conninfo, true);
if (PQstatus(primary_conn) == CONNECTION_OK || runtime_options.wait == false)
{
log_debug("setting primary id to %i", runtime_options.upstream_node_id);
primary_id = runtime_options.upstream_node_id;
break;
}
@@ -1690,7 +1697,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
/* start/restart the service */
// XXX here check if service is running!! if not, start
// ensure that problem with pg_ctl output is caught here
{
char server_command[MAXLEN] = "";

View File

@@ -210,15 +210,22 @@ ssh_options='-q' # Options to append to "ssh"
# 'manual': repmgrd will take no action and the node will require
# manual attention to reattach it to replication
# (does not apply to BDR mode)
#monitoring_history=no
#promote_command= # command to execute when promoting a new primary; use something like:
#
# repmgr standby promote -f /etc/repmgr.conf
#
#follow_command= # command to execute when instructing a standby to follow a new primary;
# use something like:
#
# repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n
#
#primary_notification_timeout=60 # Interval (in seconds) which repmgrd on a standby
# will wait for a notification from the new primary,
# before falling back to degraded monitoriong
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd
# will terminate if the server being monitored
# is no longer available. -1 (default) disables the
# timeout completely.
# before falling back to degraded monitoring
#monitoring_history=no
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the
# server being monitored is no longer available. -1 (default)
# disables the timeout completely.
#async_query_timeout=60 # Interval (in seconds) which repmgrd will wait before
# cancelling an asynchronous query.

View File

@@ -1661,6 +1661,8 @@ wait_primary_notification(int *new_primary_id)
static FailoverState
follow_new_primary(int new_primary_id)
{
char parsed_follow_command[MAXPGPATH] = "";
PQExpBufferData event_details;
int r;
@@ -1695,9 +1697,6 @@ follow_new_primary(int new_primary_id)
fflush(stderr);
}
log_debug(_("standby follow command is:\n \"%s\""),
config_file_options.follow_command);
upstream_conn = establish_db_connection(new_primary.conninfo, false);
if (PQstatus(upstream_conn) == CONNECTION_OK)
@@ -1709,6 +1708,7 @@ follow_new_primary(int new_primary_id)
}
else
{
new_primary_ok = false;
log_warning(_("new primary is not in recovery"));
PQfinish(upstream_conn);
}
@@ -1727,8 +1727,18 @@ follow_new_primary(int new_primary_id)
PQfinish(local_conn);
local_conn = NULL;
/*
* replace %n in "config_file_options.follow_command" with ID of primary
* to follow.
*/
parse_follow_command(parsed_follow_command, config_file_options.follow_command, new_primary_id);
log_debug(_("standby follow command is:\n \"%s\""),
parsed_follow_command);
/* execute the follow command */
r = system(config_file_options.follow_command);
r = system(parsed_follow_command);
if (r != 0)
{

View File

@@ -410,3 +410,54 @@ trim(char *s)
return s;
}
/*
 * parse_follow_command()
 *
 * Expand the placeholders in a "follow_command" template and write the
 * result to "parsed_command".
 *
 * Recognised placeholders:
 *   %n - replaced with "node_id" formatted as a decimal integer
 *   %% - replaced with a single literal '%'
 * Any other '%' is copied through unchanged.
 *
 * parsed_command: destination buffer; the caller must provide at least
 *                 MAXPGPATH bytes. It is always NUL-terminated on return.
 * template:       NUL-terminated source string; NULL yields an empty result.
 * node_id:        value substituted for each "%n".
 *
 * Output is limited to MAXPGPATH - 1 characters. Literal text which does not
 * fit is dropped. A node ID is only ever written in full: if the complete ID
 * does not fit in the remaining space, expansion stops at that point, so the
 * resulting command can never contain a truncated (and therefore different)
 * node ID.
 */
void
parse_follow_command(char *parsed_command, char *template, int node_id)
{
	const char *src_ptr = NULL;
	char	   *dst_ptr = NULL;
	char	   *end_ptr = NULL;

	if (parsed_command == NULL)
		return;

	dst_ptr = parsed_command;

	/* end_ptr marks the last byte of the buffer, reserved for the terminator */
	end_ptr = parsed_command + MAXPGPATH - 1;
	*dst_ptr = '\0';
	*end_ptr = '\0';

	if (template == NULL)
		return;

	/* once the buffer is full nothing more can be emitted, so stop early */
	for (src_ptr = template; *src_ptr != '\0' && dst_ptr < end_ptr; src_ptr++)
	{
		if (src_ptr[0] == '%' && src_ptr[1] == '%')
		{
			/* %%: replace with % */
			src_ptr++;
			*dst_ptr++ = '%';
		}
		else if (src_ptr[0] == '%' && src_ptr[1] == 'n')
		{
			/* %n: node id */
			char		node_id_buf[32] = "";
			size_t		node_id_len = 0;

			src_ptr++;

			/*
			 * Format into a scratch buffer first so the length is known
			 * before anything is written to the destination.
			 */
			snprintf(node_id_buf, sizeof(node_id_buf), "%i", node_id);
			node_id_len = strlen(node_id_buf);

			/* never emit a partial node ID */
			if (node_id_len > (size_t) (end_ptr - dst_ptr))
				break;

			memcpy(dst_ptr, node_id_buf, node_id_len);
			dst_ptr += node_id_len;
		}
		else
		{
			/* ordinary character, or a '%' which is not a placeholder */
			*dst_ptr++ = *src_ptr;
		}
	}

	*dst_ptr = '\0';

	return;
}

View File

@@ -142,5 +142,7 @@ extern char
extern char *trim(char *s);
/*
 * Copy "template" into "parsed_command", replacing each "%n" with "node_id"
 * and each "%%" with a literal '%'. Output is limited to MAXPGPATH - 1
 * characters plus the terminating NUL, so "parsed_command" must point to a
 * buffer of at least MAXPGPATH bytes.
 */
extern void
parse_follow_command(char *parsed_command, char *template, int node_id);
#endif /* _STRUTIL_H_ */