From 78e6bdeebe3066d6dbed2094ab230e23c1b84139 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Sep 2017 13:42:50 +0900 Subject: [PATCH] Have repmgrd parse "standby follow --upstream-node-id=%n" --- README.md | 8 ++++++- dbutils.c | 8 +++++++ repmgr-action-standby.c | 12 +++++++--- repmgr.conf.sample | 21 +++++++++++------ repmgrd-physical.c | 18 +++++++++++---- strutil.c | 51 +++++++++++++++++++++++++++++++++++++++++ strutil.h | 2 ++ 7 files changed, 105 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 64ba5f28..9444aeed 100644 --- a/README.md +++ b/README.md @@ -1250,12 +1250,18 @@ Additionally the following `repmgrd` options *must* be set in `repmgr.conf` failover=automatic promote_command='repmgr standby promote -f /etc/repmgr.conf --log-to-file' - follow_command='repmgr standby follow -f /etc/repmgr.conf --log-to-file' + follow_command='repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n' Note that the `--log-to-file` option will cause `repmgr`'s output to be logged to the destination configured to receive log output for `repmgrd`. See `repmgr.conf.sample` for further `repmgrd`-specific settings +The `follow_command` should provide the `--upstream-node-id=%n` option to +`repmgr standby follow`; the `%n` will be replaced by `repmgrd` with the ID +of the new primary. If this is not provided and the original primary comes back +online after the new primary is promoted, there is a risk that +`repmgr standby follow` will follow the original primary.
+ When `failover` is set to `automatic`, upon detecting failure of the current primary, `repmgrd` will execute one of `promote_command` or `follow_command`, depending on whether the current server is to become the new primary, or diff --git a/dbutils.c b/dbutils.c index 9e2da2ea..4c4c0bb5 100644 --- a/dbutils.c +++ b/dbutils.c @@ -2977,6 +2977,14 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char { switch (src_ptr[1]) { + case '%': + /* %%: replace with % */ + if (dst_ptr < end_ptr) + { + src_ptr++; + *dst_ptr++ = *src_ptr; + } + break; case 'n': /* %n: node id */ src_ptr++; diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 53f14b44..b934d0e9 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -1431,7 +1431,13 @@ do_standby_follow(void) if (runtime_options.upstream_node_id != NO_UPSTREAM_NODE) { - // XXX check not self! + /* check not self! */ + if (runtime_options.upstream_node_id == config_file_options.node_id) + { + log_error(_("provided \"--upstream-node-id\" %i is the current node!"), + runtime_options.upstream_node_id); + exit(ERR_BAD_CONFIG); + } record_status = get_node_record(local_conn, runtime_options.upstream_node_id, &primary_node_record); @@ -1445,10 +1451,11 @@ do_standby_follow(void) for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++) { - primary_conn = establish_db_connection(config_file_options.conninfo, true); + primary_conn = establish_db_connection(primary_node_record.conninfo, true); if (PQstatus(primary_conn) == CONNECTION_OK || runtime_options.wait == false) { + log_debug("setting primary id to %i", runtime_options.upstream_node_id); primary_id = runtime_options.upstream_node_id; break; } @@ -1690,7 +1697,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor /* start/restart the service */ // XXX here check if service is running!! 
if not, start - // ensure that problem with pg_ctl output is caught here { char server_command[MAXLEN] = ""; diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 9d2441ac..5ab889ff 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -210,15 +210,22 @@ ssh_options='-q' # Options to append to "ssh" # 'manual': repmgrd will take no action and the node will require # manual attention to reattach it to replication # (does not apply to BDR mode) -#monitoring_history=no - +#promote_command= # command to execute when promoting a new primary; use something like: + # + # repmgr standby promote -f /etc/repmgr.conf + # +#follow_command= # command to execute when instructing a standby to follow a new primary; + # use something like: + # + # repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n + # #primary_notification_timeout=60 # Interval (in seconds) which repmgrd on a standby # will wait for a notification from the new primary, - # before falling back to degraded monitoriong -#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd - # will terminate if the server being monitored - # is no longer available. -1 (default) disables the - # timeout completely. + # before falling back to degraded monitoring +#monitoring_history=no +#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the + # server being monitored is no longer available. -1 (default) + # disables the timeout completely. #async_query_timeout=60 # Interval (in seconds) which repmgrd will wait before # cancelling an asynchronous query. 
diff --git a/repmgrd-physical.c b/repmgrd-physical.c index c27578da..162501bd 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -1661,6 +1661,8 @@ wait_primary_notification(int *new_primary_id) static FailoverState follow_new_primary(int new_primary_id) { + char parsed_follow_command[MAXPGPATH] = ""; + PQExpBufferData event_details; int r; @@ -1695,9 +1697,6 @@ follow_new_primary(int new_primary_id) fflush(stderr); } - log_debug(_("standby follow command is:\n \"%s\""), - config_file_options.follow_command); - upstream_conn = establish_db_connection(new_primary.conninfo, false); if (PQstatus(upstream_conn) == CONNECTION_OK) @@ -1709,6 +1708,7 @@ follow_new_primary(int new_primary_id) } else { + new_primary_ok = false; log_warning(_("new primary is not in recovery")); PQfinish(upstream_conn); } @@ -1727,8 +1727,18 @@ follow_new_primary(int new_primary_id) PQfinish(local_conn); local_conn = NULL; + /* + * replace %n in "config_file_options.follow_command" with ID of primary + * to follow. 
+ */ + parse_follow_command(parsed_follow_command, config_file_options.follow_command, new_primary_id); + + log_debug(_("standby follow command is:\n \"%s\""), + parsed_follow_command); + + /* execute the follow command */ - r = system(config_file_options.follow_command); + r = system(parsed_follow_command); if (r != 0) { diff --git a/strutil.c b/strutil.c index c37855b4..ef14babf 100644 --- a/strutil.c +++ b/strutil.c @@ -410,3 +410,54 @@ trim(char *s) return s; } + +/* parse_follow_command(): copy "template" into "parsed_command", replacing "%n" with "node_id" and "%%" with "%"; "parsed_command" must provide MAXPGPATH bytes, and output which does not fit is silently dropped */ +void +parse_follow_command(char *parsed_command, char *template, int node_id) +{ + const char *src_ptr = NULL; + char *dst_ptr = NULL; + char *end_ptr = NULL; + + dst_ptr = parsed_command; + end_ptr = parsed_command + MAXPGPATH - 1; + *end_ptr = '\0'; + + for(src_ptr = template; *src_ptr; src_ptr++) + { + if (*src_ptr == '%') + { + switch (src_ptr[1]) + { + case '%': + /* %%: escaped percent sign, emit a single literal % (nothing is written once the buffer is full) */ + if (dst_ptr < end_ptr) + { + src_ptr++; + *dst_ptr++ = *src_ptr; + } + break; + case 'n': + /* %n: substitute node_id in decimal; snprintf() is limited to the bytes left before end_ptr */ + src_ptr++; + snprintf(dst_ptr, end_ptr - dst_ptr, "%i", node_id); + dst_ptr += strlen(dst_ptr); + break; + default: + /* any other sequence: the % is not special, copy it as-is; the character after it is handled by the next loop iteration */ + if (dst_ptr < end_ptr) + *dst_ptr++ = *src_ptr; + break; + } + } + else + { + if (dst_ptr < end_ptr) + *dst_ptr++ = *src_ptr; + } + } + + *dst_ptr = '\0'; + + return; +} diff --git a/strutil.h b/strutil.h index d08761a1..4012af15 100644 --- a/strutil.h +++ b/strutil.h @@ -142,5 +142,7 @@ extern char extern char *trim(char *s); +extern void +parse_follow_command(char *parsed_command, char *template, int node_id); #endif /* _STRUTIL_H_ */