From 37892afcfc90e5c382c3645339015d472f8aed0b Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 7 Mar 2019 10:34:28 +0900 Subject: [PATCH] Add configuration option "primary_visibility_consensus" This determines whether repmgrd should continue with a failover if one or more nodes report they can still see the primary. --- configfile.c | 6 ++++-- configfile.h | 3 ++- repmgr.conf.sample | 2 ++ repmgrd-physical.c | 16 ++++++++++------ 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/configfile.c b/configfile.c index 82223151..52b6cf75 100644 --- a/configfile.c +++ b/configfile.c @@ -361,6 +361,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->standby_disconnect_on_failover = false; options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT; options->connection_check_type = CHECK_PING; + options->primary_visibility_consensus = false; /*------------- * witness settings @@ -641,6 +642,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * _("value for \"connection_check_type\" must be \"ping\" or \"connection\"\n")); } } + else if (strcmp(name, "primary_visibility_consensus") == 0) + options->primary_visibility_consensus = parse_bool(value, name, error_list); /* witness settings */ else if (strcmp(name, "witness_sync_interval") == 0) @@ -1227,8 +1230,8 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type) { strncpy(orig_options->conninfo, new_options.conninfo, MAXLEN); log_info(_("\"conninfo\" is now \"%s\""), new_options.conninfo); - } + PQfinish(conn); } @@ -1306,7 +1309,6 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type) config_changed = true; } - /* promote_command */ if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0) { diff --git a/configfile.h b/configfile.h index e4b257a5..b4a20b82 100644 --- a/configfile.h +++ b/configfile.h @@ -144,6 +144,7 @@ 
typedef struct bool standby_disconnect_on_failover; int sibling_nodes_disconnect_timeout; ConnectionCheckType connection_check_type; + bool primary_visibility_consensus; /* BDR settings */ bool bdr_local_monitoring_only; @@ -215,7 +216,7 @@ typedef struct false, -1, \ DEFAULT_ASYNC_QUERY_TIMEOUT, \ DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \ - -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, \ + -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, true, \ /* BDR settings */ \ false, DEFAULT_BDR_RECOVERY_TIMEOUT, \ /* service settings */ \ diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 67edfcd0..34ebccd5 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -327,6 +327,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" #sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover", maximum length of time (in seconds) # to wait for other standbys to confirm they have disconnected their # WAL receivers +#primary_visibility_consensus=false # If "true", only continue with failover if no standbys have seen + # the primary node recently #------------------------------------------------------------------------------ # service control commands diff --git a/repmgrd-physical.c b/repmgrd-physical.c index f0341f1d..b372b083 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -3526,20 +3526,24 @@ do_election(void) if (nodes_with_primary_still_visible > 0) { - log_notice(_("%i nodes can see the primary"), + log_info(_("%i nodes can see the primary"), nodes_with_primary_still_visible); log_detail(_("following nodes can see the primary:\n%s"), nodes_with_primary_visible.data); - monitoring_state = MS_DEGRADED; - INSTR_TIME_SET_CURRENT(degraded_monitoring_start); + if (config_file_options.primary_visibility_consensus == true) + { + log_notice(_("cancelling failover as some nodes can still see the primary")); + monitoring_state = MS_DEGRADED; + INSTR_TIME_SET_CURRENT(degraded_monitoring_start); 
- reset_node_voting_status(); + reset_node_voting_status(); - termPQExpBuffer(&nodes_with_primary_visible); + termPQExpBuffer(&nodes_with_primary_visible); - return ELECTION_CANCELLED; + return ELECTION_CANCELLED; + } } termPQExpBuffer(&nodes_with_primary_visible);