Add configuration option "primary_visibility_consensus"

This determines whether repmgrd should continue with a failover if
one or more nodes report they can still see the primary.
This commit is contained in:
Ian Barwick
2019-03-07 10:34:28 +09:00
parent e4e5e35552
commit 37892afcfc
4 changed files with 18 additions and 9 deletions

View File

@@ -361,6 +361,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->standby_disconnect_on_failover = false; options->standby_disconnect_on_failover = false;
options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT; options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT;
options->connection_check_type = CHECK_PING; options->connection_check_type = CHECK_PING;
options->primary_visibility_consensus = false;
/*------------- /*-------------
* witness settings * witness settings
@@ -641,6 +642,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
_("value for \"connection_check_type\" must be \"ping\" or \"connection\"\n")); _("value for \"connection_check_type\" must be \"ping\" or \"connection\"\n"));
} }
} }
else if (strcmp(name, "primary_visibility_consensus") == 0)
options->primary_visibility_consensus = parse_bool(value, name, error_list);
/* witness settings */ /* witness settings */
else if (strcmp(name, "witness_sync_interval") == 0) else if (strcmp(name, "witness_sync_interval") == 0)
@@ -1227,8 +1230,8 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
{ {
strncpy(orig_options->conninfo, new_options.conninfo, MAXLEN); strncpy(orig_options->conninfo, new_options.conninfo, MAXLEN);
log_info(_("\"conninfo\" is now \"%s\""), new_options.conninfo); log_info(_("\"conninfo\" is now \"%s\""), new_options.conninfo);
} }
PQfinish(conn); PQfinish(conn);
} }
@@ -1306,7 +1309,6 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
config_changed = true; config_changed = true;
} }
/* promote_command */ /* promote_command */
if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0) if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0)
{ {

View File

@@ -144,6 +144,7 @@ typedef struct
bool standby_disconnect_on_failover; bool standby_disconnect_on_failover;
int sibling_nodes_disconnect_timeout; int sibling_nodes_disconnect_timeout;
ConnectionCheckType connection_check_type; ConnectionCheckType connection_check_type;
bool primary_visibility_consensus;
/* BDR settings */ /* BDR settings */
bool bdr_local_monitoring_only; bool bdr_local_monitoring_only;
@@ -215,7 +216,7 @@ typedef struct
false, -1, \ false, -1, \
DEFAULT_ASYNC_QUERY_TIMEOUT, \ DEFAULT_ASYNC_QUERY_TIMEOUT, \
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \ DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, \ -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, true, \
/* BDR settings */ \ /* BDR settings */ \
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \ false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
/* service settings */ \ /* service settings */ \

View File

@@ -327,6 +327,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
#sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover", maximum length of time (in seconds) #sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover", maximum length of time (in seconds)
# to wait for other standbys to confirm they have disconnected their # to wait for other standbys to confirm they have disconnected their
# WAL receivers # WAL receivers
#primary_visibility_consensus=false # If "true", only continue with failover if no standbys have seen
# the primary node recently
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# service control commands # service control commands

View File

@@ -3526,20 +3526,24 @@ do_election(void)
if (nodes_with_primary_still_visible > 0) if (nodes_with_primary_still_visible > 0)
{ {
log_notice(_("%i nodes can see the primary"), log_info(_("%i nodes can see the primary"),
nodes_with_primary_still_visible); nodes_with_primary_still_visible);
log_detail(_("following nodes can see the primary:\n%s"), log_detail(_("following nodes can see the primary:\n%s"),
nodes_with_primary_visible.data); nodes_with_primary_visible.data);
monitoring_state = MS_DEGRADED; if (config_file_options.primary_visibility_consensus == true)
INSTR_TIME_SET_CURRENT(degraded_monitoring_start); {
log_notice(_("cancelling failover as some nodes can still see the primary"));
monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
reset_node_voting_status(); reset_node_voting_status();
termPQExpBuffer(&nodes_with_primary_visible); termPQExpBuffer(&nodes_with_primary_visible);
return ELECTION_CANCELLED; return ELECTION_CANCELLED;
}
} }
termPQExpBuffer(&nodes_with_primary_visible); termPQExpBuffer(&nodes_with_primary_visible);