Add configuration option "primary_visibility_consensus"

This determines whether repmgrd should continue with a failover if
one or more nodes report they can still see the primary.
This commit is contained in:
Ian Barwick
2019-03-07 10:34:28 +09:00
parent e4e5e35552
commit 37892afcfc
4 changed files with 18 additions and 9 deletions

View File

@@ -361,6 +361,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->standby_disconnect_on_failover = false;
options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT;
options->connection_check_type = CHECK_PING;
options->primary_visibility_consensus = false;
/*-------------
* witness settings
@@ -641,6 +642,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
_("value for \"connection_check_type\" must be \"ping\" or \"connection\"\n"));
}
}
else if (strcmp(name, "primary_visibility_consensus") == 0)
options->primary_visibility_consensus = parse_bool(value, name, error_list);
/* witness settings */
else if (strcmp(name, "witness_sync_interval") == 0)
@@ -1227,8 +1230,8 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
{
strncpy(orig_options->conninfo, new_options.conninfo, MAXLEN);
log_info(_("\"conninfo\" is now \"%s\""), new_options.conninfo);
}
PQfinish(conn);
}
@@ -1306,7 +1309,6 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
config_changed = true;
}
/* promote_command */
if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0)
{

View File

@@ -144,6 +144,7 @@ typedef struct
bool standby_disconnect_on_failover;
int sibling_nodes_disconnect_timeout;
ConnectionCheckType connection_check_type;
bool primary_visibility_consensus;
/* BDR settings */
bool bdr_local_monitoring_only;
@@ -215,7 +216,7 @@ typedef struct
false, -1, \
DEFAULT_ASYNC_QUERY_TIMEOUT, \
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, \
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, true, \
/* BDR settings */ \
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
/* service settings */ \

View File

@@ -327,6 +327,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
#sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover", maximum length of time (in seconds)
# to wait for other standbys to confirm they have disconnected their
# WAL receivers
#primary_visibility_consensus=false # If "true", only continue with failover if no standbys have seen
# the primary node recently
#------------------------------------------------------------------------------
# service control commands

View File

@@ -3526,20 +3526,24 @@ do_election(void)
if (nodes_with_primary_still_visible > 0)
{
log_notice(_("%i nodes can see the primary"),
log_info(_("%i nodes can see the primary"),
nodes_with_primary_still_visible);
log_detail(_("following nodes can see the primary:\n%s"),
nodes_with_primary_visible.data);
monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
if (config_file_options.primary_visibility_consensus == true)
{
log_notice(_("cancelling failover as some nodes can still see the primary"));
monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
reset_node_voting_status();
reset_node_voting_status();
termPQExpBuffer(&nodes_with_primary_visible);
termPQExpBuffer(&nodes_with_primary_visible);
return ELECTION_CANCELLED;
return ELECTION_CANCELLED;
}
}
termPQExpBuffer(&nodes_with_primary_visible);