repmgrd: enable election rerun

If "failover_validation_command" is set, and the command returns an error,
rerun the election.

There is a pause between reruns to avoid "churn"; the length of this pause
is controlled by the configuration parameter "election_rerun_interval".
This commit is contained in:
Ian Barwick
2019-03-12 14:03:59 +09:00
committed by Ian Barwick
parent 99923f5ffc
commit fc397f25f6
6 changed files with 68 additions and 6 deletions

View File

@@ -363,6 +363,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->connection_check_type = CHECK_PING; options->connection_check_type = CHECK_PING;
options->primary_visibility_consensus = false; options->primary_visibility_consensus = false;
memset(options->failover_validation_command, 0, sizeof(options->failover_validation_command)); memset(options->failover_validation_command, 0, sizeof(options->failover_validation_command));
options->election_rerun_interval = DEFAULT_ELECTION_RERUN_INTERVAL;
/*------------- /*-------------
* witness settings * witness settings
@@ -647,6 +648,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->primary_visibility_consensus = parse_bool(value, name, error_list); options->primary_visibility_consensus = parse_bool(value, name, error_list);
else if (strcmp(name, "failover_validation_command") == 0) else if (strcmp(name, "failover_validation_command") == 0)
strncpy(options->failover_validation_command, value, sizeof(options->failover_validation_command)); strncpy(options->failover_validation_command, value, sizeof(options->failover_validation_command));
else if (strcmp(name, "election_rerun_interval") == 0)
options->election_rerun_interval = repmgr_atoi(value, name, error_list, 0);
/* witness settings */ /* witness settings */
else if (strcmp(name, "witness_sync_interval") == 0) else if (strcmp(name, "witness_sync_interval") == 0)

View File

@@ -146,6 +146,7 @@ typedef struct
ConnectionCheckType connection_check_type; ConnectionCheckType connection_check_type;
bool primary_visibility_consensus; bool primary_visibility_consensus;
char failover_validation_command[MAXPGPATH]; char failover_validation_command[MAXPGPATH];
int election_rerun_interval;
/* BDR settings */ /* BDR settings */
bool bdr_local_monitoring_only; bool bdr_local_monitoring_only;
@@ -217,7 +218,8 @@ typedef struct
false, -1, \ false, -1, \
DEFAULT_ASYNC_QUERY_TIMEOUT, \ DEFAULT_ASYNC_QUERY_TIMEOUT, \
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \ DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, true, "", \ -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, \
CHECK_PING, true, "", DEFAULT_ELECTION_RERUN_INTERVAL, \
/* BDR settings */ \ /* BDR settings */ \
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \ false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
/* service settings */ \ /* service settings */ \

View File

@@ -275,6 +275,19 @@
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<indexterm>
<primary>election_rerun_interval</primary>
</indexterm>
<term><option>election_rerun_interval</option></term>
<listitem>
<para>
If <option>failover_validation_command</option> is set, and the command returns
an error, pause the specified number of seconds (default: 15) before rerunning the election.
</para>
</listitem>
</varlistentry>
</variablelist> </variablelist>
<para> <para>

View File

@@ -332,7 +332,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
#failover_validation_command= # Script to execute for an external mechanism to validate the failover #failover_validation_command= # Script to execute for an external mechanism to validate the failover
# decision made by repmgrd. One or both of the following parameter placeholders # decision made by repmgrd. One or both of the following parameter placeholders
# should be provided, which will be replaced by repmgrd with the appropriate # should be provided, which will be replaced by repmgrd with the appropriate
# value: %n (node_id), %a (node_name) # value: %n (node_id), %a (node_name). *Must* be the same on all nodes.
#election_rerun_interval=15 # if "failover_validation_command" is set, and the command returns
# an error, pause the specified number of seconds before rerunning the election.
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# service control commands # service control commands

View File

@@ -60,6 +60,7 @@
#define NO_UPSTREAM_NODE -1 #define NO_UPSTREAM_NODE -1
#define UNKNOWN_NODE_ID -1 #define UNKNOWN_NODE_ID -1
#define MIN_NODE_ID 1 #define MIN_NODE_ID 1
#define ELECTION_RERUN_NOTIFICATION -2
#define VOTING_TERM_NOT_SET -1 #define VOTING_TERM_NOT_SET -1
#define ARCHIVE_STATUS_DIR_ERROR -1 #define ARCHIVE_STATUS_DIR_ERROR -1
@@ -92,6 +93,7 @@
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */ #define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */ #define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */
#define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */ #define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */
#define DEFAULT_ELECTION_RERUN_INTERVAL 15 /* seconds */
#define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */ #define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */

View File

@@ -37,7 +37,8 @@ typedef enum
FAILOVER_STATE_FOLLOWING_ORIGINAL_PRIMARY, FAILOVER_STATE_FOLLOWING_ORIGINAL_PRIMARY,
FAILOVER_STATE_NO_NEW_PRIMARY, FAILOVER_STATE_NO_NEW_PRIMARY,
FAILOVER_STATE_FOLLOW_FAIL, FAILOVER_STATE_FOLLOW_FAIL,
FAILOVER_STATE_NODE_NOTIFICATION_ERROR FAILOVER_STATE_NODE_NOTIFICATION_ERROR,
FAILOVER_STATE_ELECTION_RERUN
} FailoverState; } FailoverState;
@@ -46,7 +47,8 @@ typedef enum
ELECTION_NOT_CANDIDATE = -1, ELECTION_NOT_CANDIDATE = -1,
ELECTION_WON, ELECTION_WON,
ELECTION_LOST, ELECTION_LOST,
ELECTION_CANCELLED ELECTION_CANCELLED,
ELECTION_RERUN
} ElectionResult; } ElectionResult;
@@ -2086,6 +2088,14 @@ do_primary_failover(void)
log_notice(_("election cancelled")); log_notice(_("election cancelled"));
return false; return false;
} }
else if (election_result == ELECTION_RERUN)
{
log_notice(_("election rerun"));
/* notify siblings that they should rerun the election too */
notify_followers(&sibling_nodes, ELECTION_RERUN_NOTIFICATION);
failover_state = FAILOVER_STATE_ELECTION_RERUN;
}
else if (election_result == ELECTION_WON) else if (election_result == ELECTION_WON)
{ {
if (sibling_nodes.node_count > 0) if (sibling_nodes.node_count > 0)
@@ -2148,6 +2158,12 @@ do_primary_failover(void)
&sibling_nodes); &sibling_nodes);
} }
/* election rerun */
else if (new_primary_id == ELECTION_RERUN_NOTIFICATION)
{
log_notice(_("election rerun"));
failover_state = FAILOVER_STATE_ELECTION_RERUN;
}
else if (config_file_options.failover == FAILOVER_MANUAL) else if (config_file_options.failover == FAILOVER_MANUAL)
{ {
/* automatic failover disabled */ /* automatic failover disabled */
@@ -2218,6 +2234,24 @@ do_primary_failover(void)
failover_state = FAILOVER_STATE_NONE; failover_state = FAILOVER_STATE_NONE;
return true; return true;
case FAILOVER_STATE_ELECTION_RERUN:
/* we no longer care about our former siblings */
clear_node_info_list(&sibling_nodes);
log_notice(_("rerunning election after %i seconds (\"election_rerun_interval\")"),
config_file_options.election_rerun_interval);
sleep(config_file_options.election_rerun_interval);
/*
* mark the upstream node as "up" so another election is triggered
* after we fall back to monitoring
*/
upstream_node_info.node_status = NODE_STATUS_UP;
failover_state = FAILOVER_STATE_NONE;
return false;
case FAILOVER_STATE_PRIMARY_REAPPEARED: case FAILOVER_STATE_PRIMARY_REAPPEARED:
/* /*
@@ -2288,6 +2322,7 @@ do_primary_failover(void)
case FAILOVER_STATE_UNKNOWN: case FAILOVER_STATE_UNKNOWN:
case FAILOVER_STATE_NONE: case FAILOVER_STATE_NONE:
return false; return false;
} }
/* should never reach here */ /* should never reach here */
@@ -3160,6 +3195,9 @@ _print_election_result(ElectionResult result)
case ELECTION_CANCELLED: case ELECTION_CANCELLED:
return "CANCELLED"; return "CANCELLED";
case ELECTION_RERUN:
return "RERUN";
} }
/* should never reach here */ /* should never reach here */
@@ -3767,6 +3805,8 @@ format_failover_state(FailoverState failover_state)
return "FOLLOW_FAIL"; return "FOLLOW_FAIL";
case FAILOVER_STATE_NODE_NOTIFICATION_ERROR: case FAILOVER_STATE_NODE_NOTIFICATION_ERROR:
return "NODE_NOTIFICATION_ERROR"; return "NODE_NOTIFICATION_ERROR";
case FAILOVER_STATE_ELECTION_RERUN:
return "ELECTION_RERUN";
} }
/* should never reach here */ /* should never reach here */
@@ -3844,7 +3884,7 @@ execute_failover_validation_command(t_node_info *node_info)
{ {
/* create event here? */ /* create event here? */
log_notice(_("failover validation command returned a non-zero value (%i)"), return_value); log_notice(_("failover validation command returned a non-zero value (%i)"), return_value);
return ELECTION_LOST; return ELECTION_RERUN;
} }
log_notice(_("failover validation command returned zero")); log_notice(_("failover validation command returned zero"));