From fc397f25f652a02092d00d9dee2e659f547038b7 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 12 Mar 2019 14:03:59 +0900 Subject: [PATCH] repmgrd: enable election rerun If "failover_validation_command" is set, and the command returns an error, rerun the election. There is a pause between reruns to avoid "churn"; the length of this pause is controlled by the configuration parameter "election_rerun_interval". --- configfile.c | 3 +++ configfile.h | 4 ++- doc/repmgrd-configuration.sgml | 15 ++++++++++- repmgr.conf.sample | 4 ++- repmgr.h | 2 ++ repmgrd-physical.c | 46 +++++++++++++++++++++++++++++++--- 6 files changed, 68 insertions(+), 6 deletions(-) diff --git a/configfile.c b/configfile.c index 3961cacc..f5200acf 100644 --- a/configfile.c +++ b/configfile.c @@ -363,6 +363,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->connection_check_type = CHECK_PING; options->primary_visibility_consensus = false; memset(options->failover_validation_command, 0, sizeof(options->failover_validation_command)); + options->election_rerun_interval = DEFAULT_ELECTION_RERUN_INTERVAL; /*------------- * witness settings @@ -647,6 +648,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->primary_visibility_consensus = parse_bool(value, name, error_list); else if (strcmp(name, "failover_validation_command") == 0) strncpy(options->failover_validation_command, value, sizeof(options->failover_validation_command)); + else if (strcmp(name, "election_rerun_interval") == 0) + options->election_rerun_interval = repmgr_atoi(value, name, error_list, 0); /* witness settings */ else if (strcmp(name, "witness_sync_interval") == 0) diff --git a/configfile.h b/configfile.h index 9bb9515f..1001f7d7 100644 --- a/configfile.h +++ b/configfile.h @@ -146,6 +146,7 @@ typedef struct ConnectionCheckType connection_check_type; bool primary_visibility_consensus; char failover_validation_command[MAXPGPATH]; + int 
election_rerun_interval; /* BDR settings */ bool bdr_local_monitoring_only; @@ -217,7 +218,8 @@ typedef struct false, -1, \ DEFAULT_ASYNC_QUERY_TIMEOUT, \ DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \ - -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, true, "", \ + -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, \ + CHECK_PING, true, "", DEFAULT_ELECTION_RERUN_INTERVAL, \ /* BDR settings */ \ false, DEFAULT_BDR_RECOVERY_TIMEOUT, \ /* service settings */ \ diff --git a/doc/repmgrd-configuration.sgml b/doc/repmgrd-configuration.sgml index df869a48..7b434908 100644 --- a/doc/repmgrd-configuration.sgml +++ b/doc/repmgrd-configuration.sgml @@ -261,7 +261,7 @@ One or both of the following parameter placeholders - should be provided, which will be replaced by repmgrd with the appropriate + should be provided, which will be replaced by repmgrd with the appropriate value: @@ -275,6 +275,19 @@ + + + election_rerun_interval + + + + + If failover_validation_command is set, and the command returns + an error, pause the specified number of seconds (default: 15) before rerunning the election. + + + + diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 812c6804..fa5bbea0 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -332,7 +332,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" #failover_validation_command= # Script to execute for an external mechanism to validate the failover # decision made by repmgrd. One or both of the following parameter placeholders # should be provided, which will be replaced by repmgrd with the appropriate - # value: %n (node_id), %a (node_name) + # value: %n (node_id), %a (node_name). *Must* be the same on all nodes. +#election_rerun_interval=15 # if "failover_validation_command" is set, and the command returns + # an error, pause the specified number of seconds before rerunning the election. 
#------------------------------------------------------------------------------ # service control commands diff --git a/repmgr.h b/repmgr.h index b9e37abf..50c0f47f 100644 --- a/repmgr.h +++ b/repmgr.h @@ -60,6 +60,7 @@ #define NO_UPSTREAM_NODE -1 #define UNKNOWN_NODE_ID -1 #define MIN_NODE_ID 1 +#define ELECTION_RERUN_NOTIFICATION -2 #define VOTING_TERM_NOT_SET -1 #define ARCHIVE_STATUS_DIR_ERROR -1 @@ -92,6 +93,7 @@ #define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */ #define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */ #define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */ +#define DEFAULT_ELECTION_RERUN_INTERVAL 15 /* seconds */ #define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */ diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 854e9522..dacadbc0 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -37,7 +37,8 @@ typedef enum FAILOVER_STATE_FOLLOWING_ORIGINAL_PRIMARY, FAILOVER_STATE_NO_NEW_PRIMARY, FAILOVER_STATE_FOLLOW_FAIL, - FAILOVER_STATE_NODE_NOTIFICATION_ERROR + FAILOVER_STATE_NODE_NOTIFICATION_ERROR, + FAILOVER_STATE_ELECTION_RERUN } FailoverState; @@ -46,7 +47,8 @@ typedef enum ELECTION_NOT_CANDIDATE = -1, ELECTION_WON, ELECTION_LOST, - ELECTION_CANCELLED + ELECTION_CANCELLED, + ELECTION_RERUN } ElectionResult; @@ -2086,6 +2088,14 @@ do_primary_failover(void) log_notice(_("election cancelled")); return false; } + else if (election_result == ELECTION_RERUN) + { + log_notice(_("election rerun")); + /* notify siblings that they should rerun the election too */ + notify_followers(&sibling_nodes, ELECTION_RERUN_NOTIFICATION); + + failover_state = FAILOVER_STATE_ELECTION_RERUN; + } else if (election_result == ELECTION_WON) { if (sibling_nodes.node_count > 0) @@ -2148,6 +2158,12 @@ do_primary_failover(void) &sibling_nodes); } + /* election rerun */ + else if (new_primary_id == ELECTION_RERUN_NOTIFICATION) + { + log_notice(_("election rerun")); + failover_state = FAILOVER_STATE_ELECTION_RERUN; + } else if 
(config_file_options.failover == FAILOVER_MANUAL) { /* automatic failover disabled */ @@ -2218,6 +2234,24 @@ do_primary_failover(void) failover_state = FAILOVER_STATE_NONE; return true; + + case FAILOVER_STATE_ELECTION_RERUN: + + /* we no longer care about our former siblings */ + clear_node_info_list(&sibling_nodes); + + log_notice(_("rerunning election after %i seconds (\"election_rerun_interval\")"), + config_file_options.election_rerun_interval); + sleep(config_file_options.election_rerun_interval); + + /* + * mark the upstream node as "up" so another election is triggered + * after we fall back to monitoring + */ + upstream_node_info.node_status = NODE_STATUS_UP; + failover_state = FAILOVER_STATE_NONE; + return false; + case FAILOVER_STATE_PRIMARY_REAPPEARED: /* @@ -2288,6 +2322,7 @@ do_primary_failover(void) case FAILOVER_STATE_UNKNOWN: case FAILOVER_STATE_NONE: return false; + } /* should never reach here */ @@ -3160,6 +3195,9 @@ _print_election_result(ElectionResult result) case ELECTION_CANCELLED: return "CANCELLED"; + + case ELECTION_RERUN: + return "RERUN"; } /* should never reach here */ @@ -3767,6 +3805,8 @@ format_failover_state(FailoverState failover_state) return "FOLLOW_FAIL"; case FAILOVER_STATE_NODE_NOTIFICATION_ERROR: return "NODE_NOTIFICATION_ERROR"; + case FAILOVER_STATE_ELECTION_RERUN: + return "ELECTION_RERUN"; } /* should never reach here */ @@ -3844,7 +3884,7 @@ execute_failover_validation_command(t_node_info *node_info) { /* create event here? */ log_notice(_("failover validation command returned a non-zero value (%i)"), return_value); - return ELECTION_LOST; + return ELECTION_RERUN; } log_notice(_("failover validation command returned zero"));