diff --git a/configfile.c b/configfile.c
index c07c853e..22545620 100644
--- a/configfile.c
+++ b/configfile.c
@@ -255,6 +255,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	 * ------------ */
 	options->bdr_local_monitoring_only = false;
 	options->bdr_active_node_recovery = false;
+	options->bdr_recovery_timeout = DEFAULT_BDR_RECOVERY_TIMEOUT;
 
 	/* service settings
 	 * ---------------- */
@@ -432,6 +433,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 			options->bdr_local_monitoring_only = parse_bool(value, name, error_list);
 		else if (strcmp(name, "bdr_active_node_recovery") == 0)
 			options->bdr_active_node_recovery = parse_bool(value, name, error_list);
+		else if (strcmp(name, "bdr_recovery_timeout") == 0)
+			options->bdr_recovery_timeout = repmgr_atoi(value, name, error_list, 0);
 
 		/* service settings */
 		else if (strcmp(name, "pg_ctl_options") == 0)
diff --git a/configfile.h b/configfile.h
index 142c77ef..3399e1ff 100644
--- a/configfile.h
+++ b/configfile.h
@@ -89,6 +89,7 @@ typedef struct
 	/* BDR settings */
 	bool		bdr_local_monitoring_only;
 	bool		bdr_active_node_recovery;
+	int			bdr_recovery_timeout;	/* seconds to wait for a recovered node to rejoin */
 
 	/* service settings */
 	char		pg_ctl_options[MAXLEN];
@@ -133,7 +134,7 @@ typedef struct
 		DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
 		DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
 		/* BDR settings */ \
-		false, false, \
+		false, false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
 		/* service settings */ \
 		"", "", "", "", "", "", \
 		/* event notification settings */ \
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index cdc4da99..dd32e068 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -236,4 +236,12 @@ ssh_options='' # Options to append to "ssh"
 
 # BDR monitoring options
 #------------------------------------------------------------------------------
-#bdr_active_node_recovery=false		#
\ No newline at end of file
+#bdr_local_monitoring_only=false	# Only monitor the local node; no checks will be
+									# performed on the other node
+#bdr_recovery_timeout=30			# If a BDR node was offline and has become available,
+									# maximum length of time in seconds to wait for the
+									# node to reconnect to the cluster
+#bdr_active_node_recovery=false		# If a BDR node was offline and has recovered,
+									# provide connection details with the "bdr_recovery"
+									# event to enable automatic reconfiguration of the node
+									# to accept connections
diff --git a/repmgr.h b/repmgr.h
index a99671d0..f84c5fa3 100644
--- a/repmgr.h
+++ b/repmgr.h
@@ -45,8 +45,8 @@
 #define DEFAULT_STATS_REPORTING_INTERVAL 2
 #define DEFAULT_ASYNC_QUERY_TIMEOUT 60
 #define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60
-#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60
-
+#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60
+#define DEFAULT_BDR_RECOVERY_TIMEOUT 30
 
 #define FAILOVER_NODES_MAX_CHECK 50
 
diff --git a/repmgrd-bdr.c b/repmgrd-bdr.c
index 22cc5233..930cbeed 100644
--- a/repmgrd-bdr.c
+++ b/repmgrd-bdr.c
@@ -95,7 +95,7 @@ monitor_bdr(void)
 	{
 		log_error(_("unable to retrieve record for local node (ID: %i), terminating"),
 				  local_node_info.node_id);
-		log_hint(_("check that 'repmgr bdr register' was executed for this node"));
+		log_hint(_("check that \"repmgr bdr register\" was executed for this node"));
 		PQfinish(local_conn);
 		exit(ERR_BAD_CONFIG);
 	}
@@ -191,7 +191,7 @@ monitor_bdr(void)
 
 					cell->node_info->conn = try_reconnect(cell->node_info);
 
-					/* Node has recovered - log and continue */
+					/* node has recovered - log and continue */
 					if (cell->node_info->node_status == NODE_STATUS_UP)
 					{
 						int	node_unreachable_elapsed = calculate_elapsed(node_unreachable_start);
@@ -267,7 +267,7 @@ monitor_bdr(void)
 
 /*
  * do_bdr_failover()
- *0
+ *
  * Here we attempt to perform a BDR "failover".
  *
  * As there's no equivalent of a physical replication failover,
@@ -292,6 +292,7 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
 	t_node_info failed_node = T_NODE_INFO_INITIALIZER;
 	RecordStatus record_status;
 
+	/* if one of the two nodes is down, cluster will be in a degraded state */
 	monitored_node->monitoring_state = MS_DEGRADED;
 	INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
 
@@ -305,8 +306,7 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
 		if (cell->node_info->node_id == monitored_node->node_id)
 			continue;
 
-		/* XXX skip inactive node? */
-		// reuse local conn if local node is up
+		/* TODO: reuse local conn if local node is up */
 		next_node_conn = establish_db_connection(cell->node_info->conninfo, false);
 
 		if (PQstatus(next_node_conn) == CONNECTION_OK)
@@ -353,22 +353,31 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
 	if (am_bdr_failover_handler(next_node_conn, local_node_info.node_id) == false)
 	{
 		PQfinish(next_node_conn);
-		log_debug("other node's repmgrd is handling failover");
+		log_notice(_("other node's repmgrd is handling failover"));
 		return;
 	}
 
+
+	/* check here that the node hasn't come back up */
+	if (is_server_available(monitored_node->conninfo) == true)
+	{
+		log_notice(_("node %i has reappeared, aborting failover"),
+				   monitored_node->node_id);
+		monitored_node->monitoring_state = MS_NORMAL;
+		/* give up the handler role and stop here; the connection is closed below */
+		unset_bdr_failover_handler(next_node_conn);
+		PQfinish(next_node_conn);
+		return;
+	}
+
 	log_debug("this node is the failover handler");
 
-	// check here that the node hasn't come back up...
-
-	log_info(_("connecting to target node %s"), target_node.node_name);
-
 	initPQExpBuffer(&event_details);
 
 	event_info.conninfo_str = target_node.conninfo;
 	event_info.node_name = target_node.node_name;
 
-	/* update our own record on the other node */
+	/* update node record on the active node */
 	update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
 
 	appendPQExpBuffer(&event_details,
@@ -401,6 +410,8 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
 
 	unset_bdr_failover_handler(next_node_conn);
 
+	PQfinish(next_node_conn);
+
 	return;
 }
 
@@ -413,6 +424,7 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
 	t_event_info event_info = T_EVENT_INFO_INITIALIZER;
 	int i;
 	bool node_recovered = false;
+	int node_recovery_elapsed;
 
 	recovered_node_conn = establish_db_connection(monitored_node->conninfo, false);
 
@@ -429,8 +441,7 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
 		return;
 	}
 
-	// bdr_recovery_timeout
-	for (i = 0; i < 30; i++)
+	for (i = 0; i < config_file_options.bdr_recovery_timeout; i++)
 	{
 		RecordStatus record_status = get_bdr_node_record_by_name(
 			recovered_node_conn,
@@ -439,6 +450,7 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
 
 		if (record_status == RECORD_FOUND && bdr_record.node_status == 'r')
 		{
+			/* TODO: check pg_stat_replication */
 			node_recovered = true;
 			break;
 		}
@@ -459,13 +471,15 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
 
 	// don't end up monitoring a parted node; if not attached,
 	// generate a failed bdr_recovery event
-
-	// note elapsed
 	initPQExpBuffer(&event_details);
+
+	node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
+
 	appendPQExpBuffer(&event_details,
-					  _("node '%s' (ID: %i) has recovered"),
+					  _("node '%s' (ID: %i) has recovered after %i seconds"),
 					  monitored_node->node_name,
-					  monitored_node->node_id);
+					  monitored_node->node_id,
+					  node_recovery_elapsed);
 
 	monitored_node->monitoring_state = MS_NORMAL;
 