Make repmgrd failover settings configurable

This commit is contained in:
Ian Barwick
2017-07-07 21:11:22 +09:00
parent b08511ec79
commit 2787994a6e
7 changed files with 34 additions and 17 deletions

View File

@@ -239,11 +239,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
strncpy(options->location, DEFAULT_LOCATION, MAXLEN); strncpy(options->location, DEFAULT_LOCATION, MAXLEN);
memset(options->promote_command, 0, sizeof(options->promote_command)); memset(options->promote_command, 0, sizeof(options->promote_command));
memset(options->follow_command, 0, sizeof(options->follow_command)); memset(options->follow_command, 0, sizeof(options->follow_command));
options->monitor_interval_secs = 2; options->monitor_interval_secs = DEFAULT_STATS_REPORTING_INTERVAL;
options->primary_response_timeout = 60; options->primary_response_timeout = 60;
/* default to 6 reconnection attempts at intervals of 10 seconds */ /* default to 6 reconnection attempts at intervals of 10 seconds */
options->reconnect_attempts = 6; options->reconnect_attempts = DEFAULT_RECONNECTION_ATTEMPTS;
options->reconnect_interval = 10; options->reconnect_interval = DEFAULT_RECONNECTION_INTERVAL;
options->retry_promote_interval_secs = 300; options->retry_promote_interval_secs = 300;
options->monitoring_history = false; /* new in 4.0, replaces --monitoring-history */ options->monitoring_history = false; /* new in 4.0, replaces --monitoring-history */
options->degraded_monitoring_timeout = -1; options->degraded_monitoring_timeout = -1;

View File

@@ -125,7 +125,12 @@ typedef struct
/* standby clone settings */ \ /* standby clone settings */ \
false, "", "", "", "", { NULL, NULL }, \ false, "", "", "", "", { NULL, NULL }, \
/* repmgrd settings */ \ /* repmgrd settings */ \
FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", 2, 60, 6, 10, 300, false, -1, \ FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \
DEFAULT_STATS_REPORTING_INTERVAL, \
60, \
DEFAULT_RECONNECTION_ATTEMPTS, \
DEFAULT_RECONNECTION_INTERVAL, \
300, false, -1, \
/* witness settings */ \ /* witness settings */ \
30, \ 30, \
/* service settings */ \ /* service settings */ \

View File

@@ -1202,7 +1202,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
strncpy(node_info->node_name, PQgetvalue(res, row, 3), MAXLEN); strncpy(node_info->node_name, PQgetvalue(res, row, 3), MAXLEN);
strncpy(node_info->conninfo, PQgetvalue(res, row, 4), MAXLEN); strncpy(node_info->conninfo, PQgetvalue(res, row, 4), MAXLEN);
strncpy(node_info->repluser, PQgetvalue(res, row, 5), MAXLEN); strncpy(node_info->repluser, PQgetvalue(res, row, 5), NAMEDATALEN);
strncpy(node_info->slot_name, PQgetvalue(res, row, 6), MAXLEN); strncpy(node_info->slot_name, PQgetvalue(res, row, 6), MAXLEN);
strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN); strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN);
node_info->priority = atoi(PQgetvalue(res, row, 8)); node_info->priority = atoi(PQgetvalue(res, row, 8));

View File

@@ -48,7 +48,7 @@ static char upstream_data_directory[MAXPGPATH];
static t_conninfo_param_list recovery_conninfo; static t_conninfo_param_list recovery_conninfo;
static char recovery_conninfo_str[MAXLEN]; static char recovery_conninfo_str[MAXLEN];
static char upstream_repluser[MAXLEN]; static char upstream_repluser[NAMEDATALEN];
static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER; static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;
@@ -1755,12 +1755,11 @@ check_source_server()
upstream_node_id = runtime_options.upstream_node_id; upstream_node_id = runtime_options.upstream_node_id;
record_status = get_node_record(source_conn, upstream_node_id, &node_record); record_status = get_node_record(source_conn, upstream_node_id, &node_record);
if (record_status == RECORD_FOUND) if (record_status == RECORD_FOUND)
{ {
upstream_record_found = true; upstream_record_found = true;
strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN); strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN);
strncpy(upstream_repluser, node_record.repluser, MAXLEN); strncpy(upstream_repluser, node_record.repluser, NAMEDATALEN);
} }
/* /*

View File

@@ -33,8 +33,12 @@
#define BDR_MONITORING_LOCAL 1 #define BDR_MONITORING_LOCAL 1
#define BDR_MONITORING_PRIORITY 2 #define BDR_MONITORING_PRIORITY 2
#define DEFAULT_LOCATION "default" #define DEFAULT_LOCATION "default"
#define DEFAULT_PRIORITY 100 #define DEFAULT_PRIORITY 100
#define DEFAULT_RECONNECTION_ATTEMPTS 6
#define DEFAULT_RECONNECTION_INTERVAL 10
#define DEFAULT_STATS_REPORTING_INTERVAL 2
#define FAILOVER_NODES_MAX_CHECK 50 #define FAILOVER_NODES_MAX_CHECK 50

View File

@@ -583,9 +583,9 @@ monitor_streaming_primary(void)
goto loop; goto loop;
} }
monitoring_state = MS_DEGRADED; monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start); INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
} }
} }
@@ -894,6 +894,10 @@ monitor_streaming_standby(void)
if (PQstatus(upstream_conn) == CONNECTION_OK) if (PQstatus(upstream_conn) == CONNECTION_OK)
{ {
// XXX check here if upstream is still primary // XXX check here if upstream is still primary
// -> will be a problem if another node was promoted in the meantime
// and upstream is now former primary
// XXX scan other nodes to see if any has become primary
upstream_node_status = NODE_STATUS_UP; upstream_node_status = NODE_STATUS_UP;
monitoring_state = MS_NORMAL; monitoring_state = MS_NORMAL;
@@ -930,8 +934,9 @@ monitor_streaming_standby(void)
goto loop; goto loop;
} }
} }
// unable to connect to former primary - check if another node has
// been promoted
// XXX scan other nodes to see if any has become primary
} }
loop: loop:
@@ -1218,9 +1223,13 @@ do_upstream_standby_failover(void)
PQfinish(upstream_conn); PQfinish(upstream_conn);
upstream_conn = NULL; upstream_conn = NULL;
// check status
record_status = get_primary_node_record(local_conn, &primary_node_info); record_status = get_primary_node_record(local_conn, &primary_node_info);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve primary node record"));
return false;
}
/* /*
* Verify that we can still talk to the cluster primary, even though * Verify that we can still talk to the cluster primary, even though
* the node's upstream is not available * the node's upstream is not available
@@ -2185,8 +2194,7 @@ try_reconnect(const char *conninfo, NodeStatus *node_status)
int i; int i;
// XXX make this all configurable int max_attempts = config_file_options.reconnect_attempts;
int max_attempts = 5;
for (i = 0; i < max_attempts; i++) for (i = 0; i < max_attempts; i++)
{ {
@@ -2207,7 +2215,9 @@ try_reconnect(const char *conninfo, NodeStatus *node_status)
PQfinish(conn); PQfinish(conn);
log_notice(_("unable to reconnect to node")); log_notice(_("unable to reconnect to node"));
} }
sleep(1); log_info(_("sleeping %i seconds until next reconnection_attempt"),
config_file_options.reconnect_interval);
sleep(config_file_options.reconnect_interval);
} }

View File

@@ -15,7 +15,6 @@
/* same as defined in src/include/replication/walreceiver.h */ /* same as defined in src/include/replication/walreceiver.h */
#define MAXCONNINFO 1024 #define MAXCONNINFO 1024
/* Why? http://stackoverflow.com/a/5459929/398670 */
#define STR(x) CppAsString(x) #define STR(x) CppAsString(x)
#define MAXLEN_STR STR(MAXLEN) #define MAXLEN_STR STR(MAXLEN)