mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Make repmgrd failover settings configurable
This commit is contained in:
6
config.c
6
config.c
@@ -239,11 +239,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
strncpy(options->location, DEFAULT_LOCATION, MAXLEN);
|
||||
memset(options->promote_command, 0, sizeof(options->promote_command));
|
||||
memset(options->follow_command, 0, sizeof(options->follow_command));
|
||||
options->monitor_interval_secs = 2;
|
||||
options->monitor_interval_secs = DEFAULT_STATS_REPORTING_INTERVAL;
|
||||
options->primary_response_timeout = 60;
|
||||
/* default to 6 reconnection attempts at intervals of 10 seconds */
|
||||
options->reconnect_attempts = 6;
|
||||
options->reconnect_interval = 10;
|
||||
options->reconnect_attempts = DEFAULT_RECONNECTION_ATTEMPTS;
|
||||
options->reconnect_interval = DEFAULT_RECONNECTION_INTERVAL;
|
||||
options->retry_promote_interval_secs = 300;
|
||||
options->monitoring_history = false; /* new in 4.0, replaces --monitoring-history */
|
||||
options->degraded_monitoring_timeout = -1;
|
||||
|
||||
7
config.h
7
config.h
@@ -125,7 +125,12 @@ typedef struct
|
||||
/* standby clone settings */ \
|
||||
false, "", "", "", "", { NULL, NULL }, \
|
||||
/* repmgrd settings */ \
|
||||
FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", 2, 60, 6, 10, 300, false, -1, \
|
||||
FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \
|
||||
DEFAULT_STATS_REPORTING_INTERVAL, \
|
||||
60, \
|
||||
DEFAULT_RECONNECTION_ATTEMPTS, \
|
||||
DEFAULT_RECONNECTION_INTERVAL, \
|
||||
300, false, -1, \
|
||||
/* witness settings */ \
|
||||
30, \
|
||||
/* service settings */ \
|
||||
|
||||
@@ -1202,7 +1202,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
|
||||
|
||||
strncpy(node_info->node_name, PQgetvalue(res, row, 3), MAXLEN);
|
||||
strncpy(node_info->conninfo, PQgetvalue(res, row, 4), MAXLEN);
|
||||
strncpy(node_info->repluser, PQgetvalue(res, row, 5), MAXLEN);
|
||||
strncpy(node_info->repluser, PQgetvalue(res, row, 5), NAMEDATALEN);
|
||||
strncpy(node_info->slot_name, PQgetvalue(res, row, 6), MAXLEN);
|
||||
strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN);
|
||||
node_info->priority = atoi(PQgetvalue(res, row, 8));
|
||||
|
||||
@@ -48,7 +48,7 @@ static char upstream_data_directory[MAXPGPATH];
|
||||
|
||||
static t_conninfo_param_list recovery_conninfo;
|
||||
static char recovery_conninfo_str[MAXLEN];
|
||||
static char upstream_repluser[MAXLEN];
|
||||
static char upstream_repluser[NAMEDATALEN];
|
||||
|
||||
static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;
|
||||
|
||||
@@ -1755,12 +1755,11 @@ check_source_server()
|
||||
upstream_node_id = runtime_options.upstream_node_id;
|
||||
|
||||
record_status = get_node_record(source_conn, upstream_node_id, &node_record);
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
upstream_record_found = true;
|
||||
strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN);
|
||||
strncpy(upstream_repluser, node_record.repluser, MAXLEN);
|
||||
strncpy(upstream_repluser, node_record.repluser, NAMEDATALEN);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
8
repmgr.h
8
repmgr.h
@@ -33,8 +33,12 @@
|
||||
#define BDR_MONITORING_LOCAL 1
|
||||
#define BDR_MONITORING_PRIORITY 2
|
||||
|
||||
#define DEFAULT_LOCATION "default"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
#define DEFAULT_LOCATION "default"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
#define DEFAULT_RECONNECTION_ATTEMPTS 6
|
||||
#define DEFAULT_RECONNECTION_INTERVAL 10
|
||||
#define DEFAULT_STATS_REPORTING_INTERVAL 2
|
||||
|
||||
#define FAILOVER_NODES_MAX_CHECK 50
|
||||
|
||||
|
||||
|
||||
22
repmgrd.c
22
repmgrd.c
@@ -583,9 +583,9 @@ monitor_streaming_primary(void)
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
monitoring_state = MS_DEGRADED;
|
||||
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -894,6 +894,10 @@ monitor_streaming_standby(void)
|
||||
if (PQstatus(upstream_conn) == CONNECTION_OK)
|
||||
{
|
||||
// XXX check here if upstream is still primary
|
||||
// -> will be a problem if another node was promoted in the meantime
|
||||
// and upstream is now former primary
|
||||
// XXX scan other nodes to see if any has become primary
|
||||
|
||||
upstream_node_status = NODE_STATUS_UP;
|
||||
monitoring_state = MS_NORMAL;
|
||||
|
||||
@@ -930,8 +934,9 @@ monitor_streaming_standby(void)
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
// unable to connect to former primary - check if another node has
|
||||
// been promoted
|
||||
|
||||
// XXX scan other nodes to see if any has become primary
|
||||
}
|
||||
|
||||
loop:
|
||||
@@ -1218,9 +1223,13 @@ do_upstream_standby_failover(void)
|
||||
PQfinish(upstream_conn);
|
||||
upstream_conn = NULL;
|
||||
|
||||
// check status
|
||||
record_status = get_primary_node_record(local_conn, &primary_node_info);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_error(_("unable to retrieve primary node record"));
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
* Verify that we can still talk to the cluster primary, even though
|
||||
* the node's upstream is not available
|
||||
@@ -2185,8 +2194,7 @@ try_reconnect(const char *conninfo, NodeStatus *node_status)
|
||||
|
||||
int i;
|
||||
|
||||
// XXX make this all configurable
|
||||
int max_attempts = 5;
|
||||
int max_attempts = config_file_options.reconnect_attempts;
|
||||
|
||||
for (i = 0; i < max_attempts; i++)
|
||||
{
|
||||
@@ -2207,7 +2215,9 @@ try_reconnect(const char *conninfo, NodeStatus *node_status)
|
||||
PQfinish(conn);
|
||||
log_notice(_("unable to reconnect to node"));
|
||||
}
|
||||
sleep(1);
|
||||
log_info(_("sleeping %i seconds until next reconnection_attempt"),
|
||||
config_file_options.reconnect_interval);
|
||||
sleep(config_file_options.reconnect_interval);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user