mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
Make repmgrd failover settings configurable
This commit is contained in:
6
config.c
6
config.c
@@ -239,11 +239,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
strncpy(options->location, DEFAULT_LOCATION, MAXLEN);
|
strncpy(options->location, DEFAULT_LOCATION, MAXLEN);
|
||||||
memset(options->promote_command, 0, sizeof(options->promote_command));
|
memset(options->promote_command, 0, sizeof(options->promote_command));
|
||||||
memset(options->follow_command, 0, sizeof(options->follow_command));
|
memset(options->follow_command, 0, sizeof(options->follow_command));
|
||||||
options->monitor_interval_secs = 2;
|
options->monitor_interval_secs = DEFAULT_STATS_REPORTING_INTERVAL;
|
||||||
options->primary_response_timeout = 60;
|
options->primary_response_timeout = 60;
|
||||||
/* default to 6 reconnection attempts at intervals of 10 seconds */
|
/* default to 6 reconnection attempts at intervals of 10 seconds */
|
||||||
options->reconnect_attempts = 6;
|
options->reconnect_attempts = DEFAULT_RECONNECTION_ATTEMPTS;
|
||||||
options->reconnect_interval = 10;
|
options->reconnect_interval = DEFAULT_RECONNECTION_INTERVAL;
|
||||||
options->retry_promote_interval_secs = 300;
|
options->retry_promote_interval_secs = 300;
|
||||||
options->monitoring_history = false; /* new in 4.0, replaces --monitoring-history */
|
options->monitoring_history = false; /* new in 4.0, replaces --monitoring-history */
|
||||||
options->degraded_monitoring_timeout = -1;
|
options->degraded_monitoring_timeout = -1;
|
||||||
|
|||||||
7
config.h
7
config.h
@@ -125,7 +125,12 @@ typedef struct
|
|||||||
/* standby clone settings */ \
|
/* standby clone settings */ \
|
||||||
false, "", "", "", "", { NULL, NULL }, \
|
false, "", "", "", "", { NULL, NULL }, \
|
||||||
/* repmgrd settings */ \
|
/* repmgrd settings */ \
|
||||||
FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", 2, 60, 6, 10, 300, false, -1, \
|
FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \
|
||||||
|
DEFAULT_STATS_REPORTING_INTERVAL, \
|
||||||
|
60, \
|
||||||
|
DEFAULT_RECONNECTION_ATTEMPTS, \
|
||||||
|
DEFAULT_RECONNECTION_INTERVAL, \
|
||||||
|
300, false, -1, \
|
||||||
/* witness settings */ \
|
/* witness settings */ \
|
||||||
30, \
|
30, \
|
||||||
/* service settings */ \
|
/* service settings */ \
|
||||||
|
|||||||
@@ -1202,7 +1202,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
|
|||||||
|
|
||||||
strncpy(node_info->node_name, PQgetvalue(res, row, 3), MAXLEN);
|
strncpy(node_info->node_name, PQgetvalue(res, row, 3), MAXLEN);
|
||||||
strncpy(node_info->conninfo, PQgetvalue(res, row, 4), MAXLEN);
|
strncpy(node_info->conninfo, PQgetvalue(res, row, 4), MAXLEN);
|
||||||
strncpy(node_info->repluser, PQgetvalue(res, row, 5), MAXLEN);
|
strncpy(node_info->repluser, PQgetvalue(res, row, 5), NAMEDATALEN);
|
||||||
strncpy(node_info->slot_name, PQgetvalue(res, row, 6), MAXLEN);
|
strncpy(node_info->slot_name, PQgetvalue(res, row, 6), MAXLEN);
|
||||||
strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN);
|
strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN);
|
||||||
node_info->priority = atoi(PQgetvalue(res, row, 8));
|
node_info->priority = atoi(PQgetvalue(res, row, 8));
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ static char upstream_data_directory[MAXPGPATH];
|
|||||||
|
|
||||||
static t_conninfo_param_list recovery_conninfo;
|
static t_conninfo_param_list recovery_conninfo;
|
||||||
static char recovery_conninfo_str[MAXLEN];
|
static char recovery_conninfo_str[MAXLEN];
|
||||||
static char upstream_repluser[MAXLEN];
|
static char upstream_repluser[NAMEDATALEN];
|
||||||
|
|
||||||
static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;
|
static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;
|
||||||
|
|
||||||
@@ -1755,12 +1755,11 @@ check_source_server()
|
|||||||
upstream_node_id = runtime_options.upstream_node_id;
|
upstream_node_id = runtime_options.upstream_node_id;
|
||||||
|
|
||||||
record_status = get_node_record(source_conn, upstream_node_id, &node_record);
|
record_status = get_node_record(source_conn, upstream_node_id, &node_record);
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
if (record_status == RECORD_FOUND)
|
||||||
{
|
{
|
||||||
upstream_record_found = true;
|
upstream_record_found = true;
|
||||||
strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN);
|
strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN);
|
||||||
strncpy(upstream_repluser, node_record.repluser, MAXLEN);
|
strncpy(upstream_repluser, node_record.repluser, NAMEDATALEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
8
repmgr.h
8
repmgr.h
@@ -33,8 +33,12 @@
|
|||||||
#define BDR_MONITORING_LOCAL 1
|
#define BDR_MONITORING_LOCAL 1
|
||||||
#define BDR_MONITORING_PRIORITY 2
|
#define BDR_MONITORING_PRIORITY 2
|
||||||
|
|
||||||
#define DEFAULT_LOCATION "default"
|
#define DEFAULT_LOCATION "default"
|
||||||
#define DEFAULT_PRIORITY 100
|
#define DEFAULT_PRIORITY 100
|
||||||
|
#define DEFAULT_RECONNECTION_ATTEMPTS 6
|
||||||
|
#define DEFAULT_RECONNECTION_INTERVAL 10
|
||||||
|
#define DEFAULT_STATS_REPORTING_INTERVAL 2
|
||||||
|
|
||||||
#define FAILOVER_NODES_MAX_CHECK 50
|
#define FAILOVER_NODES_MAX_CHECK 50
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
22
repmgrd.c
22
repmgrd.c
@@ -583,9 +583,9 @@ monitor_streaming_primary(void)
|
|||||||
|
|
||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
monitoring_state = MS_DEGRADED;
|
monitoring_state = MS_DEGRADED;
|
||||||
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -894,6 +894,10 @@ monitor_streaming_standby(void)
|
|||||||
if (PQstatus(upstream_conn) == CONNECTION_OK)
|
if (PQstatus(upstream_conn) == CONNECTION_OK)
|
||||||
{
|
{
|
||||||
// XXX check here if upstream is still primary
|
// XXX check here if upstream is still primary
|
||||||
|
// -> will be a problem if another node was promoted in the meantime
|
||||||
|
// and upstream is now former primary
|
||||||
|
// XXX scan other nodes to see if any has become primary
|
||||||
|
|
||||||
upstream_node_status = NODE_STATUS_UP;
|
upstream_node_status = NODE_STATUS_UP;
|
||||||
monitoring_state = MS_NORMAL;
|
monitoring_state = MS_NORMAL;
|
||||||
|
|
||||||
@@ -930,8 +934,9 @@ monitor_streaming_standby(void)
|
|||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// unable to connect to former primary - check if another node has
|
||||||
|
// been promoted
|
||||||
|
|
||||||
// XXX scan other nodes to see if any has become primary
|
|
||||||
}
|
}
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
@@ -1218,9 +1223,13 @@ do_upstream_standby_failover(void)
|
|||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
upstream_conn = NULL;
|
upstream_conn = NULL;
|
||||||
|
|
||||||
// check status
|
|
||||||
record_status = get_primary_node_record(local_conn, &primary_node_info);
|
record_status = get_primary_node_record(local_conn, &primary_node_info);
|
||||||
|
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve primary node record"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Verify that we can still talk to the cluster primary, even though
|
* Verify that we can still talk to the cluster primary, even though
|
||||||
* the node's upstream is not available
|
* the node's upstream is not available
|
||||||
@@ -2185,8 +2194,7 @@ try_reconnect(const char *conninfo, NodeStatus *node_status)
|
|||||||
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// XXX make this all configurable
|
int max_attempts = config_file_options.reconnect_attempts;
|
||||||
int max_attempts = 5;
|
|
||||||
|
|
||||||
for (i = 0; i < max_attempts; i++)
|
for (i = 0; i < max_attempts; i++)
|
||||||
{
|
{
|
||||||
@@ -2207,7 +2215,9 @@ try_reconnect(const char *conninfo, NodeStatus *node_status)
|
|||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_notice(_("unable to reconnect to node"));
|
log_notice(_("unable to reconnect to node"));
|
||||||
}
|
}
|
||||||
sleep(1);
|
log_info(_("sleeping %i seconds until next reconnection_attempt"),
|
||||||
|
config_file_options.reconnect_interval);
|
||||||
|
sleep(config_file_options.reconnect_interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,6 @@
|
|||||||
/* same as defined in src/include/replication/walreceiver.h */
|
/* same as defined in src/include/replication/walreceiver.h */
|
||||||
#define MAXCONNINFO 1024
|
#define MAXCONNINFO 1024
|
||||||
|
|
||||||
/* Why? http://stackoverflow.com/a/5459929/398670 */
|
|
||||||
#define STR(x) CppAsString(x)
|
#define STR(x) CppAsString(x)
|
||||||
|
|
||||||
#define MAXLEN_STR STR(MAXLEN)
|
#define MAXLEN_STR STR(MAXLEN)
|
||||||
|
|||||||
Reference in New Issue
Block a user