mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 00:46:29 +00:00
Add tunables for the number of connection retries to the master and the interval between
connection retries. These parameters, along with master_response_timeout, determine how much time elapses between a master failure and the start of failover.
This commit is contained in:
34
config.c
34
config.c
@@ -45,6 +45,10 @@ parse_config(const char *config_file, t_configuration_options *options)
|
|||||||
/* if nothing has been provided defaults to 60 */
|
/* if nothing has been provided defaults to 60 */
|
||||||
options->master_response_timeout = 60;
|
options->master_response_timeout = 60;
|
||||||
|
|
||||||
|
/* it defaults to 6 retries with a time between retries of 10s */
|
||||||
|
options->reconnect_attempts = 6;
|
||||||
|
options->reconnect_intvl = 10;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since some commands don't require a config file at all, not
|
* Since some commands don't require a config file at all, not
|
||||||
* having one isn't necessarily a problem.
|
* having one isn't necessarily a problem.
|
||||||
@@ -103,6 +107,10 @@ parse_config(const char *config_file, t_configuration_options *options)
|
|||||||
strncpy(options->follow_command, value, MAXLEN);
|
strncpy(options->follow_command, value, MAXLEN);
|
||||||
else if (strcmp(name, "master_response_timeout") == 0)
|
else if (strcmp(name, "master_response_timeout") == 0)
|
||||||
options->master_response_timeout = atoi(value);
|
options->master_response_timeout = atoi(value);
|
||||||
|
else if (strcmp(name, "reconnect_attempts") == 0)
|
||||||
|
options->reconnect_attempts = atoi(value);
|
||||||
|
else if (strcmp(name, "reconnect_interval") == 0)
|
||||||
|
options->reconnect_intvl = atoi(value);
|
||||||
else
|
else
|
||||||
log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value);
|
log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value);
|
||||||
}
|
}
|
||||||
@@ -128,6 +136,18 @@ parse_config(const char *config_file, t_configuration_options *options)
|
|||||||
log_err(_("Master response timeout must be greater than zero. Check the configuration file.\n"));
|
log_err(_("Master response timeout must be greater than zero. Check the configuration file.\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (options->reconnect_attempts < 0)
|
||||||
|
{
|
||||||
|
log_err(_("Reconnect attempts must be zero or greater. Check the configuration file.\n"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (options->reconnect_intvl <= 0)
|
||||||
|
{
|
||||||
|
log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -232,6 +252,18 @@ reload_configuration(char *config_file, t_configuration_options *orig_options)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (new_options.reconnect_attempts < 0)
|
||||||
|
{
|
||||||
|
log_warning(_("\nNew value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_options.reconnect_intvl < 0)
|
||||||
|
{
|
||||||
|
log_warning(_("\nNew value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/* Test conninfo string */
|
/* Test conninfo string */
|
||||||
conn = establishDBConnection(new_options.conninfo, false);
|
conn = establishDBConnection(new_options.conninfo, false);
|
||||||
if (!conn || (PQstatus(conn) != CONNECTION_OK))
|
if (!conn || (PQstatus(conn) != CONNECTION_OK))
|
||||||
@@ -252,6 +284,8 @@ reload_configuration(char *config_file, t_configuration_options *orig_options)
|
|||||||
strcpy(orig_options->follow_command, new_options.follow_command);
|
strcpy(orig_options->follow_command, new_options.follow_command);
|
||||||
strcpy(orig_options->rsync_options, new_options.rsync_options);
|
strcpy(orig_options->rsync_options, new_options.rsync_options);
|
||||||
orig_options->master_response_timeout = new_options.master_response_timeout;
|
orig_options->master_response_timeout = new_options.master_response_timeout;
|
||||||
|
orig_options->reconnect_attempts = new_options.reconnect_attempts;
|
||||||
|
orig_options->reconnect_intvl = new_options.reconnect_intvl;
|
||||||
/*
|
/*
|
||||||
* XXX These ones can change with a simple SIGHUP?
|
* XXX These ones can change with a simple SIGHUP?
|
||||||
|
|
||||||
|
|||||||
2
config.h
2
config.h
@@ -37,6 +37,8 @@ typedef struct
|
|||||||
char logfacility[MAXLEN];
|
char logfacility[MAXLEN];
|
||||||
char rsync_options[QUERY_STR_LEN];
|
char rsync_options[QUERY_STR_LEN];
|
||||||
int master_response_timeout;
|
int master_response_timeout;
|
||||||
|
int reconnect_attempts;
|
||||||
|
int reconnect_intvl;
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
void parse_config(const char *config_file, t_configuration_options *options);
|
void parse_config(const char *config_file, t_configuration_options *options);
|
||||||
|
|||||||
@@ -16,6 +16,10 @@ rsync_options=--archive --checksum --compress --progress --rsh=ssh
|
|||||||
# How many seconds we wait for master response before declaring master failure
|
# How many seconds we wait for master response before declaring master failure
|
||||||
master_response_timeout=60
|
master_response_timeout=60
|
||||||
|
|
||||||
|
# How many times we try to reconnect to master before starting failover procedure (reconnect_attempts), and how many seconds we wait between attempts (reconnect_interval)
|
||||||
|
reconnect_attempts=6
|
||||||
|
reconnect_interval=10
|
||||||
|
|
||||||
# Autofailover options
|
# Autofailover options
|
||||||
failover=automatic
|
failover=automatic
|
||||||
priority=-1
|
priority=-1
|
||||||
|
|||||||
4
repmgr.h
4
repmgr.h
@@ -69,9 +69,5 @@ typedef struct
|
|||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define SLEEP_MONITOR 2
|
#define SLEEP_MONITOR 2
|
||||||
#define SLEEP_RETRY 3
|
|
||||||
#define NUM_RETRY 40
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
16
repmgrd.c
16
repmgrd.c
@@ -345,7 +345,7 @@ WitnessMonitor(void)
|
|||||||
* Check if the master is still available, if after 5 minutes of retries
|
* Check if the master is still available, if after 5 minutes of retries
|
||||||
* we cannot reconnect, return false.
|
* we cannot reconnect, return false.
|
||||||
*/
|
*/
|
||||||
CheckPrimaryConnection(); // this take up to NUM_RETRY * SLEEP_RETRY seconds
|
CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds
|
||||||
|
|
||||||
if (PQstatus(primaryConn) != CONNECTION_OK)
|
if (PQstatus(primaryConn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -429,7 +429,7 @@ StandbyMonitor(void)
|
|||||||
* Check if the master is still available, if after 5 minutes of retries
|
* Check if the master is still available, if after 5 minutes of retries
|
||||||
* we cannot reconnect, try to get a new master.
|
* we cannot reconnect, try to get a new master.
|
||||||
*/
|
*/
|
||||||
CheckPrimaryConnection(); // this take up to NUM_RETRY * SLEEP_RETRY seconds
|
CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds
|
||||||
|
|
||||||
if (PQstatus(primaryConn) != CONNECTION_OK)
|
if (PQstatus(primaryConn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -762,17 +762,19 @@ CheckPrimaryConnection(void)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the master is still available
|
* Check if the master is still available
|
||||||
* if after NUM_RETRY * SLEEP_RETRY seconds of retries
|
* if after local_options.reconnect_attempts * local_options.reconnect_intvl seconds of retries
|
||||||
* we cannot reconnect
|
* we cannot reconnect
|
||||||
* return false
|
* return false
|
||||||
*/
|
*/
|
||||||
for (connection_retries = 0; connection_retries < NUM_RETRY; connection_retries++)
|
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||||
{
|
{
|
||||||
if (!is_pgup(primaryConn, local_options.master_response_timeout))
|
if (!is_pgup(primaryConn, local_options.master_response_timeout))
|
||||||
{
|
{
|
||||||
log_warning(_("%s: Connection to master has been lost, trying to recover... %i seconds before failover decision\n"), progname, (SLEEP_RETRY*(NUM_RETRY-connection_retries)));
|
log_warning(_("%s: Connection to master has been lost, trying to recover... %i seconds before failover decision\n"),
|
||||||
/* wait SLEEP_RETRY seconds between retries */
|
progname,
|
||||||
sleep(SLEEP_RETRY);
|
(local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries)));
|
||||||
|
/* wait local_options.reconnect_intvl seconds between retries */
|
||||||
|
sleep(local_options.reconnect_intvl);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user