mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Add configuration option "sibling_nodes_disconnect_timeout"
This controls the maximum length of time in seconds that repmgrd will wait for other standbys to disconnect their WAL receivers in a failover situation. This setting is only used when "standby_disconnect_on_failover" is set to "true".
This commit is contained in:
@@ -359,6 +359,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */
|
options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */
|
||||||
memset(options->repmgrd_pid_file, 0, sizeof(options->repmgrd_pid_file));
|
memset(options->repmgrd_pid_file, 0, sizeof(options->repmgrd_pid_file));
|
||||||
options->standby_disconnect_on_failover = false;
|
options->standby_disconnect_on_failover = false;
|
||||||
|
options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT;
|
||||||
options->connection_check_type = CHECK_PING;
|
options->connection_check_type = CHECK_PING;
|
||||||
|
|
||||||
/*-------------
|
/*-------------
|
||||||
@@ -622,6 +623,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
strncpy(options->repmgrd_pid_file, value, MAXPGPATH);
|
strncpy(options->repmgrd_pid_file, value, MAXPGPATH);
|
||||||
else if (strcmp(name, "standby_disconnect_on_failover") == 0)
|
else if (strcmp(name, "standby_disconnect_on_failover") == 0)
|
||||||
options->standby_disconnect_on_failover = parse_bool(value, name, error_list);
|
options->standby_disconnect_on_failover = parse_bool(value, name, error_list);
|
||||||
|
else if (strcmp(name, "sibling_nodes_disconnect_timeout") == 0)
|
||||||
|
options->sibling_nodes_disconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
else if (strcmp(name, "connection_check_type") == 0)
|
else if (strcmp(name, "connection_check_type") == 0)
|
||||||
{
|
{
|
||||||
if (strcasecmp(value, "ping") == 0)
|
if (strcasecmp(value, "ping") == 0)
|
||||||
|
|||||||
@@ -142,6 +142,7 @@ typedef struct
|
|||||||
int repmgrd_standby_startup_timeout;
|
int repmgrd_standby_startup_timeout;
|
||||||
char repmgrd_pid_file[MAXPGPATH];
|
char repmgrd_pid_file[MAXPGPATH];
|
||||||
bool standby_disconnect_on_failover;
|
bool standby_disconnect_on_failover;
|
||||||
|
int sibling_nodes_disconnect_timeout;
|
||||||
ConnectionCheckType connection_check_type;
|
ConnectionCheckType connection_check_type;
|
||||||
|
|
||||||
/* BDR settings */
|
/* BDR settings */
|
||||||
@@ -214,7 +215,7 @@ typedef struct
|
|||||||
false, -1, \
|
false, -1, \
|
||||||
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
||||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||||
-1, "", false, CHECK_PING, \
|
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, \
|
||||||
/* BDR settings */ \
|
/* BDR settings */ \
|
||||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
||||||
/* service settings */ \
|
/* service settings */ \
|
||||||
|
|||||||
@@ -322,6 +322,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# "--no-pid-file" will force PID file creation to be skipped.
|
# "--no-pid-file" will force PID file creation to be skipped.
|
||||||
# Note: there is normally no need to set this, particularly if
|
# Note: there is normally no need to set this, particularly if
|
||||||
# repmgr was installed from packages.
|
# repmgr was installed from packages.
|
||||||
|
#standby_disconnect_on_failover=false # If "true", in a failover situation wait for all standbys to
|
||||||
|
# disconnect their WAL receivers before electing a new primary
|
||||||
|
#sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover" is "true", the maximum length of time (in seconds)
|
||||||
|
# to wait for other standbys to confirm they have disconnected their
|
||||||
|
# WAL receivers
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# service control commands
|
# service control commands
|
||||||
|
|||||||
1
repmgr.h
1
repmgr.h
@@ -91,6 +91,7 @@
|
|||||||
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
|
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
|
||||||
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
|
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
|
||||||
#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */
|
#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */
|
||||||
|
#define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */
|
||||||
|
|
||||||
#define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */
|
#define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */
|
||||||
|
|
||||||
|
|||||||
@@ -1989,8 +1989,6 @@ do_primary_failover(void)
|
|||||||
static NodeInfoList check_sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
static NodeInfoList check_sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// XXX make configurable
|
|
||||||
int sibling_nodes_disconnect_timeout = 30;
|
|
||||||
bool sibling_node_wal_receiver_connected = false;
|
bool sibling_node_wal_receiver_connected = false;
|
||||||
|
|
||||||
if (PQserverVersion(local_conn) < 90500)
|
if (PQserverVersion(local_conn) < 90500)
|
||||||
@@ -2016,7 +2014,7 @@ do_primary_failover(void)
|
|||||||
local_node_info.upstream_node_id,
|
local_node_info.upstream_node_id,
|
||||||
&check_sibling_nodes);
|
&check_sibling_nodes);
|
||||||
|
|
||||||
for (i = 0; i < sibling_nodes_disconnect_timeout; i++)
|
for (i = 0; i < config_file_options.sibling_nodes_disconnect_timeout; i++)
|
||||||
{
|
{
|
||||||
for (cell = check_sibling_nodes.head; cell; cell = cell->next)
|
for (cell = check_sibling_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
@@ -2048,13 +2046,13 @@ do_primary_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
log_debug("sleeping %i of max %i seconds (\"sibling_nodes_disconnect_timeout\")",
|
log_debug("sleeping %i of max %i seconds (\"sibling_nodes_disconnect_timeout\")",
|
||||||
i + 1, sibling_nodes_disconnect_timeout)
|
i + 1, config_file_options.sibling_nodes_disconnect_timeout);
|
||||||
sleep(1);
|
sleep(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sibling_node_wal_receiver_connected == true)
|
if (sibling_node_wal_receiver_connected == true)
|
||||||
{
|
{
|
||||||
// XXX what do we do here? abort or continue? make configurable?
|
/* TODO: prevent any such nodes becoming promotion candidates */
|
||||||
log_warning(_("WAL receiver still connected on at least one sibling node"));
|
log_warning(_("WAL receiver still connected on at least one sibling node"));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
Reference in New Issue
Block a user