Add configuration option "sibling_nodes_disconnect_timeout"

This controls the maximum length of time in seconds that repmgrd will
wait for other standbys to disconnect their WAL receivers in a failover
situation.

This setting is only used when "standby_disconnect_on_failover" is set to "true".
This commit is contained in:
Ian Barwick
2019-03-06 15:38:39 +09:00
parent 2ed044c358
commit a3f90d2bba
5 changed files with 15 additions and 7 deletions

View File

@@ -359,6 +359,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */ options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */
memset(options->repmgrd_pid_file, 0, sizeof(options->repmgrd_pid_file)); memset(options->repmgrd_pid_file, 0, sizeof(options->repmgrd_pid_file));
options->standby_disconnect_on_failover = false; options->standby_disconnect_on_failover = false;
options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT;
options->connection_check_type = CHECK_PING; options->connection_check_type = CHECK_PING;
/*------------- /*-------------
@@ -622,6 +623,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
strncpy(options->repmgrd_pid_file, value, MAXPGPATH); strncpy(options->repmgrd_pid_file, value, MAXPGPATH);
else if (strcmp(name, "standby_disconnect_on_failover") == 0) else if (strcmp(name, "standby_disconnect_on_failover") == 0)
options->standby_disconnect_on_failover = parse_bool(value, name, error_list); options->standby_disconnect_on_failover = parse_bool(value, name, error_list);
else if (strcmp(name, "sibling_nodes_disconnect_timeout") == 0)
options->sibling_nodes_disconnect_timeout = repmgr_atoi(value, name, error_list, 0);
else if (strcmp(name, "connection_check_type") == 0) else if (strcmp(name, "connection_check_type") == 0)
{ {
if (strcasecmp(value, "ping") == 0) if (strcasecmp(value, "ping") == 0)

View File

@@ -142,6 +142,7 @@ typedef struct
int repmgrd_standby_startup_timeout; int repmgrd_standby_startup_timeout;
char repmgrd_pid_file[MAXPGPATH]; char repmgrd_pid_file[MAXPGPATH];
bool standby_disconnect_on_failover; bool standby_disconnect_on_failover;
int sibling_nodes_disconnect_timeout;
ConnectionCheckType connection_check_type; ConnectionCheckType connection_check_type;
/* BDR settings */ /* BDR settings */
@@ -214,7 +215,7 @@ typedef struct
false, -1, \ false, -1, \
DEFAULT_ASYNC_QUERY_TIMEOUT, \ DEFAULT_ASYNC_QUERY_TIMEOUT, \
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \ DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
-1, "", false, CHECK_PING, \ -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, CHECK_PING, \
/* BDR settings */ \ /* BDR settings */ \
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \ false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
/* service settings */ \ /* service settings */ \

View File

@@ -322,6 +322,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
# "--no-pid-file" will force PID file creation to be skipped. # "--no-pid-file" will force PID file creation to be skipped.
# Note: there is normally no need to set this, particularly if # Note: there is normally no need to set this, particularly if
# repmgr was installed from packages. # repmgr was installed from packages.
#standby_disconnect_on_failover=false # If "true", in a failover situation wait for all standbys to
# disconnect their WAL receivers before electing a new primary
#sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover" is "true", the maximum length of time (in seconds)
# to wait for other standbys to confirm they have disconnected their
# WAL receivers
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# service control commands # service control commands

View File

@@ -91,6 +91,7 @@
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */ #define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */ #define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */ #define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */
#define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */
#define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */ #define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */

View File

@@ -1989,8 +1989,6 @@ do_primary_failover(void)
static NodeInfoList check_sibling_nodes = T_NODE_INFO_LIST_INITIALIZER; static NodeInfoList check_sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
int i; int i;
// XXX make configurable
int sibling_nodes_disconnect_timeout = 30;
bool sibling_node_wal_receiver_connected = false; bool sibling_node_wal_receiver_connected = false;
if (PQserverVersion(local_conn) < 90500) if (PQserverVersion(local_conn) < 90500)
@@ -2016,7 +2014,7 @@ do_primary_failover(void)
local_node_info.upstream_node_id, local_node_info.upstream_node_id,
&check_sibling_nodes); &check_sibling_nodes);
for (i = 0; i < sibling_nodes_disconnect_timeout; i++) for (i = 0; i < config_file_options.sibling_nodes_disconnect_timeout; i++)
{ {
for (cell = check_sibling_nodes.head; cell; cell = cell->next) for (cell = check_sibling_nodes.head; cell; cell = cell->next)
{ {
@@ -2048,13 +2046,13 @@ do_primary_failover(void)
} }
log_debug("sleeping %i of max %i seconds (\"sibling_nodes_disconnect_timeout\")", log_debug("sleeping %i of max %i seconds (\"sibling_nodes_disconnect_timeout\")",
i + 1, sibling_nodes_disconnect_timeout) i + 1, config_file_options.sibling_nodes_disconnect_timeout);
sleep(1); sleep(1);
} }
if (sibling_node_wal_receiver_connected == true) if (sibling_node_wal_receiver_connected == true)
{ {
// XXX what do we do here? abort or continue? make configurable? /* TODO: prevent any such nodes becoming promotion candidates */
log_warning(_("WAL receiver still connected on at least one sibling node")); log_warning(_("WAL receiver still connected on at least one sibling node"));
} }
else else