diff --git a/configfile.c b/configfile.c
index 76a8b542..367938fc 100644
--- a/configfile.c
+++ b/configfile.c
@@ -333,6 +333,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
options->standby_follow_timeout = DEFAULT_STANDBY_FOLLOW_TIMEOUT;
+ /*------------------------
+ * standby switchover settings
+ *------------------------
+ */
+ options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
+
/*-----------------
* repmgrd settings
*-----------------
@@ -352,7 +358,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->degraded_monitoring_timeout = -1;
options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT;
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
- options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
+ options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */
/*-------------
* witness settings
@@ -539,6 +545,14 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
else if (strcmp(name, "standby_follow_timeout") == 0)
options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
+ /* standby switchover settings */
+ else if (strcmp(name, "standby_reconnect_timeout") == 0)
+ options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
+
+ /* node rejoin settings */
+ else if (strcmp(name, "node_rejoin_timeout") == 0)
+ options->node_rejoin_timeout = repmgr_atoi(value, name, error_list, 0);
+
/* node check settings */
else if (strcmp(name, "archive_ready_warning") == 0)
options->archive_ready_warning = repmgr_atoi(value, name, error_list, 1);
@@ -588,8 +602,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->async_query_timeout = repmgr_atoi(value, name, error_list, 0);
else if (strcmp(name, "primary_notification_timeout") == 0)
options->primary_notification_timeout = repmgr_atoi(value, name, error_list, 0);
- else if (strcmp(name, "standby_reconnect_timeout") == 0)
- options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
+ else if (strcmp(name, "repmgrd_standby_startup_timeout") == 0)
+ options->repmgrd_standby_startup_timeout = repmgr_atoi(value, name, error_list, 0);
/* witness settings */
else if (strcmp(name, "witness_sync_interval") == 0)
@@ -771,6 +785,18 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
PQconninfoFree(conninfo_options);
}
+
+ /* set values for parameters which default to other parameters */
+
+ /*
+ * From 4.1, "repmgrd_standby_startup_timeout" replaces "standby_reconnect_timeout"
+ * in repmgrd; fall back to "standby_reconnect_timeout" if no value explicitly provided
+ */
+ if (options->repmgrd_standby_startup_timeout == -1)
+ {
+ options->repmgrd_standby_startup_timeout = options->standby_reconnect_timeout;
+ }
+
/* add warning about changed "barman_" parameter meanings */
if ((options->barman_host[0] == '\0' && options->barman_server[0] != '\0') ||
(options->barman_host[0] != '\0' && options->barman_server[0] == '\0'))
@@ -795,6 +821,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
item_list_append(error_list,
_("\replication_lag_critical\" must be greater than \"replication_lag_warning\""));
}
+
+ if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
+ {
+ item_list_append(error_list,
+ _("\"standby_reconnect_timeout\" must be equal to or greater than \"node_rejoin_timeout\""));
+ }
}
@@ -1017,6 +1049,7 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
* - promote_delay
* - reconnect_attempts
* - reconnect_interval
+ * - repmgrd_standby_startup_timeout
* - retry_promote_interval_secs
*
* non-changeable options
@@ -1233,6 +1266,15 @@ reload_config(t_configuration_options *orig_options)
config_changed = true;
}
+ /* repmgrd_standby_startup_timeout */
+ if (orig_options->repmgrd_standby_startup_timeout != new_options.repmgrd_standby_startup_timeout)
+ {
+ orig_options->repmgrd_standby_startup_timeout = new_options.repmgrd_standby_startup_timeout;
+ log_info(_("\"repmgrd_standby_startup_timeout\" is now \"%i\""), new_options.repmgrd_standby_startup_timeout);
+
+ config_changed = true;
+ }
+
/*
* Handle changes to logging configuration
*/
diff --git a/configfile.h b/configfile.h
index abfd6d0c..2119478c 100644
--- a/configfile.h
+++ b/configfile.h
@@ -102,6 +102,12 @@ typedef struct
int primary_follow_timeout;
int standby_follow_timeout;
+ /* standby switchover settings */
+ int standby_reconnect_timeout;
+
+ /* node rejoin settings */
+ int node_rejoin_timeout;
+
/* node check settings */
int archive_ready_warning;
int archive_ready_critical;
@@ -124,7 +130,7 @@ typedef struct
int degraded_monitoring_timeout;
int async_query_timeout;
int primary_notification_timeout;
- int standby_reconnect_timeout;
+ int repmgrd_standby_startup_timeout;
/* BDR settings */
bool bdr_local_monitoring_only;
@@ -173,6 +179,10 @@ typedef struct
/* standby follow settings */ \
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
DEFAULT_STANDBY_FOLLOW_TIMEOUT, \
+ /* standby switchover settings */ \
+ DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
+ /* node rejoin settings */ \
+ DEFAULT_NODE_REJOIN_TIMEOUT, \
/* node check settings */ \
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
@@ -186,7 +196,7 @@ typedef struct
false, -1, \
DEFAULT_ASYNC_QUERY_TIMEOUT, \
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
- DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
+ -1, \
/* BDR settings */ \
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
/* service settings */ \
diff --git a/doc/repmgr-node-rejoin.sgml b/doc/repmgr-node-rejoin.sgml
index f8015ce1..fd9928af 100644
--- a/doc/repmgr-node-rejoin.sgml
+++ b/doc/repmgr-node-rejoin.sgml
@@ -115,7 +115,24 @@
+
+ Configuration file settings
+
+
+
+
+ node_rejoin_timeout:
+ the maximum length of time (in seconds) to wait for
+ the node to reconnect to the replication cluster (default:
+ 60 seconds; must not exceed the value set in
+ standby_reconnect_timeout).
+
+
+
+
+
+
Event notifications
diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml
index a063d421..da401c66 100644
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -154,8 +154,8 @@
standby_reconnect_timeout:
- Number of seconds to attempt to reconnect to the demoted primary
- once it has been restarted.
+ number of seconds to wait for the demoted primary
+ to reconnect to the promoted primary (default: 60 seconds)
diff --git a/repmgr-action-node.c b/repmgr-action-node.c
index 81cf72c0..07389e36 100644
--- a/repmgr-action-node.c
+++ b/repmgr-action-node.c
@@ -2274,19 +2274,19 @@ do_node_rejoin(void)
{
log_verbose(LOG_INFO, _("waiting for node %i to respond to pings; %i of max %i attempts"),
config_file_options.node_id,
- i + 1, config_file_options.standby_reconnect_timeout);
+ i + 1, config_file_options.node_rejoin_timeout);
}
else
{
log_debug("sleeping 1 second waiting for node %i to respond to pings; %i of max %i attempts",
config_file_options.node_id,
- i + 1, config_file_options.standby_reconnect_timeout);
+ i + 1, config_file_options.node_rejoin_timeout);
}
sleep(1);
}
- for (; i < config_file_options.standby_reconnect_timeout; i++)
+ for (; i < config_file_options.node_rejoin_timeout; i++)
{
success = is_downstream_node_attached(upstream_conn, config_file_options.node_name);
@@ -2301,13 +2301,13 @@ do_node_rejoin(void)
{
log_info(_("waiting for node %i to connect to new primary; %i of max %i attempts"),
config_file_options.node_id,
- i + 1, config_file_options.standby_reconnect_timeout);
+ i + 1, config_file_options.node_rejoin_timeout);
}
else
{
log_debug("sleeping 1 second waiting for node %i to connect to new primary; %i of max %i attempts",
config_file_options.node_id,
- i + 1, config_file_options.standby_reconnect_timeout);
+ i + 1, config_file_options.node_rejoin_timeout);
}
sleep(1);
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index 35deee44..8a178c78 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -207,7 +207,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
#------------------------------------------------------------------------------
-# Standby follow settings
+# "standby follow" settings
#------------------------------------------------------------------------------
# These settings apply when instructing a standby to follow the new primary
@@ -219,6 +219,28 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
# for the standby to connect to the primary
+#------------------------------------------------------------------------------
+# "standby switchover" settings
+#------------------------------------------------------------------------------
+
+# These settings apply when switching roles between a primary and a standby
+# ("repmgr standby switchover").
+
+#standby_reconnect_timeout=60 # The max length of time (in seconds) to wait
+ # for the demoted primary to reconnect to the promoted
+ # primary (note: this value should be equal to or greater
+ # than that set for "node_rejoin_timeout")
+
+#------------------------------------------------------------------------------
+# "node rejoin" settings
+#------------------------------------------------------------------------------
+
+# These settings apply when reintegrating a node into a replication cluster
+# with "repmgr node rejoin"
+
+#node_rejoin_timeout=60 # The maximum length of time (in seconds) to wait for
+ # the node to reconnect to the replication cluster
+
#------------------------------------------------------------------------------
# Barman options
#------------------------------------------------------------------------------
@@ -265,8 +287,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
#primary_notification_timeout=60 # Interval (in seconds) which repmgrd on a standby
# will wait for a notification from the new primary,
# before falling back to degraded monitoring
-#standby_reconnect_timeout=60 # Interval (in seconds) which repmgrd on a standby will wait
- # to reconnect to the local node after executing "follow_command"
+#repmgrd_standby_startup_timeout=60 # Interval (in seconds) which repmgrd on a standby will wait
+ # for the local node to restart and become ready to accept connections after
+ # executing "follow_command" (defaults to the value set in "standby_reconnect_timeout")
#monitoring_history=no # Whether to write monitoring data to the "montoring_history" table
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
diff --git a/repmgr.h b/repmgr.h
index 8bdac74a..1aad9684 100644
--- a/repmgr.h
+++ b/repmgr.h
@@ -85,6 +85,7 @@
#define DEFAULT_PROMOTE_CHECK_TIMEOUT 60 /* seconds */
#define DEFAULT_PROMOTE_CHECK_INTERVAL 1 /* seconds */
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
+#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
#ifndef RECOVERY_COMMAND_FILE
#define RECOVERY_COMMAND_FILE "recovery.conf"
diff --git a/repmgrd-physical.c b/repmgrd-physical.c
index 26da52d4..55af0e78 100644
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
@@ -1941,7 +1941,7 @@ do_upstream_standby_failover(void)
* completes, so poll for a while until we get a connection.
*/
- for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
+ for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
{
local_conn = establish_db_connection(local_node_info.conninfo, false);
@@ -1950,7 +1950,7 @@ do_upstream_standby_failover(void)
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
i + 1,
- config_file_options.standby_reconnect_timeout);
+ config_file_options.repmgrd_standby_startup_timeout);
sleep(1);
}
@@ -2391,7 +2391,7 @@ follow_new_primary(int new_primary_id)
* completes, so poll for a while until we get a connection.
*/
- for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
+ for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
{
local_conn = establish_db_connection(local_node_info.conninfo, false);
@@ -2400,7 +2400,7 @@ follow_new_primary(int new_primary_id)
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
i + 1,
- config_file_options.standby_reconnect_timeout);
+ config_file_options.repmgrd_standby_startup_timeout);
sleep(1);
}