diff --git a/HISTORY b/HISTORY
index 996033cc..0c1fa8a4 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,3 +1,7 @@
+4.2.0 2018-??-??
+ repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
+ GitHub #504 (Ian)
+
4.1.1 2018-09-05
logging: explicitly log the text of failed queries as ERRORs to
assist logfile analysis; GitHub #498
diff --git a/configfile.c b/configfile.c
index c4c060cc..4e345991 100644
--- a/configfile.c
+++ b/configfile.c
@@ -335,6 +335,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
* standby switchover settings
*------------------------
*/
+ options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
/*-----------------
@@ -545,6 +546,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
/* standby switchover settings */
+ else if (strcmp(name, "shutdown_check_timeout") == 0)
+ options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
else if (strcmp(name, "standby_reconnect_timeout") == 0)
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
diff --git a/configfile.h b/configfile.h
index abe87225..975c8f8e 100644
--- a/configfile.h
+++ b/configfile.h
@@ -103,6 +103,7 @@ typedef struct
int standby_follow_timeout;
/* standby switchover settings */
+ int shutdown_check_timeout;
int standby_reconnect_timeout;
/* node rejoin settings */
@@ -181,6 +182,7 @@ typedef struct
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
DEFAULT_STANDBY_FOLLOW_TIMEOUT, \
/* standby switchover settings */ \
+ DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
/* node rejoin settings */ \
DEFAULT_NODE_REJOIN_TIMEOUT, \
diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml
index 30c69a5b..89140bb4 100644
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -141,19 +141,7 @@
Note that following parameters in repmgr.conf are relevant to the
switchover operation:
-
-
- reconnect_attempts: number of times to check the original primary
- for a clean shutdown after executing the shutdown command, before aborting
-
-
-
-
- reconnect_interval: interval (in seconds) to check the original
- primary for a clean shutdown after executing the shutdown command (up to a maximum
- of reconnect_attempts tries)
-
-
+
replication_lag_critical:
@@ -163,10 +151,24 @@
+
+
+ shutdown_check_timeout: maximum number of seconds to wait for the
+ demotion candidate (current primary) to shut down before aborting the switchover (default: 60 seconds).
+
+
+
+ In versions prior to &repmgr; 4.2, repmgr standby switchover would
+ use the values defined in reconnect_attempts and reconnect_interval
+ to determine the timeout for demotion candidate shutdown.
+
+
+
+
standby_reconnect_timeout:
- number of seconds to attempt to wait for the demoted primary
+ maximum number of seconds to wait for the demoted primary
to reconnect to the promoted primary (default: 60 seconds)
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index 6de5769b..e6f91e75 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -3666,13 +3666,14 @@ do_standby_switchover(void)
/* loop for timeout waiting for current primary to stop */
- for (i = 0; i < config_file_options.reconnect_attempts; i++)
+ for (i = 0; i < config_file_options.shutdown_check_timeout; i++)
{
/* Check whether primary is available */
PGPing ping_res;
- log_info(_("checking primary status; %i of %i attempts"),
- i + 1, config_file_options.reconnect_attempts);
+ log_info(_("checking for primary shutdown; %i of %i attempts (\"shutdown_check_timeout\")"),
+ i + 1, config_file_options.shutdown_check_timeout);
+
ping_res = PQping(remote_conninfo);
log_debug("ping status is: %s", print_pqping_status(ping_res));
@@ -3741,9 +3742,8 @@ do_standby_switchover(void)
termPQExpBuffer(&command_output);
}
- log_debug("sleeping %i seconds (\"reconnect_interval\") until next check",
- config_file_options.reconnect_interval);
- sleep(config_file_options.reconnect_interval);
+ log_debug("sleeping 1 second until next check");
+ sleep(1);
}
if (shutdown_success == false)
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index b5b5d710..28296f40 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -231,6 +231,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
# These settings apply when switching roles between a primary and a standby
# ("repmgr standby switchover").
+#shutdown_check_timeout=60 # The max length of time (in seconds) to wait for the demotion
+ # candidate (current primary) to shut down
#standby_reconnect_timeout=60 # The max length of time (in seconds) to wait
# for the demoted standby to reconnect to the promoted
# primary (note: this value should be equal to or greater
diff --git a/repmgr.h b/repmgr.h
index 1aad9684..8bf4ec4f 100644
--- a/repmgr.h
+++ b/repmgr.h
@@ -84,6 +84,7 @@
#define DEFAULT_WAIT_START 30 /* seconds */
#define DEFAULT_PROMOTE_CHECK_TIMEOUT 60 /* seconds */
#define DEFAULT_PROMOTE_CHECK_INTERVAL 1 /* seconds */
+#define DEFAULT_SHUTDOWN_CHECK_TIMEOUT 60 /* seconds */
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */