mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 08:36:30 +00:00
repmgr: add parameter "shutdown_check_timeout"
Previously, "repmgr standby switchover" used the configuration file parameters "reconnect_interval" and "reconnect_attempts" to define a timeout for determining whether the current primary (demotion candidate) had shut down. However, these parameters are intended for primary failure detection and are generally set to lower values, while a controlled shutdown may take longer, resulting in the switchover being aborted because repmgr was not waiting long enough. To prevent this from happening, the parameter "shutdown_check_timeout" has been added. This complements the existing "standby_reconnect_timeout" parameter used by "repmgr standby switchover". Implements GitHub #504.
This commit is contained in:
4
HISTORY
4
HISTORY
@@ -1,3 +1,7 @@
|
|||||||
|
4.2.0 2018-??-??
|
||||||
|
repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
|
||||||
|
GitHub #504 (Ian)
|
||||||
|
|
||||||
4.1.1 2018-09-05
|
4.1.1 2018-09-05
|
||||||
logging: explicitly log the text of failed queries as ERRORs to
|
logging: explicitly log the text of failed queries as ERRORs to
|
||||||
assist logfile analysis; GitHub #498
|
assist logfile analysis; GitHub #498
|
||||||
|
|||||||
@@ -335,6 +335,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
* standby switchover settings
|
* standby switchover settings
|
||||||
*------------------------
|
*------------------------
|
||||||
*/
|
*/
|
||||||
|
options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
|
||||||
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
||||||
|
|
||||||
/*-----------------
|
/*-----------------
|
||||||
@@ -545,6 +546,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
|
||||||
/* standby switchover settings */
|
/* standby switchover settings */
|
||||||
|
else if (strcmp(name, "shutdown_check_timeout") == 0)
|
||||||
|
options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
||||||
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
|
||||||
|
|||||||
@@ -103,6 +103,7 @@ typedef struct
|
|||||||
int standby_follow_timeout;
|
int standby_follow_timeout;
|
||||||
|
|
||||||
/* standby switchover settings */
|
/* standby switchover settings */
|
||||||
|
int shutdown_check_timeout;
|
||||||
int standby_reconnect_timeout;
|
int standby_reconnect_timeout;
|
||||||
|
|
||||||
/* node rejoin settings */
|
/* node rejoin settings */
|
||||||
@@ -181,6 +182,7 @@ typedef struct
|
|||||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||||
DEFAULT_STANDBY_FOLLOW_TIMEOUT, \
|
DEFAULT_STANDBY_FOLLOW_TIMEOUT, \
|
||||||
/* standby switchover settings */ \
|
/* standby switchover settings */ \
|
||||||
|
DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
|
||||||
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
||||||
/* node rejoin settings */ \
|
/* node rejoin settings */ \
|
||||||
DEFAULT_NODE_REJOIN_TIMEOUT, \
|
DEFAULT_NODE_REJOIN_TIMEOUT, \
|
||||||
|
|||||||
@@ -141,19 +141,7 @@
|
|||||||
Note that the following parameters in <filename>repmgr.conf</filename> are relevant to the
|
Note that the following parameters in <filename>repmgr.conf</filename> are relevant to the
|
||||||
switchover operation:
|
switchover operation:
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<literal>reconnect_attempts</literal>: number of times to check the original primary
|
|
||||||
for a clean shutdown after executing the shutdown command, before aborting
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<literal>reconnect_interval</literal>: interval (in seconds) to check the original
|
|
||||||
primary for a clean shutdown after executing the shutdown command (up to a maximum
|
|
||||||
of <literal>reconnect_attempts</literal> tries)
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>replication_lag_critical</literal>:
|
<literal>replication_lag_critical</literal>:
|
||||||
@@ -163,10 +151,24 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<literal>shutdown_check_timeout</literal>: maximum number of seconds to wait for the
|
||||||
|
demotion candidate (current primary) to shut down, before aborting the switchover.
|
||||||
|
</simpara>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
In versions prior to &repmgr; 4.2, <command>repmgr standby switchover</command> would
|
||||||
|
use the values defined in <literal>reconnect_attempts</literal> and <literal>reconnect_interval</literal>
|
||||||
|
to determine the timeout for demotion candidate shutdown.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>standby_reconnect_timeout</literal>:
|
<literal>standby_reconnect_timeout</literal>:
|
||||||
number of seconds to attempt to wait for the demoted primary
|
maximum number of seconds to attempt to wait for the demoted primary
|
||||||
to reconnect to the promoted primary (default: 60 seconds)
|
to reconnect to the promoted primary (default: 60 seconds)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|||||||
@@ -3666,13 +3666,14 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
/* loop for timeout waiting for current primary to stop */
|
/* loop for timeout waiting for current primary to stop */
|
||||||
|
|
||||||
for (i = 0; i < config_file_options.reconnect_attempts; i++)
|
for (i = 0; i < config_file_options.shutdown_check_timeout; i++)
|
||||||
{
|
{
|
||||||
/* Check whether primary is available */
|
/* Check whether primary is available */
|
||||||
PGPing ping_res;
|
PGPing ping_res;
|
||||||
|
|
||||||
log_info(_("checking primary status; %i of %i attempts"),
|
log_info(_("checking for primary shutdown; %i of %i attempts (\"shutdown_check_timeout\")"),
|
||||||
i + 1, config_file_options.reconnect_attempts);
|
i + 1, config_file_options.shutdown_check_timeout);
|
||||||
|
|
||||||
ping_res = PQping(remote_conninfo);
|
ping_res = PQping(remote_conninfo);
|
||||||
|
|
||||||
log_debug("ping status is: %s", print_pqping_status(ping_res));
|
log_debug("ping status is: %s", print_pqping_status(ping_res));
|
||||||
@@ -3741,9 +3742,8 @@ do_standby_switchover(void)
|
|||||||
termPQExpBuffer(&command_output);
|
termPQExpBuffer(&command_output);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("sleeping %i seconds (\"reconnect_interval\") until next check",
|
log_debug("sleeping 1 second until next check");
|
||||||
config_file_options.reconnect_interval);
|
sleep(1);
|
||||||
sleep(config_file_options.reconnect_interval);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shutdown_success == false)
|
if (shutdown_success == false)
|
||||||
|
|||||||
@@ -231,6 +231,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# These settings apply when switching roles between a primary and a standby
|
# These settings apply when switching roles between a primary and a standby
|
||||||
# ("repmgr standby switchover").
|
# ("repmgr standby switchover").
|
||||||
|
|
||||||
|
#shutdown_check_timeout=60 # The max length of time (in seconds) to wait for the demotion
|
||||||
|
# candidate (current primary) to shut down
|
||||||
#standby_reconnect_timeout=60 # The max length of time (in seconds) to wait
|
#standby_reconnect_timeout=60 # The max length of time (in seconds) to wait
|
||||||
# for the demoted standby to reconnect to the promoted
|
# for the demoted standby to reconnect to the promoted
|
||||||
# primary (note: this value should be equal to or greater
|
# primary (note: this value should be equal to or greater
|
||||||
|
|||||||
1
repmgr.h
1
repmgr.h
@@ -84,6 +84,7 @@
|
|||||||
#define DEFAULT_WAIT_START 30 /* seconds */
|
#define DEFAULT_WAIT_START 30 /* seconds */
|
||||||
#define DEFAULT_PROMOTE_CHECK_TIMEOUT 60 /* seconds */
|
#define DEFAULT_PROMOTE_CHECK_TIMEOUT 60 /* seconds */
|
||||||
#define DEFAULT_PROMOTE_CHECK_INTERVAL 1 /* seconds */
|
#define DEFAULT_PROMOTE_CHECK_INTERVAL 1 /* seconds */
|
||||||
|
#define DEFAULT_SHUTDOWN_CHECK_TIMEOUT 60 /* seconds */
|
||||||
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
|
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
|
||||||
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
|
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user