diff --git a/HISTORY b/HISTORY index 74207f6d..bf057b9e 100644 --- a/HISTORY +++ b/HISTORY @@ -4,6 +4,8 @@ repmgr: add --terse option to "cluster show"; GitHub #521 (Ian) repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian) repmgr: add "node check --data-directory-config"; GitHub #523 (Ian) + repmgr: prevent potential race condition in "standby switchover" + when checking received WAL location; GitHub #518 (Ian) repmgr: ensure "standby switchover" verifies repmgr can read the data directory on the demotion candidate; GitHub #523 (Ian) repmgr: when executing "standby follow" and "node rejoin", check that diff --git a/configfile.c b/configfile.c index 62cfa3ee..9bb66076 100644 --- a/configfile.c +++ b/configfile.c @@ -335,6 +335,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * */ options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT; options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT; + options->wal_receive_check_timeout = DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT; /*----------------- * repmgrd settings @@ -557,6 +558,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0); else if (strcmp(name, "standby_reconnect_timeout") == 0) options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0); + else if (strcmp(name, "wal_receive_check_timeout") == 0) + options->wal_receive_check_timeout = repmgr_atoi(value, name, error_list, 0); /* node rejoin settings */ else if (strcmp(name, "node_rejoin_timeout") == 0) diff --git a/configfile.h b/configfile.h index a7591a83..095b813d 100644 --- a/configfile.h +++ b/configfile.h @@ -106,6 +106,7 @@ typedef struct /* standby switchover settings */ int shutdown_check_timeout; int standby_reconnect_timeout; + int wal_receive_check_timeout; /* node rejoin settings */ int node_rejoin_timeout; @@ -189,6 +190,7 @@ typedef struct /* standby 
switchover settings */ \ DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \ DEFAULT_STANDBY_RECONNECT_TIMEOUT, \ + DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT, \ /* node rejoin settings */ \ DEFAULT_NODE_REJOIN_TIMEOUT, \ /* node check settings */ \ diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index cf7513bb..b86c960a 100644 --- a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -105,6 +105,15 @@ + + + Add check to repmgr standby switchover + when comparing received WAL on the standby to the primary's shutdown location to avoid a potential + race condition if the standby's walreceiver has not yet flushed all received WAL to disk. + GitHub #518. + + + diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml index cc4939a6..72cac116 100644 --- a/doc/repmgr-standby-switchover.sgml +++ b/doc/repmgr-standby-switchover.sgml @@ -168,20 +168,6 @@ - - - x - with "repmgr standby switchover " - - - - - - - - - - replication_lag_critical @@ -207,7 +193,7 @@ - maximum number of seconds to wait for the + The maximum number of seconds to wait for the demotion candidate (current primary) to shut down, before aborting the switchover. @@ -225,7 +211,25 @@ - + + + + wal_receive_check_timeout + with "repmgr standby switchover " + + + + + + After the primary has shut down, the maximum number of seconds to wait for the + walreceiver on the standby to flush WAL to disk before comparing WAL receive location + with the primary's shutdown location. 
+ + + + + + standby_reconnect_timeout with "repmgr standby switchover " @@ -234,8 +238,8 @@ - maximum number of seconds to attempt to wait for the demotion candidate (former primary) - to reconnect to the promoted primary (default: 60 seconds) + The maximum number of seconds to attempt to wait for the demotion candidate (former primary) + to reconnect to the promoted primary (default: 60 seconds) Note that this parameter is set on the node where repmgr standby switchover @@ -245,7 +249,6 @@ - node_rejoin_timeout @@ -265,7 +268,7 @@ However, this value must be less than on the - promotion candidate (node where repmgr standby switchover is executed). + promotion candidate (the node where repmgr standby switchover is executed). diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index be829c93..dea953fa 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -3423,7 +3423,6 @@ do_standby_switchover(void) { /* include walsender for promotion candidate in total */ - for (cell = sibling_nodes.head; cell; cell = cell->next) { /* get host from node record */ @@ -4180,7 +4179,37 @@ do_standby_switchover(void) log_verbose(LOG_INFO, _("successfully reconnected to local node")); } - get_replication_info(local_conn, &replication_info); + /* + * Compare standby's last WAL receive location with the primary's last + * checkpoint LSN. We'll loop for a while as it's possible the standby's + * walreceiver has not yet flushed all received WAL to disk. + */ + { + bool notice_emitted = false; + + for (i = 0; i < config_file_options.wal_receive_check_timeout; i++) + { + get_replication_info(local_conn, &replication_info); + if (replication_info.last_wal_receive_lsn >= remote_last_checkpoint_lsn) + break; + + /* + * We'll only output this notice if it looks like we're going to have + * to wait for WAL to be flushed. 
+ */ + if (notice_emitted == false) + { + log_notice(_("waiting up to %i seconds (parameter \"wal_receive_check_timeout\") for received WAL to flush to disk"), + config_file_options.wal_receive_check_timeout); + + notice_emitted = true; + } + + log_info(_("sleeping %i of maximum %i seconds waiting for standby to flush received WAL to disk"), + i + 1, config_file_options.wal_receive_check_timeout); + sleep(1); + } + } if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) { @@ -4200,6 +4229,10 @@ do_standby_switchover(void) } } + log_debug("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X", + format_lsn(replication_info.last_wal_receive_lsn), + format_lsn(remote_last_checkpoint_lsn)); + /* promote standby (local node) */ _do_standby_promote_internal(local_conn, server_version_num); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 5541e94a..1612508a 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -241,6 +241,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" # for the demoted standby to reconnect to the promoted # primary (note: this value should be equal to or greater # than that set for "node_rejoin_timeout") +#wal_receive_check_timeout=30 # The max length of time (in seconds) to wait for the walreceiver + # on the standby to flush WAL to disk before comparing location + # with the shut-down primary #------------------------------------------------------------------------------ # "node rejoin" settings diff --git a/repmgr.h b/repmgr.h index 4377434c..1dde2c8c 100644 --- a/repmgr.h +++ b/repmgr.h @@ -88,6 +88,7 @@ #define DEFAULT_SHUTDOWN_CHECK_TIMEOUT 60 /* seconds */ #define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */ #define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */ +#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */ #ifndef RECOVERY_COMMAND_FILE #define RECOVERY_COMMAND_FILE "recovery.conf"