diff --git a/HISTORY b/HISTORY
index 74207f6d..bf057b9e 100644
--- a/HISTORY
+++ b/HISTORY
@@ -4,6 +4,8 @@
repmgr: add --terse option to "cluster show"; GitHub #521 (Ian)
repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian)
repmgr: add "node check --data-directory-config"; GitHub #523 (Ian)
+ repmgr: prevent potential race condition in "standby switchover"
+ when checking received WAL location; GitHub #518 (Ian)
repmgr: ensure "standby switchover" verifies repmgr can read the
data directory on the demotion candidate; GitHub #523 (Ian)
repmgr: when executing "standby follow" and "node rejoin", check that
diff --git a/configfile.c b/configfile.c
index 62cfa3ee..9bb66076 100644
--- a/configfile.c
+++ b/configfile.c
@@ -335,6 +335,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
*/
options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
+ options->wal_receive_check_timeout = DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT;
/*-----------------
* repmgrd settings
@@ -557,6 +558,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
else if (strcmp(name, "standby_reconnect_timeout") == 0)
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
+ else if (strcmp(name, "wal_receive_check_timeout") == 0)
+ options->wal_receive_check_timeout = repmgr_atoi(value, name, error_list, 0);
/* node rejoin settings */
else if (strcmp(name, "node_rejoin_timeout") == 0)
diff --git a/configfile.h b/configfile.h
index a7591a83..095b813d 100644
--- a/configfile.h
+++ b/configfile.h
@@ -106,6 +106,7 @@ typedef struct
/* standby switchover settings */
int shutdown_check_timeout;
int standby_reconnect_timeout;
+ int wal_receive_check_timeout;
/* node rejoin settings */
int node_rejoin_timeout;
@@ -189,6 +190,7 @@ typedef struct
/* standby switchover settings */ \
DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
+ DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT, \
/* node rejoin settings */ \
DEFAULT_NODE_REJOIN_TIMEOUT, \
/* node check settings */ \
diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml
index cf7513bb..b86c960a 100644
--- a/doc/appendix-release-notes.sgml
+++ b/doc/appendix-release-notes.sgml
@@ -105,6 +105,15 @@
+
+
+ Add check repmgr standby switchover
+ when comparing received WAL on the standby to the primary's shutdown location to avoid a potential
+ race condition if the standby's walreceiver has not yet flushed all received WAL to disk.
+ GitHub #518.
+
+
+
diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml
index cc4939a6..72cac116 100644
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -168,20 +168,6 @@
-
-
- x
- with "repmgr standby switchover "
-
-
-
-
-
-
-
-
-
-
replication_lag_critical
@@ -207,7 +193,7 @@
- maximum number of seconds to wait for the
+ The maximum number of seconds to wait for the
demotion candidate (current primary) to shut down, before aborting the switchover.
@@ -225,7 +211,25 @@
-
+
+
+
+ wal_receive_check_timeout
+ with "repmgr standby switchover "
+
+
+
+
+
+ After the primary has shut down, the maximum number of seconds to wait for the
+ walreceiver on the standby to flush WAL to disk before comparing WAL receive location
+ with the primary's shut down location.
+
+
+
+
+
+ standby_reconnect_timeoutwith "repmgr standby switchover "
@@ -234,8 +238,8 @@
- maximum number of seconds to attempt to wait for the demotion candidate (former primary)
- to reconnect to the promoted primary (default: 60 seconds)
+ The maximum number of seconds to attempt to wait for the demotion candidate (former primary)
+ to reconnect to the promoted primary (default: 60 seconds)
Note that this parameter is set on the node where repmgr standby switchover
@@ -245,7 +249,6 @@
-
node_rejoin_timeout
@@ -265,7 +268,7 @@
However, this value must be less than on the
- promotion candidate (node where repmgr standby switchover is executed).
+ promotion candidate (the node where repmgr standby switchover is executed).
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index be829c93..dea953fa 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -3423,7 +3423,6 @@ do_standby_switchover(void)
{
/* include walsender for promotion candidate in total */
-
for (cell = sibling_nodes.head; cell; cell = cell->next)
{
/* get host from node record */
@@ -4180,7 +4179,37 @@ do_standby_switchover(void)
log_verbose(LOG_INFO, _("successfully reconnected to local node"));
}
- get_replication_info(local_conn, &replication_info);
+ /*
+ * Compare standby's last WAL receive location with the primary's last
+ * checkpoint LSN. We'll loop for a while as it's possible the standby's
+ * walreceiver has not yet flushed all received WAL to disk.
+ */
+ {
+ bool notice_emitted = false;
+
+ for (i = 0; i < config_file_options.wal_receive_check_timeout; i++)
+ {
+ get_replication_info(local_conn, &replication_info);
+ if (replication_info.last_wal_receive_lsn >= remote_last_checkpoint_lsn)
+ break;
+
+ /*
+ * We'll only output this notice if it looks like we're going to have
+ * to wait for WAL to be flushed.
+ */
+ if (notice_emitted == false)
+ {
+ log_notice(_("waiting up to %i seconds (parameter \"wal_receive_check_timeout\") for received WAL to flush to disk"),
+ config_file_options.wal_receive_check_timeout);
+
+ notice_emitted = true;
+ }
+
+ log_info(_("sleeping %i of maximum %i seconds waiting for standby to flush received WAL to disk"),
+ i + 1, config_file_options.wal_receive_check_timeout);
+ sleep(1);
+ }
+ }
if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
{
@@ -4200,6 +4229,10 @@ do_standby_switchover(void)
}
}
+ log_debug("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X",
+ format_lsn(replication_info.last_wal_receive_lsn),
+ format_lsn(remote_last_checkpoint_lsn));
+
/* promote standby (local node) */
_do_standby_promote_internal(local_conn, server_version_num);
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index 5541e94a..1612508a 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -241,6 +241,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
# for the demoted standby to reconnect to the promoted
# primary (note: this value should be equal to or greater
# than that set for "node_rejoin_timeout")
+#wal_receive_check_timeout=30 # The max length of time (in seconds) to wait for the walreceiver
+ # on the standby to flush WAL to disk before comparing location
+ # with the shut-down primary
#------------------------------------------------------------------------------
# "node rejoin" settings
diff --git a/repmgr.h b/repmgr.h
index 4377434c..1dde2c8c 100644
--- a/repmgr.h
+++ b/repmgr.h
@@ -88,6 +88,7 @@
#define DEFAULT_SHUTDOWN_CHECK_TIMEOUT 60 /* seconds */
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
#define DEFAULT_NODE_REJOIN_TIMEOUT 60 /* seconds */
+#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */
#ifndef RECOVERY_COMMAND_FILE
#define RECOVERY_COMMAND_FILE "recovery.conf"