diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml
index 89b0dadc..10d245dd 100644
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -37,7 +37,7 @@
&repmgr; will refuse to perform the switchover if an exclusive backup is running on
- the current primary.
+ the current primary, or if WAL replay is paused on the standby.
diff --git a/doc/switchover.sgml b/doc/switchover.sgml
index c70bb99e..894c03db 100644
--- a/doc/switchover.sgml
+++ b/doc/switchover.sgml
@@ -137,8 +137,8 @@
- If an exclusive backup is running on the current primary, &repmgr; will not perform the
- switchover.
+ If an exclusive backup is running on the current primary, or if WAL replay is paused on the standby,
+ &repmgr; will not perform the switchover.
diff --git a/repmgr-action-node.c b/repmgr-action-node.c
index abc3badc..9d4f21bc 100644
--- a/repmgr-action-node.c
+++ b/repmgr-action-node.c
@@ -49,6 +49,7 @@ static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
+
/*
* NODE STATUS
*
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index b75488b0..bb499feb 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -3197,24 +3197,7 @@ do_standby_switchover(void)
exit(ERR_DB_QUERY);
}
- /*
- * Check that there are no exclusive backups running on the primary.
- * We don't want to end up damaging the backup and also leaving the server in an
- * state where there's control data saying it's in backup mode but there's no
- * backup_label in PGDATA.
- * If the user wants to do the switchover anyway, they should first stop the
- * backup that's running.
- */
- if (server_in_exclusive_backup_mode(remote_conn) != BACKUP_STATE_NO_BACKUP)
- {
- log_error(_("unable to perform a switchover while primary server is in exclusive backup mode"));
- log_hint(_("stop backup before attempting the switchover"));
-
- PQfinish(local_conn);
- PQfinish(remote_conn);
-
- exit(ERR_SWITCHOVER_FAIL);
- }
+ log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name);
/*
* Check this standby is attached to the demotion candidate
@@ -3246,7 +3229,63 @@ do_standby_switchover(void)
exit(ERR_BAD_CONFIG);
}
- log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name);
+ /*
+ * Check that WAL replay on the standby is *not* paused, as that could lead
+ * to unexpected behaviour when the standby is promoted.
+ *
+ * For switchover we'll mandate that WAL replay *must not* be paused.
+ * For a promote operation we can proceed if WAL replay is paused
+ * there is no more available WAL to be replayed, as we can be sure the
+ * primary is down already, but in a switchover context there's
+ * potentially a window for more WAL to be received before we shut down
+ * the primary completely.
+ */
+
+ if (is_wal_replay_paused(local_conn, false) == true)
+ {
+ ReplInfo replication_info;
+ init_replication_info(&replication_info);
+
+ if (get_replication_info(local_conn, &replication_info) == false)
+ {
+ log_error(_("unable to retrieve replication information from local node"));
+ PQfinish(local_conn);
+ exit(ERR_SWITCHOVER_FAIL);
+ }
+
+ log_error(_("WAL replay is paused on this node and it is not safe to proceed"));
+ log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
+ format_lsn(replication_info.last_wal_replay_lsn),
+ format_lsn(replication_info.last_wal_receive_lsn));
+
+ if (PQserverVersion(local_conn) >= 100000)
+ log_hint(_("execute \"pg_wal_replay_resume()\" to unpause WAL replay"));
+ else
+ log_hint(_("execute \"pg_xlog_replay_resume()\" to unpause WAL replay"));
+
+ PQfinish(local_conn);
+ exit(ERR_SWITCHOVER_FAIL);
+ }
+
+
+ /*
+ * Check that there are no exclusive backups running on the primary.
+ * We don't want to end up damaging the backup and also leaving the server in an
+ * state where there's control data saying it's in backup mode but there's no
+ * backup_label in PGDATA.
+ * If the user wants to do the switchover anyway, they should first stop the
+ * backup that's running.
+ */
+ if (server_in_exclusive_backup_mode(remote_conn) != BACKUP_STATE_NO_BACKUP)
+ {
+ log_error(_("unable to perform a switchover while primary server is in exclusive backup mode"));
+ log_hint(_("stop backup before attempting the switchover"));
+
+ PQfinish(local_conn);
+ PQfinish(remote_conn);
+
+ exit(ERR_SWITCHOVER_FAIL);
+ }
/* this will fill the %p event notification parameter */
event_info.node_id = remote_node_record.node_id;