diff --git a/HISTORY b/HISTORY
index f384e151..6a629bf2 100644
--- a/HISTORY
+++ b/HISTORY
@@ -18,6 +18,7 @@
it will actually be possible to stream from the target node (Ian)
repmgr: "standby switchover": improve handling of connection URIs when
executing "node rejoin" on the demotion candidate; GitHub #525 (Ian)
+ repmgr: check for stale connections during "standby clone" (Ian)
repmgr: fix long node ID display in "cluster show" (Ian)
repmgr: check for primary server before executing "witness register";
GitHub #538 (Ian)
diff --git a/dbutils.c b/dbutils.c
index fd5bb677..fc9c24c3 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -4272,6 +4272,25 @@ connection_ping(PGconn *conn)
}
+/* Ping "conn"; if its status is then not CONNECTION_OK, PQreset() it and ping once more. */
+ExecStatusType
+connection_ping_reconnect(PGconn *conn)
+{
+ ExecStatusType ping_result = connection_ping(conn);
+
+ if (PQstatus(conn) != CONNECTION_OK)
+ {
+ log_warning(_("connection error, attempting to reset"));
+ log_detail("%s", PQerrorMessage(conn));
+ PQreset(conn);
+ ping_result = connection_ping(conn);
+ }
+ /* ping_result is from the second ping if a reset was attempted, otherwise from the first */
+ log_verbose(LOG_DEBUG, "connection_ping_reconnect(): result is %s", PQresStatus(ping_result));
+ return ping_result;
+}
+
+
/* ==================== */
/* monitoring functions */
diff --git a/dbutils.h b/dbutils.h
index 2aa640c1..6f8b0b4b 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -516,6 +516,7 @@ int wait_connection_availability(PGconn *conn, long long timeout);
bool is_server_available(const char *conninfo);
bool is_server_available_params(t_conninfo_param_list *param_list);
ExecStatusType connection_ping(PGconn *conn);
+ExecStatusType connection_ping_reconnect(PGconn *conn);
/* monitoring functions */
void
diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml
index 8aeda485..926748e7 100644
--- a/doc/appendix-release-notes.sgml
+++ b/doc/appendix-release-notes.sgml
@@ -185,6 +185,14 @@ REPMGRD_OPTS="--daemonize=false"
+
+
+ &repmgr;: when executing repmgr standby clone,
+ recheck the primary/upstream connection(s) after the data copy operation is complete, as these may
+ have timed out or been closed while the copy was in progress.
+
+
+
&repmgr;: when executing repmgr standby switchover,
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index 1394eac6..ebe75ec9 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -605,7 +605,6 @@ do_standby_clone(void)
log_error(_("unknown clone mode"));
}
-
/* If the backup failed then exit */
if (r != SUCCESS)
{
@@ -5794,6 +5793,12 @@ run_basebackup(t_node_info *node_record)
if (r != 0)
return ERR_BAD_BASEBACKUP;
+ /* check connections are still available */
+ (void)connection_ping_reconnect(primary_conn);
+
+ if (source_conn != primary_conn)
+ (void)connection_ping_reconnect(source_conn);
+
/*
* If replication slots in use, check the created slot is on the correct
* node; the slot will initially get created on the source node, and will
@@ -6396,6 +6401,15 @@ stop_backup:
RecordStatus record_status = RECORD_NOT_FOUND;
PGconn *upstream_conn = NULL;
+
+ /* check connections are still available */
+ (void)connection_ping_reconnect(primary_conn);
+
+ /* ping source_conn separately only if it is a distinct connection,
+ * so that neither connection is pinged (and possibly reset) twice */
+ if (source_conn != primary_conn)
+ (void)connection_ping_reconnect(source_conn);
+
record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
if (record_status != RECORD_FOUND)