mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
standby clone: check upstream connections after data copy operation
With long-running copy operations, it's possible that the connection(s) to the primary/source server may go away for some reason, so recheck their availability before attempting to reuse them.
This commit is contained in:
1
HISTORY
1
HISTORY
@@ -18,6 +18,7 @@
|
||||
it will actually be possible to stream from the target node (Ian)
|
||||
repmgr: "standby switchover": improve handling of connection URIs when
|
||||
executing "node rejoin" on the demotion candidate; GitHub #525 (Ian)
|
||||
repmgr: check for stale connections during "standby clone" (Ian)
|
||||
repmgr: fix long node ID display in "cluster show" (Ian)
|
||||
repmgr: check for primary server before executing "witness register";
|
||||
GitHub #538 (Ian)
|
||||
|
||||
19
dbutils.c
19
dbutils.c
@@ -4272,6 +4272,25 @@ connection_ping(PGconn *conn)
|
||||
}
|
||||
|
||||
|
||||
ExecStatusType
|
||||
connection_ping_reconnect(PGconn *conn)
|
||||
{
|
||||
ExecStatusType ping_result = connection_ping(conn);
|
||||
|
||||
if (PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
log_warning(_("connection error, attempting to reset"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
PQreset(conn);
|
||||
ping_result = connection_ping(conn);
|
||||
}
|
||||
|
||||
log_verbose(LOG_DEBUG, "connection_ping_reconnect(): result is %s", PQresStatus(ping_result));
|
||||
|
||||
return ping_result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ==================== */
|
||||
/* monitoring functions */
|
||||
|
||||
@@ -516,6 +516,7 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
bool is_server_available(const char *conninfo);
|
||||
bool is_server_available_params(t_conninfo_param_list *param_list);
|
||||
ExecStatusType connection_ping(PGconn *conn);
|
||||
ExecStatusType connection_ping_reconnect(PGconn *conn);
|
||||
|
||||
/* monitoring functions */
|
||||
void
|
||||
|
||||
@@ -185,6 +185,14 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr;: when executing <link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>,
|
||||
recheck primary/upstream connection(s) after the data copy operation is complete, as these may
|
||||
have gone away.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||
|
||||
@@ -605,7 +605,6 @@ do_standby_clone(void)
|
||||
log_error(_("unknown clone mode"));
|
||||
}
|
||||
|
||||
|
||||
/* If the backup failed then exit */
|
||||
if (r != SUCCESS)
|
||||
{
|
||||
@@ -5794,6 +5793,12 @@ run_basebackup(t_node_info *node_record)
|
||||
if (r != 0)
|
||||
return ERR_BAD_BASEBACKUP;
|
||||
|
||||
/* check connections are still available */
|
||||
(void)connection_ping_reconnect(primary_conn);
|
||||
|
||||
if (source_conn != primary_conn)
|
||||
(void)connection_ping_reconnect(source_conn);
|
||||
|
||||
/*
|
||||
* If replication slots in use, check the created slot is on the correct
|
||||
* node; the slot will initially get created on the source node, and will
|
||||
@@ -6396,6 +6401,15 @@ stop_backup:
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
PGconn *upstream_conn = NULL;
|
||||
|
||||
|
||||
/* check connections are still available */
|
||||
(void)connection_ping_reconnect(primary_conn);
|
||||
|
||||
if (source_conn != primary_conn)
|
||||
(void)connection_ping_reconnect(source_conn);
|
||||
|
||||
(void)connection_ping_reconnect(source_conn);
|
||||
|
||||
record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
|
||||
Reference in New Issue
Block a user