mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 17:06:29 +00:00
standby clone: check upstream connections after data copy operation
With long-running copy operations, it's possible the connection(s) to the primary/source server may go away for some reason, so recheck their availability before attempting to reuse them.
This commit is contained in:
1
HISTORY
1
HISTORY
@@ -18,6 +18,7 @@
|
|||||||
it will actually be possible to stream from the target node (Ian)
|
it will actually be possible to stream from the target node (Ian)
|
||||||
repmgr: "standby switchover": improve handling of connection URIs when
|
repmgr: "standby switchover": improve handling of connection URIs when
|
||||||
executing "node rejoin" on the demotion candidate; GitHub #525 (Ian)
|
executing "node rejoin" on the demotion candidate; GitHub #525 (Ian)
|
||||||
|
repmgr: check for stale connections during "standby clone" (Ian)
|
||||||
repmgr: fix long node ID display in "cluster show" (Ian)
|
repmgr: fix long node ID display in "cluster show" (Ian)
|
||||||
repmgr: check for primary server before executing "witness register";
|
repmgr: check for primary server before executing "witness register";
|
||||||
GitHub #538 (Ian)
|
GitHub #538 (Ian)
|
||||||
|
|||||||
19
dbutils.c
19
dbutils.c
@@ -4272,6 +4272,25 @@ connection_ping(PGconn *conn)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ExecStatusType
|
||||||
|
connection_ping_reconnect(PGconn *conn)
|
||||||
|
{
|
||||||
|
ExecStatusType ping_result = connection_ping(conn);
|
||||||
|
|
||||||
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_warning(_("connection error, attempting to reset"));
|
||||||
|
log_detail("%s", PQerrorMessage(conn));
|
||||||
|
PQreset(conn);
|
||||||
|
ping_result = connection_ping(conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "connection_ping_reconnect(): result is %s", PQresStatus(ping_result));
|
||||||
|
|
||||||
|
return ping_result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* ==================== */
|
/* ==================== */
|
||||||
/* monitoring functions */
|
/* monitoring functions */
|
||||||
|
|||||||
@@ -515,6 +515,7 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
|||||||
bool is_server_available(const char *conninfo);
|
bool is_server_available(const char *conninfo);
|
||||||
bool is_server_available_params(t_conninfo_param_list *param_list);
|
bool is_server_available_params(t_conninfo_param_list *param_list);
|
||||||
ExecStatusType connection_ping(PGconn *conn);
|
ExecStatusType connection_ping(PGconn *conn);
|
||||||
|
ExecStatusType connection_ping_reconnect(PGconn *conn);
|
||||||
|
|
||||||
/* monitoring functions */
|
/* monitoring functions */
|
||||||
void
|
void
|
||||||
|
|||||||
@@ -185,6 +185,14 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: when executing <link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>,
|
||||||
|
recheck primary/upstream connection(s) after the data copy operation is complete, as these may
|
||||||
|
have gone away.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||||
|
|||||||
@@ -605,7 +605,6 @@ do_standby_clone(void)
|
|||||||
log_error(_("unknown clone mode"));
|
log_error(_("unknown clone mode"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* If the backup failed then exit */
|
/* If the backup failed then exit */
|
||||||
if (r != SUCCESS)
|
if (r != SUCCESS)
|
||||||
{
|
{
|
||||||
@@ -5794,6 +5793,12 @@ run_basebackup(t_node_info *node_record)
|
|||||||
if (r != 0)
|
if (r != 0)
|
||||||
return ERR_BAD_BASEBACKUP;
|
return ERR_BAD_BASEBACKUP;
|
||||||
|
|
||||||
|
/* check connections are still available */
|
||||||
|
(void)connection_ping_reconnect(primary_conn);
|
||||||
|
|
||||||
|
if (source_conn != primary_conn)
|
||||||
|
(void)connection_ping_reconnect(source_conn);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If replication slots in use, check the created slot is on the correct
|
* If replication slots in use, check the created slot is on the correct
|
||||||
* node; the slot will initially get created on the source node, and will
|
* node; the slot will initially get created on the source node, and will
|
||||||
@@ -6396,6 +6401,15 @@ stop_backup:
|
|||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
PGconn *upstream_conn = NULL;
|
PGconn *upstream_conn = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
/* check connections are still available */
|
||||||
|
(void)connection_ping_reconnect(primary_conn);
|
||||||
|
|
||||||
|
if (source_conn != primary_conn)
|
||||||
|
(void)connection_ping_reconnect(source_conn);
|
||||||
|
|
||||||
|
(void)connection_ping_reconnect(source_conn);
|
||||||
|
|
||||||
record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
|
record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
|
||||||
|
|
||||||
if (record_status != RECORD_FOUND)
|
if (record_status != RECORD_FOUND)
|
||||||
|
|||||||
Reference in New Issue
Block a user