mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 17:06:29 +00:00
repmgrd: retry standby connection after cascading standby failover
This commit is contained in:
@@ -1694,7 +1694,7 @@ do_upstream_standby_failover(void)
|
|||||||
t_node_info primary_node_info = T_NODE_INFO_INITIALIZER;
|
t_node_info primary_node_info = T_NODE_INFO_INITIALIZER;
|
||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
RecoveryType primary_type = RECTYPE_UNKNOWN;
|
RecoveryType primary_type = RECTYPE_UNKNOWN;
|
||||||
int r;
|
int i, r;
|
||||||
char parsed_follow_command[MAXPGPATH] = "";
|
char parsed_follow_command[MAXPGPATH] = "";
|
||||||
|
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
@@ -1779,8 +1779,30 @@ do_upstream_standby_failover(void)
|
|||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reconnect to local node */
|
/*
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
* It's possible that the standby is still starting up after the "follow_command"
|
||||||
|
* completes, so poll for a while until we get a connection.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
|
||||||
|
{
|
||||||
|
local_conn = establish_db_connection(local_node_info.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
|
break;
|
||||||
|
|
||||||
|
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||||
|
i + 1,
|
||||||
|
config_file_options.standby_reconnect_timeout);
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to reconnect to local node %i"),
|
||||||
|
local_node_info.node_id);
|
||||||
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
/* refresh shared memory settings which will have been zapped by the restart */
|
/* refresh shared memory settings which will have been zapped by the restart */
|
||||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||||
@@ -2060,7 +2082,7 @@ follow_new_primary(int new_primary_id)
|
|||||||
char parsed_follow_command[MAXPGPATH] = "";
|
char parsed_follow_command[MAXPGPATH] = "";
|
||||||
|
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
int r;
|
int i, r;
|
||||||
|
|
||||||
/* Store details of the failed node here */
|
/* Store details of the failed node here */
|
||||||
t_node_info failed_primary = T_NODE_INFO_INITIALIZER;
|
t_node_info failed_primary = T_NODE_INFO_INITIALIZER;
|
||||||
@@ -2194,15 +2216,11 @@ follow_new_primary(int new_primary_id)
|
|||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
|
||||||
/*
|
/*
|
||||||
* It's possible that the standby is still starting up after the "follow_command"
|
* It's possible that the standby is still starting up after the "follow_command"
|
||||||
* completes, so poll for a while until we get a connection.
|
* completes, so poll for a while until we get a connection.
|
||||||
*
|
|
||||||
* TODO:
|
|
||||||
* - implement for cascading standby follow too
|
|
||||||
*/
|
*/
|
||||||
int i;
|
|
||||||
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
|
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
|
||||||
{
|
{
|
||||||
local_conn = establish_db_connection(local_node_info.conninfo, false);
|
local_conn = establish_db_connection(local_node_info.conninfo, false);
|
||||||
@@ -2222,7 +2240,6 @@ follow_new_primary(int new_primary_id)
|
|||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* refresh shared memory settings which will have been zapped by the restart */
|
/* refresh shared memory settings which will have been zapped by the restart */
|
||||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||||
|
|||||||
Reference in New Issue
Block a user