From 5e8b41e221974c4d02b450165634d235ccc98de6 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 28 Feb 2018 15:35:47 +0900 Subject: [PATCH] repmgrd: retry standby connection after "follow_command" executed It's possible that the standby is still starting up after the "follow_command" completes, so poll for a while until we get a connection. --- repmgrd-physical.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 549d1ece..828c55a9 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -2179,8 +2179,6 @@ follow_new_primary(int new_primary_id) return FAILOVER_STATE_FOLLOW_FAIL; } - - /* * refresh local copy of local and primary node records - we get these * directly from the primary to ensure they're the current version @@ -2203,7 +2201,34 @@ follow_new_primary(int new_primary_id) return FAILOVER_STATE_FOLLOW_FAIL; } - local_conn = establish_db_connection(local_node_info.conninfo, false); + { + /* + * It's possible that the standby is still starting up after the "follow_command" + * completes, so poll for a while until we get a connection. + * + * TODO: + * - implement for cascading standby follow too + * - make timeout configurable ("standby_reconnect_timeout") + */ + int i, max = 60; + for (i = 0; i < max; i++) + { + local_conn = establish_db_connection(local_node_info.conninfo, false); + + if (PQstatus(local_conn) == CONNECTION_OK) + break; + + log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node", i + 1, max); + sleep(1); + } + + if (PQstatus(local_conn) != CONNECTION_OK) + { + log_error(_("unable to reconnect to local node %i"), + local_node_info.node_id); + return FAILOVER_STATE_FOLLOW_FAIL; + } + } /* refresh shared memory settings which will have been zapped by the restart */ repmgrd_set_local_node_id(local_conn, config_file_options.node_id); @@ -2216,8 +2241,7 @@ follow_new_primary(int new_primary_id) log_notice("%s", event_details.data); - create_event_notification( - upstream_conn, + create_event_notification(upstream_conn, &config_file_options, local_node_info.node_id, "repmgrd_failover_follow",