From 668b2c9b59e626c2efce0c031c0a0c866221ea94 Mon Sep 17 00:00:00 2001
From: Ian Barwick
Date: Fri, 22 Feb 2019 14:04:37 +0900
Subject: [PATCH] repmgrd: use PQping() as a first test of whether an upstream node is available

It's possible the upstream node may be temporarily not accepting
connections but is still running, so we only confirm that connections
are not possible once PQping() reports a negative result.

This feature has been adapted from repmgr4.
---
 dbutils.c | 16 ++++++++++++++++
 dbutils.h |  1 +
 repmgrd.c |  9 +++++----
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/dbutils.c b/dbutils.c
index 3f126eb6..fe9ff684 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -2137,3 +2137,19 @@ get_last_wal_receive_location(PGconn *conn)
 
 	return ptr;
 }
+
+
+bool
+is_server_available(const char *conninfo)
+{
+	PGPing status = PQping(conninfo);
+
+	log_verbose(LOG_DEBUG, "is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)status);
+
+	if (status == PQPING_OK)
+		return true;
+
+	log_warning("is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)status);
+
+	return false;
+}
diff --git a/dbutils.h b/dbutils.h
index 9b16877c..8b226e31 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -146,5 +146,6 @@ int get_data_checksum_version(const char *data_directory);
 /* backported from repmgr 4.x */
 XLogRecPtr parse_lsn(const char *str);
 XLogRecPtr get_last_wal_receive_location(PGconn *conn);
+bool is_server_available(const char *conninfo);
 
 #endif
diff --git a/repmgrd.c b/repmgrd.c
index 676554c9..f9cdc496 100644
--- a/repmgrd.c
+++ b/repmgrd.c
@@ -879,9 +879,7 @@ standby_monitor(void)
 	 * local_options.reconnect_interval seconds
 	 */
 
-	check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
-
-	if (PQstatus(upstream_conn) != CONNECTION_OK)
+	if (!check_connection(&upstream_conn, upstream_node_type, upstream_conninfo))
 	{
 		int previous_master_node_id = master_options.node;
 
@@ -2137,8 +2135,11 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
 {
 	int connection_retries;
 
+	if (conninfo != NULL && is_server_available(conninfo))
+		return true;
+
 	/*
-	 * Check if the node is still available if after
+	 * Check if the node is still available; if after
 	 * local_options.reconnect_attempts * local_options.reconnect_interval
 	 * seconds of retries we cannot reconnect return false
 	 */