diff --git a/dbutils.c b/dbutils.c index d996e46c..7764d99f 100644 --- a/dbutils.c +++ b/dbutils.c @@ -604,7 +604,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_err(_("unable to get conninfo for upstream server\n%s\n"), + log_err(_("error when attempting to find upstream server\n%s\n"), PQerrorMessage(standby_conn)); PQclear(res); return NULL; @@ -612,15 +612,42 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, if (!PQntuples(res)) { - log_notice(_("no record found for upstream server\n")); PQclear(res); - return NULL; + log_debug("no record found for upstream server\n"); + + sqlquery_snprintf(sqlquery, + " SELECT un.conninfo, un.name, un.id " + " FROM %s.repl_nodes un " + " WHERE un.cluster = '%s' " + " AND un.type='master' " + " AND un.active IS TRUE", + get_repmgr_schema_quoted(standby_conn), + cluster); + res = PQexec(standby_conn, sqlquery); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("error when attempting to find active master server\n%s\n"), + PQerrorMessage(standby_conn)); + PQclear(res); + return NULL; + } + + if (!PQntuples(res)) + { + PQclear(res); + log_notice(_("no record found for active master server\n")); + + return NULL; + } + + log_debug("record found for active master server\n"); } strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO); if (upstream_node_id_ptr != NULL) - *upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 1)); + *upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 2)); PQclear(res); diff --git a/repmgrd.c b/repmgrd.c index 1c3c182d..ae10aa08 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -742,7 +742,8 @@ standby_monitor(void) upstream_conn = get_upstream_connection(my_local_conn, local_options.cluster_name, local_options.node, - &upstream_node_id, upstream_conninfo); + &upstream_node_id, + upstream_conninfo); upstream_node_type = (upstream_node_id == master_options.node) ? "master" @@ -826,7 +827,7 @@ standby_monitor(void) * Failover handling is handled differently depending on whether * the failed node is the master or a cascading standby */ - upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id); + upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id); if (upstream_node.type == MASTER) { @@ -929,7 +930,7 @@ standby_monitor(void) * from the upstream node to write monitoring information */ - upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id); + upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id); sprintf(sqlquery, "SELECT id " @@ -2397,7 +2398,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id) if (res == 0) { - log_warning(_("No record found record for node %i\n"), node_id); + log_warning(_("No record found for node %i\n"), node_id); } return node_info;