repmgrd: better handling of missing upstream_node_id

Ensure we default to the active master node when no upstream node record is found.
2026-07-16 14:29:05 +00:00 · 2016-05-11 21:47:40 +09:00
parent 57f9432692
commit 21b2ff1a1f
2 changed files with 36 additions and 8 deletions
@@ -604,7 +604,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,

 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
-		log_err(_("unable to get conninfo for upstream server\n%s\n"),
+		log_err(_("error when attempting to find upstream server\n%s\n"),
 				PQerrorMessage(standby_conn));
 		PQclear(res);
 		return NULL;
@@ -612,15 +612,42 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,

 	if (!PQntuples(res))
 	{
-		log_notice(_("no record found for upstream server\n"));
 		PQclear(res);
-		return NULL;
+		log_debug("no record found for upstream server\n");
+
+		sqlquery_snprintf(sqlquery,
+						  "    SELECT un.conninfo, un.name, un.id "
+						  "      FROM %s.repl_nodes un "
+						  "     WHERE un.cluster = '%s' "
+						  "       AND un.type='master' "
+						  "       AND un.active IS TRUE",
+						  get_repmgr_schema_quoted(standby_conn),
+						  cluster);
+		res = PQexec(standby_conn, sqlquery);
+
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			log_err(_("error when attempting to find active master server\n%s\n"),
+					PQerrorMessage(standby_conn));
+			PQclear(res);
+			return NULL;
+		}
+
+		if (!PQntuples(res))
+		{
+			PQclear(res);
+			log_notice(_("no record found for active master server\n"));
+
+			return NULL;
+		}
+
+		log_debug("record found for active master server\n");
 	}

 	strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);

 	if (upstream_node_id_ptr != NULL)
-		*upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 1));
+		*upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 2));

 	PQclear(res);

@@ -742,7 +742,8 @@ standby_monitor(void)
 	upstream_conn = get_upstream_connection(my_local_conn,
 											local_options.cluster_name,
 											local_options.node,
-											&upstream_node_id, upstream_conninfo);
+											&upstream_node_id,
+											upstream_conninfo);

 	upstream_node_type = (upstream_node_id == master_options.node)
 		? "master"
@@ -826,7 +827,7 @@ standby_monitor(void)
 			 * Failover handling is handled differently depending on whether
 			 * the failed node is the master or a cascading standby
 			 */
-			upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
+			upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);

            if (upstream_node.type == MASTER)
            {
@@ -929,7 +930,7 @@ standby_monitor(void)
 	 * from the upstream node to write monitoring information
 	 */

-	upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
+	upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);

 	sprintf(sqlquery,
 			"SELECT id "
@@ -2397,7 +2398,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)

 	if (res == 0)
 	{
-		log_warning(_("No record found record for node %i\n"), node_id);
+		log_warning(_("No record found for node %i\n"), node_id);
 	}

 	return node_info;