When writing monitoring info, ensure standby connects to current primary

If the node is a cascaded standby and the primary fails, `primary_conn` will not be updated automatically; when writing monitoring info, ensure we connect to the current primary.
2026-07-16 14:29:05 +00:00 · 2015-01-27 21:19:25 +09:00
parent 36d94c88ac
commit 109269f7fb
1 changed files with 51 additions and 4 deletions
@@ -625,7 +625,9 @@ standby_monitor(void)
 	int			upstream_node_id;
 	t_node_info upstream_node;
 	int			active_primary_id;
 	const char *type = NULL;
 	/*
 	 * Verify that the local node is still available - if not there's
 	 * no point in doing much else anyway
@@ -639,10 +641,6 @@ standby_monitor(void)
 		terminate(1);
 	}
 	// ZZZ if connected to cascading standby, check primary conn too;
 	// if original primary has gone away we'll need to get the new one
 	// from the upstream node to write monitoring information
 	upstream_conn = get_upstream_connection(my_local_conn,
 											local_options.cluster_name,
 											local_options.node,
@@ -786,6 +784,55 @@ standby_monitor(void)
 	if (!monitoring_history)
 		return;
 	/*
 	 * If original primary has gone away we'll need to get the new one
 	 * from the upstream node to write monitoring information
 	 */
 	upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
 	sprintf(sqlquery,
 			"SELECT id "
 			"  FROM %s.repl_nodes "
 			" WHERE type = 'primary' "
 			"   AND active IS TRUE ",
 			get_repmgr_schema_quoted(my_local_conn));
 	res = PQexec(my_local_conn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
 		log_err(_("standby_monitor() - query error:%s\n"), PQerrorMessage(my_local_conn));
 		PQclear(res);
 		/* Not a fatal error, just means no monitoring records will be written */
 		return;
 	}
 	if(PQntuples(res) == 0)
 	{
 		log_err(_("standby_monitor(): no active primary found\n"));
 		PQclear(res);
 		return;
 	}
 	active_primary_id = atoi(PQgetvalue(res, 0, 0));
 	PQclear(res);
 	if(active_primary_id != primary_options.node)
 	{
 		log_notice(_("Connecting to active cluster primary (node %i)...\n"), active_primary_id); \
 		if(primary_conn != NULL)
 		{
 			PQfinish(primary_conn);
 		}
 		primary_conn = get_master_connection(my_local_conn,
 											 local_options.cluster_name,
 											 &primary_options.node, NULL);
 	}
 	/*
 	 * Cancel any query that is still being executed, so i can insert the
 	 * current record