From 109269f7fbbf72d08143cdffc2d682a99f77d54e Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 27 Jan 2015 21:19:25 +0900 Subject: [PATCH] When writing monitoring info, ensure standby connects to current primary If the node is a cascaded standby and the primary fails, `primary_conn` will not be updated automatically; when writing monitoring info, ensure we connect to the current primary. --- repmgrd.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index b1af31dd..a1a38c38 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -625,7 +625,9 @@ standby_monitor(void) int upstream_node_id; t_node_info upstream_node; + int active_primary_id; const char *type = NULL; + /* * Verify that the local node is still available - if not there's * no point in doing much else anyway @@ -639,10 +641,6 @@ standby_monitor(void) terminate(1); } - // ZZZ if connected to cascading standby, check primary conn too; - // if original primary has gone away we'll need to get the new one - // from the upstream node to write monitoring information - upstream_conn = get_upstream_connection(my_local_conn, local_options.cluster_name, local_options.node, @@ -786,6 +784,55 @@ standby_monitor(void) if (!monitoring_history) return; + + /* + * If original primary has gone away we'll need to get the new one + * from the upstream node to write monitoring information + */ + + upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id); + + sprintf(sqlquery, + "SELECT id " + " FROM %s.repl_nodes " + " WHERE type = 'primary' " + " AND active IS TRUE ", + get_repmgr_schema_quoted(my_local_conn)); + + res = PQexec(my_local_conn, sqlquery); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("standby_monitor() - query error:%s\n"), PQerrorMessage(my_local_conn)); + PQclear(res); + + /* Not a fatal error, just means no monitoring records will be written */ + return; + } + + if(PQntuples(res) == 0) + { + log_err(_("standby_monitor(): no active primary found\n")); + PQclear(res); + return; + } + + active_primary_id = atoi(PQgetvalue(res, 0, 0)); + PQclear(res); + + if(active_primary_id != primary_options.node) + { + log_notice(_("Connecting to active cluster primary (node %i)...\n"), active_primary_id); \ + if(primary_conn != NULL) + { + PQfinish(primary_conn); + } + primary_conn = get_master_connection(my_local_conn, + local_options.cluster_name, + &primary_options.node, NULL); + + } + /* * Cancel any query that is still being executed, so i can insert the * current record