From 0c68018631cf8e17a92bd8132fb278459c6c6a46 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Sat, 23 Feb 2019 15:55:06 +0900 Subject: [PATCH] repmgrd: log details of nodes which can see primary If a failover is cancelled because other nodes can still see the primary, log the identities of those nodes. --- repmgrd-physical.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 1fdb8b79..d88af6f7 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -3092,6 +3092,9 @@ do_election(void) ReplInfo local_replication_info; + /* To collate details of nodes with primary visible for logging purposes */ + PQExpBufferData nodes_with_primary_visible; + /* * Check if at least one server in the primary's location is visible; if * not we'll assume a network split between this node and the primary @@ -3224,6 +3227,8 @@ do_election(void) /* pointer to "winning" node, initially self */ candidate_node = &local_node_info; + initPQExpBuffer(&nodes_with_primary_visible); + for (cell = sibling_nodes.head; cell; cell = cell->next) { ReplInfo sibling_replication_info; @@ -3319,6 +3324,11 @@ do_election(void) nodes_with_primary_still_visible++; log_notice(_("node %i last saw primary node %i second(s) ago, considering primary still visible"), cell->node_info->node_id, sibling_replication_info.primary_last_seen); + appendPQExpBuffer(&nodes_with_primary_visible, + " - node \"%s\" (ID: %i): %i second(s) ago\n", + cell->node_info->node_name, + cell->node_info->node_id, + sibling_replication_info.primary_last_seen); } else { @@ -3394,17 +3404,24 @@ do_election(void) if (nodes_with_primary_still_visible > 0) { - log_notice(_("%i nodes can seen the primary"), nodes_with_primary_still_visible); - // XXX list nodes as detail + log_notice(_("%i nodes can see the primary"), + nodes_with_primary_still_visible); + + log_detail(_("following nodes can see the primary:\n%s"), + nodes_with_primary_visible.data); 
monitoring_state = MS_DEGRADED; INSTR_TIME_SET_CURRENT(degraded_monitoring_start); reset_node_voting_status(); + termPQExpBuffer(&nodes_with_primary_visible); + return ELECTION_CANCELLED; } + termPQExpBuffer(&nodes_with_primary_visible); + log_info(_("visible nodes: %i; total nodes: %i; no nodes have seen the primary within the last %i seconds"), visible_nodes, total_nodes,