repmgrd: improve logging of sibling node's upstream info

If the sibling node has already been promoted (for whatever
reason, e.g. "repmgr standby promote" was executed manually)
and has exited recovery, the upstream node ID will normally
be reported as "-1", which is correct, but looks confusing in
the logs.

We now only report the upstream node ID if the sibling node
is still in recovery, *or* if it has exited recovery but is
still reporting an actual upstream node ID (i.e. not "-1").
This commit is contained in:
Ian Barwick
2019-04-29 13:48:18 +09:00
parent ec6266e375
commit 87910a5448

View File

@@ -4088,12 +4088,6 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
continue;
}
log_info(_("node \"%s\" (ID: %i) reports its upstream is node %i, last seen %i second(s) ago"),
cell->node_info->node_name,
cell->node_info->node_id,
sibling_replication_info.upstream_node_id,
sibling_replication_info.upstream_last_seen);
/*
* Check if node is not in recovery - it may have been promoted
* outside of the failover mechanism, in which case we may be able
@@ -4108,6 +4102,21 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
cell->node_info->node_name,
cell->node_info->node_id);
/*
* Node is not in recovery, but still reporting an upstream
* node ID; possible it was promoted manually (e.g. with "pg_ctl promote"),
* or (less likely) the node's repmgrd has just switched to primary
* monitoring mode but has not yet unset the upstream node ID in
* shared memory. Either way, log this.
*/
if (sibling_replication_info.upstream_node_id != UNKNOWN_NODE_ID)
{
log_warning(_("node \"%s\" (ID: %i) still reports its upstream is node %i, last seen %i second(s) ago"),
cell->node_info->node_name,
cell->node_info->node_id,
sibling_replication_info.upstream_node_id,
sibling_replication_info.upstream_last_seen);
}
can_follow = check_node_can_follow(local_conn,
local_node_info.last_wal_receive_lsn,
cell->node_info->conn,
@@ -4128,6 +4137,14 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
cell->node_info->node_id);
continue;
}
else
{
log_info(_("node \"%s\" (ID: %i) reports its upstream is node %i, last seen %i second(s) ago"),
cell->node_info->node_name,
cell->node_info->node_id,
sibling_replication_info.upstream_node_id,
sibling_replication_info.upstream_last_seen);
}
/* check if WAL replay on node is paused */
if (sibling_replication_info.wal_replay_paused == true)