mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgrd: don't consider nodes where repmgrd is not running
If, for whatever reason, repmgrd is not running on a node, but that node qualifies as a promotion candidate, failover will not take place, as that node will never promote itself. We therefore discount nodes where repmgrd is not running as promotion candidates, which will ensure one node is always promoted. There is a slight risk here that the node(s) where repmgrd is not running are further ahead, leading to a timeline fork. It might be possible to mitigate that by having the "election" leader perform the promote (or follow) operation.
This commit is contained in:
@@ -3257,6 +3257,7 @@ do_election(void)
|
||||
log_debug("node %i is witness, not querying state", cell->node_info->node_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* don't check 0-priority nodes */
|
||||
if (cell->node_info->priority == 0)
|
||||
{
|
||||
@@ -3265,6 +3266,22 @@ do_election(void)
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* check if repmgrd running - skip if not
|
||||
*
|
||||
* TODO: include pid query in replication info query?
|
||||
*
|
||||
* NOTE: from Pg12 we could execute "pg_promote()" from a running repmgrd;
|
||||
* here we'll need to find a way of ensuring only one repmgrd does this
|
||||
*/
|
||||
if (repmgrd_get_pid(cell->node_info->conn) == UNKNOWN_PID)
|
||||
{
|
||||
log_warning(_("repmgrd not running on node %i, skipping"),
|
||||
cell->node_info->node_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (get_replication_info(cell->node_info->conn, &sibling_replication_info) == false)
|
||||
{
|
||||
log_warning(_("unable to retrieve replication information for node %i, skipping"),
|
||||
|
||||
Reference in New Issue
Block a user