mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 00:46:29 +00:00
repmgrd: don't start if node is inactive and failover=automatic
If failover=automatic, it would be reasonable to expect repmgrd to consider this node as a promotion candidate; however, this will not happen if it is marked inactive. This often happens when a failed primary is recloned as a standby but not re-registered, and if repmgrd were to run, it would give the incorrect impression that failover capability is available. Addresses GitHub #153.
This commit is contained in:
11
FAQ.md
11
FAQ.md
@@ -151,6 +151,9 @@ General
|
|||||||
|
|
||||||
In `repmgr.conf`, set its priority to a value of 0 or less.
|
In `repmgr.conf`, set its priority to a value of 0 or less.
|
||||||
|
|
||||||
|
Additionally, if `failover` is set to `manual`, the node will never
|
||||||
|
be considered as a promotion candidate.
|
||||||
|
|
||||||
- Does `repmgrd` support delayed standbys?
|
- Does `repmgrd` support delayed standbys?
|
||||||
|
|
||||||
`repmgrd` can monitor delayed standbys - those set up with
|
`repmgrd` can monitor delayed standbys - those set up with
|
||||||
@@ -169,3 +172,11 @@ General
|
|||||||
|
|
||||||
Configure your system's `logrotate` service to do this; see example
|
Configure your system's `logrotate` service to do this; see example
|
||||||
in README.md
|
in README.md
|
||||||
|
|
||||||
|
- I've recloned a failed master as a standby, but `repmgrd` refuses to start?
|
||||||
|
|
||||||
|
Check that you registered the standby after recloning. If unregistered, the standby
|
||||||
|
cannot be considered as a promotion candidate even if `failover` is set to
|
||||||
|
`automatic`, which is probably not what you want. `repmgrd` will start if
|
||||||
|
`failover` is set to `manual` so the node's replication status can still
|
||||||
|
be monitored, if desired.
|
||||||
|
|||||||
35
repmgrd.c
35
repmgrd.c
@@ -311,6 +311,41 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
log_debug("node id is %i, upstream is %i\n", node_info.node_id, node_info.upstream_node_id);
|
log_debug("node id is %i, upstream is %i\n", node_info.node_id, node_info.upstream_node_id);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if node record is active - if not, and `failover=automatic`, the node
|
||||||
|
* won't be considered as a promotion candidate; this often happens when
|
||||||
|
* a failed primary is recloned and the node was not re-registered, giving
|
||||||
|
* the impression failover capability is there when it's not. In this case
|
||||||
|
* abort with an error and a hint about registering.
|
||||||
|
*
|
||||||
|
* If `failover=manual`, repmgrd can continue to passively monitor the node, but
|
||||||
|
* we should nevertheless issue a warning and the same hint.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (node_info.active == false)
|
||||||
|
{
|
||||||
|
char *hint = "Check that 'repmgr (master|standby) register' was executed for this node";
|
||||||
|
|
||||||
|
switch (local_options.failover)
|
||||||
|
{
|
||||||
|
case AUTOMATIC_FAILOVER:
|
||||||
|
log_err(_("This node is marked as inactive and cannot be used for failover\n"));
|
||||||
|
log_hint(_("%s\n"), hint);
|
||||||
|
terminate(ERR_BAD_CONFIG);
|
||||||
|
|
||||||
|
case MANUAL_FAILOVER:
|
||||||
|
log_warning(_("This node is marked as inactive and will be passively monitored only\n"));
|
||||||
|
log_hint(_("%s\n"), hint);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
/* This should never happen */
|
||||||
|
log_err(_("Unknown failover mode %i\n"), local_options.failover);
|
||||||
|
terminate(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* MAIN LOOP This loop cycles at startup and once per failover and
|
* MAIN LOOP This loop cycles at startup and once per failover and
|
||||||
* Requisites:
|
* Requisites:
|
||||||
|
|||||||
Reference in New Issue
Block a user