From 91234994e2bcdaa95c27a30c93d907721a6987da Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 13 Mar 2019 15:34:07 +0900 Subject: [PATCH] doc: merge repmgrd degraded monitoring description into operation section --- doc/filelist.sgml | 1 - doc/repmgr.sgml | 1 - doc/repmgrd-degraded-monitoring.sgml | 87 ---------------------------- doc/repmgrd-operation.sgml | 87 ++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 89 deletions(-) delete mode 100644 doc/repmgrd-degraded-monitoring.sgml diff --git a/doc/filelist.sgml b/doc/filelist.sgml index d3f8b5a0..06609fff 100644 --- a/doc/filelist.sgml +++ b/doc/filelist.sgml @@ -55,7 +55,6 @@ - diff --git a/doc/repmgr.sgml b/doc/repmgr.sgml index 39292d98..3cbcc321 100644 --- a/doc/repmgr.sgml +++ b/doc/repmgr.sgml @@ -86,7 +86,6 @@ &repmgrd-operation; &repmgrd-network-split; &repmgrd-witness-server; - &repmgrd-degraded-monitoring; &repmgrd-monitoring; &repmgrd-bdr; diff --git a/doc/repmgrd-degraded-monitoring.sgml b/doc/repmgrd-degraded-monitoring.sgml deleted file mode 100644 index 9773f813..00000000 --- a/doc/repmgrd-degraded-monitoring.sgml +++ /dev/null @@ -1,87 +0,0 @@ - - - repmgrd - degraded monitoring - - - - degraded monitoring - - - "degraded monitoring" mode - - In certain circumstances, repmgrd is not able to fulfill its primary mission - of monitoring the node's upstream server. In these cases it enters "degraded monitoring" - mode, where repmgrd remains active but is waiting for the situation - to be resolved. 
- - - Situations where this happens are: - - - - a failover situation has occurred, no nodes in the primary node's location are visible - - - - a failover situation has occurred, but no promotion candidate is available - - - - a failover situation has occurred, but the promotion candidate could not be promoted - - - - a failover situation has occurred, but the node was unable to follow the new primary - - - - a failover situation has occurred, but no primary has become available - - - - a failover situation has occurred, but automatic failover is not enabled for the node - - - - repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary) - - - - - - Example output in a situation where there is only one standby with failover=manual, - and the primary node is unavailable (but is later restarted): - - [2017-08-29 10:59:19] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled) - [2017-08-29 10:59:33] [WARNING] unable to connect to upstream node "node1" (node ID: 1) - [2017-08-29 10:59:33] [INFO] checking state of node 1, 1 of 5 attempts - [2017-08-29 10:59:33] [INFO] sleeping 1 seconds until next reconnection attempt - (...) 
- [2017-08-29 10:59:37] [INFO] checking state of node 1, 5 of 5 attempts - [2017-08-29 10:59:37] [WARNING] unable to reconnect to node 1 after 5 attempts - [2017-08-29 10:59:37] [NOTICE] this node is not configured for automatic failover so will not be considered as promotion candidate - [2017-08-29 10:59:37] [NOTICE] no other nodes are available as promotion candidate - [2017-08-29 10:59:37] [HINT] use "repmgr standby promote" to manually promote this node - [2017-08-29 10:59:37] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled) - [2017-08-29 10:59:53] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled) - [2017-08-29 11:00:45] [NOTICE] reconnected to upstream node 1 after 68 seconds, resuming monitoring - [2017-08-29 11:00:57] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled) - - - - By default, repmgrd will continue in degraded monitoring mode indefinitely. - However a timeout (in seconds) can be set with degraded_monitoring_timeout, - after which repmgrd will terminate. - - - - - If repmgrd is monitoring a primary mode which has been stopped - and manually restarted as a standby attached to a new primary, it will automatically detect - the status change and update the node record to reflect the node's new status - as an active standby. It will then resume monitoring the node as a standby. - - - - diff --git a/doc/repmgrd-operation.sgml b/doc/repmgrd-operation.sgml index 29a029b6..97786542 100644 --- a/doc/repmgrd-operation.sgml +++ b/doc/repmgrd-operation.sgml @@ -213,4 +213,91 @@ NOTICE: node 3 (node3) unpaused + + + repmgrd + degraded monitoring + + + + degraded monitoring + + + "degraded monitoring" mode + + In certain circumstances, repmgrd is not able to fulfill its primary mission + of monitoring the node's upstream server. 
In these cases it enters "degraded monitoring" + mode, where repmgrd remains active but is waiting for the situation + to be resolved. + + + Situations where this happens are: + + + + a failover situation has occurred, no nodes in the primary node's location are visible + + + + a failover situation has occurred, but no promotion candidate is available + + + + a failover situation has occurred, but the promotion candidate could not be promoted + + + + a failover situation has occurred, but the node was unable to follow the new primary + + + + a failover situation has occurred, but no primary has become available + + + + a failover situation has occurred, but automatic failover is not enabled for the node + + + + repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary) + + + + + + Example output in a situation where there is only one standby with failover=manual, + and the primary node is unavailable (but is later restarted): + + [2017-08-29 10:59:19] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled) + [2017-08-29 10:59:33] [WARNING] unable to connect to upstream node "node1" (node ID: 1) + [2017-08-29 10:59:33] [INFO] checking state of node 1, 1 of 5 attempts + [2017-08-29 10:59:33] [INFO] sleeping 1 seconds until next reconnection attempt + (...) 
+ [2017-08-29 10:59:37] [INFO] checking state of node 1, 5 of 5 attempts + [2017-08-29 10:59:37] [WARNING] unable to reconnect to node 1 after 5 attempts + [2017-08-29 10:59:37] [NOTICE] this node is not configured for automatic failover so will not be considered as promotion candidate + [2017-08-29 10:59:37] [NOTICE] no other nodes are available as promotion candidate + [2017-08-29 10:59:37] [HINT] use "repmgr standby promote" to manually promote this node + [2017-08-29 10:59:37] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled) + [2017-08-29 10:59:53] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled) + [2017-08-29 11:00:45] [NOTICE] reconnected to upstream node 1 after 68 seconds, resuming monitoring + [2017-08-29 11:00:57] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled) + + + + By default, repmgrd will continue in degraded monitoring mode indefinitely. + However, a timeout (in seconds) can be set with degraded_monitoring_timeout, + after which repmgrd will terminate. + + + + + If repmgrd is monitoring a primary node which has been stopped + and manually restarted as a standby attached to a new primary, it will automatically detect + the status change and update the node record to reflect the node's new status + as an active standby. It will then resume monitoring the node as a standby. + + + +