diff --git a/HISTORY b/HISTORY index dcf23e83..374c464c 100644 --- a/HISTORY +++ b/HISTORY @@ -10,8 +10,11 @@ 3.1.5 2016-08- repmgrd: in a failover situation, prevent endless looping when - attempting to establish the status of a node with - `failover=manual` (Ian) + attempting to establish the status of a node with + `failover=manual` (Ian) + repmgrd: improve handling of failover events on standbys with + `failover=manual`, and create a new event notification + for this, `standby_disconnect_manual` (Ian) 3.1.4 2016-07-12 repmgr: new configuration option for setting "restore_command" diff --git a/README.md b/README.md index 11a8e0bc..2f265701 100644 --- a/README.md +++ b/README.md @@ -1141,9 +1141,16 @@ table , it's advisable to regularly purge historical data with `repmgr cluster cleanup`; use the `-k/--keep-history` to specify how many day's worth of data should be retained. +It's possible to use `repmgrd` to provide monitoring only for some or all +nodes by setting `failover = manual` in the node's `repmgr.conf`. In the +event of the node's upstream failing, no failover action will be taken +and the node will require manual intervention to be reattached to replication. +If this occurs, event notification `standby_disconnect_manual` will be +created. + Note that when a standby node is not streaming directly from its upstream -node, i.e. recovering WAL from an archive, `apply_lag` will always -appear as `0 bytes`. +node, e.g. recovering WAL from an archive, `apply_lag` will always appear as +`0 bytes`. 
Using a witness server with repmgrd @@ -1242,6 +1249,7 @@ The following event types are available: * `standby_promote` * `standby_follow` * `standby_switchover` + * `standby_disconnect_manual` * `witness_create` * `witness_register` * `witness_unregister` diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 4233d061..d76be91e 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -170,10 +170,18 @@ #reconnect_interval=10 # Autofailover options -#failover=manual # one of 'automatic', 'manual' - # (default: manual) -#priority=100 # a value of zero or less prevents the node being promoted to primary +#failover=manual # one of 'automatic', 'manual' (default: manual) + # defines the action to take in the event of upstream failure + # + # 'automatic': repmgrd will automatically attempt to promote the + # node or follow the new upstream node + # 'manual': repmgrd will take no action and the node will require + # manual attention to reattach it to replication + +#priority=100 # indicate a preferred priority for promoting nodes + # a value of zero or less prevents the node being promoted to primary # (default: 100) + #promote_command='repmgr standby promote -f /path/to/repmgr.conf' #follow_command='repmgr standby follow -f /path/to/repmgr.conf -W' diff --git a/repmgrd.c b/repmgrd.c index c789e096..8ba57f70 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -63,6 +63,13 @@ t_node_info node_info; bool failover_done = false; +/* + * when `failover=manual`, and the upstream server has gone away, + * this flag is set to indicate we should connect to whatever the + * current master is to update monitoring information + */ +bool manual_mode_upstream_disconnected = false; + char *pid_file = NULL; static void help(void); @@ -450,6 +457,7 @@ main(int argc, char **argv) my_local_conn = establish_db_connection(local_options.conninfo, true); update_registration(); } + /* Log startup event */ if (startup_event_logged == false) { @@ -737,6 +745,8 @@ standby_monitor(void) const char 
*upstream_node_type = NULL; bool receiving_streamed_wal = true; + + /* * Verify that the local node is still available - if not there's * no point in doing much else anyway @@ -758,15 +768,32 @@ standby_monitor(void) goto continue_monitoring_standby; } - upstream_conn = get_upstream_connection(my_local_conn, - local_options.cluster_name, - local_options.node, - &upstream_node_id, - upstream_conninfo); + /* + * Standby has `failover` set to manual and is disconnected from + * replication following a prior upstream node failure - we'll + * find the master to be able to write monitoring information, if + * required + */ + if (manual_mode_upstream_disconnected == true) + { + upstream_conn = get_master_connection(my_local_conn, + local_options.cluster_name, + &upstream_node_id, + upstream_conninfo); + upstream_node_type = "master"; + } + else + { + upstream_conn = get_upstream_connection(my_local_conn, + local_options.cluster_name, + local_options.node, + &upstream_node_id, + upstream_conninfo); - upstream_node_type = (upstream_node_id == master_options.node) - ? "master" - : "upstream"; + upstream_node_type = (upstream_node_id == master_options.node) + ? "master" + : "upstream"; + } /* * Check that the upstream node is still available @@ -781,16 +808,27 @@ standby_monitor(void) if (PQstatus(upstream_conn) != CONNECTION_OK) { + int previous_master_node_id = master_options.node; + PQfinish(upstream_conn); upstream_conn = NULL; + /* + * When `failover=manual`, no actual failover will be performed, instead + * the following happens: + * - find the new master + * - create an event notification `standby_disconnect_manual` + * - set a flag to indicate we're disconnected from replication, + */ if (local_options.failover == MANUAL_FAILOVER) { log_err(_("Unable to reconnect to %s. 
Now checking if another node has been promoted.\n"), upstream_node_type); /* * Set the location string in shared memory to indicate to other - * repmgrd instances that we're *not* a promotion candidate + * repmgrd instances that we're *not* a promotion candidate and + * that other repmgrd instances should not expect location updates + * from us */ update_shared_memory(PASSIVE_NODE); @@ -799,13 +837,14 @@ standby_monitor(void) { master_conn = get_master_connection(my_local_conn, local_options.cluster_name, &master_options.node, NULL); + if (PQstatus(master_conn) == CONNECTION_OK) { /* * Connected, we can continue the process so break the * loop */ - log_err(_("connected to node %d, continuing monitoring.\n"), + log_notice(_("connected to node %d, continuing monitoring.\n"), master_options.node); break; } @@ -846,7 +885,34 @@ standby_monitor(void) } /* + * connected to a master - is it the same as the former upstream? + * if not: + * - create event `standby_disconnect_manual` + * - set global `manual_mode_upstream_disconnected` */ + + if (previous_master_node_id != master_options.node) + { + PQExpBufferData errmsg; + initPQExpBuffer(&errmsg); + + appendPQExpBuffer(&errmsg, + _("node %i is in manual failover mode and is now disconnected from replication"), + local_options.node); + + log_verbose(LOG_DEBUG, "old master: %i; current: %i\n", previous_master_node_id, master_options.node); + + manual_mode_upstream_disconnected = true; + + create_event_record(master_conn, + &local_options, + local_options.node, + "standby_disconnect_manual", + /* here "true" indicates the action has occurred as expected */ + true, + errmsg.data); + + } } else if (local_options.failover == AUTOMATIC_FAILOVER) { @@ -947,8 +1013,8 @@ standby_monitor(void) * the stream. If we set the local standby node as failed and it's now running * and receiving replication data, we should activate it again. 
*/ - set_local_node_status(); - log_info(_("standby connection recovered!\n")); + set_local_node_status(); + log_info(_("standby connection recovered!\n")); } /* Fast path for the case where no history is requested */ @@ -960,6 +1026,7 @@ standby_monitor(void) * from the upstream node to write monitoring information */ + /* XXX not used? */ upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id); sprintf(sqlquery,