From 5f1ba6db3d36f3bfaf471af8b2b11ef3d306e8b4 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 28 Jul 2021 11:42:08 +0900 Subject: [PATCH] standby switchover: improve handling of node rejoin failure Explicitly check whether the "repmgr node rejoin" command on the demotion candidate succeeded. Due to the way SSH execution is currently implemented, we can return either the command execution status or the command output; to ensure any errors are available, log them to a temporary file on the demotion candidate and note its location in case of an error. While we're at it, improve error message handling when the demotion candidate fails to rejoin. --- HISTORY | 1 + doc/appendix-release-notes.xml | 15 +++ repmgr-action-standby.c | 201 +++++++++++++++++++++++---------- repmgr-client-global.h | 2 + 4 files changed, 159 insertions(+), 60 deletions(-) diff --git a/HISTORY b/HISTORY index 567e3257..c9b412a5 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,5 @@ 5.3.0 2021-??-?? + standby switchover: improve handling of node rejoin failure (Ian) repmgrd: prefix all shared library functions with "repmgr_" to minimize the risk of clashes with other shared libraries (Ian) repmgrd: at startup, if node record is marked as "inactive", attempt diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index c5adb7fd..2a28b651 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -30,6 +30,21 @@ Improvements + + + repmgr standby switchover: + Improve handling of node rejoin failure on the demotion candidate. + + + Previously &repmgr; did not check whether repmgr node rejoin actually + succeeded on the demotion candidate, and would always wait up to node_rejoin_timeout + seconds for it to attach to the promotion candidate, even if this would never happen. + + + This makes it easier to identify unexpected events during a switchover operation, such as + the demotion candidate being unexpectedly restarted by an external process. + + &repmgrd;: at startup, if node record is marked as "inactive", attempt diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 4b094085..69121541 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -3651,8 +3651,9 @@ do_standby_switchover(void) PQExpBufferData remote_command_str; PQExpBufferData command_output; PQExpBufferData node_rejoin_options; - PQExpBufferData errmsg; + PQExpBufferData logmsg; PQExpBufferData detailmsg; + PQExpBufferData event_details; int r, i; @@ -3669,6 +3670,9 @@ do_standby_switchover(void) /* store list of configuration files on the demotion candidate */ KeyValueList remote_config_files = {NULL, NULL}; + /* temporary log file for "repmgr node rejoin" on the demotion candidate */ + char node_rejoin_log[MAXPGPATH] = ""; + NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER; SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER; @@ -3811,24 +3815,24 @@ do_standby_switchover(void) * the demotion candidate as the rejoin will fail if we are unable to to write to that. */ - initPQExpBuffer(&errmsg); + initPQExpBuffer(&logmsg); initPQExpBuffer(&detailmsg); if (check_replication_config_owner(PQserverVersion(local_conn), config_file_options.data_directory, - &errmsg, &detailmsg) == false) + &logmsg, &detailmsg) == false) { - log_error("%s", errmsg.data); + log_error("%s", logmsg.data); log_detail("%s", detailmsg.data); - termPQExpBuffer(&errmsg); + termPQExpBuffer(&logmsg); termPQExpBuffer(&detailmsg); PQfinish(local_conn); exit(ERR_BAD_CONFIG); } - termPQExpBuffer(&errmsg); + termPQExpBuffer(&logmsg); termPQExpBuffer(&detailmsg); /* check remote server connection and retrieve its record */ @@ -5367,6 +5371,21 @@ do_standby_switchover(void) pfree(conninfo_normalized); } + /* */ + snprintf(node_rejoin_log, MAXPGPATH, +#if defined(__i386__) || defined(__i386) + "/tmp/node-rejoin.%u.log", + (unsigned)time(NULL) +#else + "/tmp/node-rejoin.%lu.log", + (unsigned long)time(NULL) +#endif + ); + + appendPQExpBuffer(&remote_command_str, + " > %s 2>&1 && echo \"1\" || echo \"0\"", + node_rejoin_log); + termPQExpBuffer(&node_rejoin_options); log_debug("executing:\n %s", remote_command_str.data); @@ -5380,78 +5399,140 @@ do_standby_switchover(void) termPQExpBuffer(&remote_command_str); - /* TODO: verify this node's record was updated correctly */ + initPQExpBuffer(&logmsg); + initPQExpBuffer(&detailmsg); + /* This is failure to execute the ssh command */ if (command_success == false) { log_error(_("rejoin failed with error code %i"), r); + switchover_success = false; + + appendPQExpBuffer(&logmsg, + _("unable to execute \"repmgr node rejoin\" on demotion candidate \"%s\" (ID: %i)"), + remote_node_record.node_name, + remote_node_record.node_id); + appendPQExpBufferStr(&detailmsg, + command_output.data); - create_event_notification_extended(local_conn, - &config_file_options, - config_file_options.node_id, - "standby_switchover", - false, - command_output.data, - &event_info); } else { - PQExpBufferData event_details; - standy_join_status join_success = check_standby_join(local_conn, - &local_node_record, - &remote_node_record); + standy_join_status join_success = JOIN_UNKNOWN; - initPQExpBuffer(&event_details); - - switch (join_success) { - case JOIN_FAIL_NO_PING: - appendPQExpBuffer(&event_details, - _("node \"%s\" (ID: %i) promoted to primary, but demote node \"%s\" (ID: %i) did not beome available"), - config_file_options.node_name, - config_file_options.node_id, - remote_node_record.node_name, - remote_node_record.node_id); - switchover_success = false; - - break; - case JOIN_FAIL_NO_REPLICATION: - appendPQExpBuffer(&event_details, - _("node \"%s\" (ID: %i) promoted to primary, but demote node \"%s\" (ID: %i) did not connect to the new primary"), - config_file_options.node_name, - config_file_options.node_id, - remote_node_record.node_name, - remote_node_record.node_id); - switchover_success = false; - break; - case JOIN_SUCCESS: - appendPQExpBuffer(&event_details, - _("node \"%s\" (ID: %i) promoted to primary, node \"%s\" (ID: %i) demoted to standby"), - config_file_options.node_name, - config_file_options.node_id, - remote_node_record.node_name, - remote_node_record.node_id); - } - - create_event_notification_extended(local_conn, - &config_file_options, - config_file_options.node_id, - "standby_switchover", - switchover_success, - event_details.data, - &event_info); - if (switchover_success == true) + /* "rempgr node rejoin" failed on the demotion candidate */ + if (command_output.data[0] == '0') { - log_notice("%s", event_details.data); + appendPQExpBuffer(&logmsg, + _("execution of \"repmgr node rejoin\" on demotion candidate \"%s\" (ID: %i) failed"), + remote_node_record.node_name, + remote_node_record.node_id); + + appendPQExpBuffer(&detailmsg, + "check log file \"%s\" on \"%s\" for details", + node_rejoin_log, + remote_node_record.node_name); + + switchover_success = false; + join_success = JOIN_COMMAND_FAIL; } else { - log_error("%s", event_details.data); + join_success = check_standby_join(local_conn, + &local_node_record, + &remote_node_record); + + + + switch (join_success) { + case JOIN_FAIL_NO_PING: + appendPQExpBuffer(&logmsg, + _("node \"%s\" (ID: %i) promoted to primary, but demotion candidate \"%s\" (ID: %i) did not become available"), + config_file_options.node_name, + config_file_options.node_id, + remote_node_record.node_name, + remote_node_record.node_id); + + switchover_success = false; + + break; + case JOIN_FAIL_NO_REPLICATION: + appendPQExpBuffer(&logmsg, + _("node \"%s\" (ID: %i) promoted to primary, but demotion candidate \"%s\" (ID: %i) did not connect to the new primary"), + config_file_options.node_name, + config_file_options.node_id, + remote_node_record.node_name, + remote_node_record.node_id); + switchover_success = false; + break; + case JOIN_SUCCESS: + appendPQExpBuffer(&logmsg, + _("node \"%s\" (ID: %i) promoted to primary, node \"%s\" (ID: %i) demoted to standby"), + config_file_options.node_name, + config_file_options.node_id, + remote_node_record.node_name, + remote_node_record.node_id); + break; + /* check_standby_join() does not return this */ + case JOIN_COMMAND_FAIL: + break; + /* should never happen*/ + case JOIN_UNKNOWN: + appendPQExpBuffer(&logmsg, + "unable to determine success of node rejoin action for demotion candidate \"%s\" (ID: %i)", + remote_node_record.node_name, + remote_node_record.node_id); + switchover_success = false; + break; + } + + if (switchover_success == false) + { + appendPQExpBuffer(&detailmsg, + "check the PostgreSQL log file on demotion candidate \"%s\" (ID: %i)", + remote_node_record.node_name, + remote_node_record.node_id); + } } - termPQExpBuffer(&event_details); } + if (switchover_success == true) + { + /* TODO: verify demotion candidates's node record was updated correctly */ + + log_notice("%s", logmsg.data); + } + else + { + log_error("%s", logmsg.data); + } + + initPQExpBuffer(&event_details); + + appendPQExpBufferStr(&event_details, logmsg.data); + + if (detailmsg.data[0] != '\0') + { + log_detail("%s", detailmsg.data); + appendPQExpBuffer(&event_details, "\n%s", + detailmsg.data); + } + + + create_event_notification_extended(local_conn, + &config_file_options, + config_file_options.node_id, + "standby_switchover", + switchover_success, + event_details.data, + &event_info); + + termPQExpBuffer(&event_details); + termPQExpBuffer(&logmsg); + termPQExpBuffer(&detailmsg); termPQExpBuffer(&command_output); + /* * If --siblings-follow specified, attempt to make them follow the new * primary diff --git a/repmgr-client-global.h b/repmgr-client-global.h index eeb868b9..fe9a3968 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -219,7 +219,9 @@ typedef enum typedef enum { + JOIN_UNKNOWN = -1, JOIN_SUCCESS, + JOIN_COMMAND_FAIL, JOIN_FAIL_NO_PING, JOIN_FAIL_NO_REPLICATION } standy_join_status;