From 8881b69c067895bdbbd80c0ff74f3430758398d5 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 16 Jan 2019 15:56:45 +0900 Subject: [PATCH] "standby switchover": check remote data directory configuration The switchover will fail if the data_directory parameter in repmgr.conf on the remote node (demotion candidate) is incorrectly configured. We use the previously added "repmgr node check --data-directory-config to verify this, and abort early if an issue is discovered. Implements GitHub #523. --- HISTORY | 2 + doc/appendix-release-notes.sgml | 11 +++- repmgr-action-standby.c | 112 +++++++++++++++++++++++++++++++- 3 files changed, 122 insertions(+), 3 deletions(-) diff --git a/HISTORY b/HISTORY index 90c868eb..4a7acce9 100644 --- a/HISTORY +++ b/HISTORY @@ -3,6 +3,8 @@ repmgr: add --terse option to "cluster show"; GitHub #521 (Ian) repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian) repmgr: add "node check --data-directory-config"; GitHub #523 (Ian) + repmgr: ensure "standby switchover" verifies repmgr can read the + data directory on the demotion candidate; GitHub #523 (Ian) repmgr: "standby switchover": improve handling of connection URIs when executing "node rejoin" on the demotion candidate; GitHub #525 (Ian) repmgrd: check binary and extension major versions match; GitHub #515 (Ian) diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index b3a8322b..aec8b864 100644 --- a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -53,7 +53,16 @@ repmgr node check --data-directory-config - option added; this is to confirm &repmgr; is correctly configured. + option added; this is to confirm &repmgr; is correctly configured. GitHub #523. + + + + + + Add check repmgr standby switchover + to ensure the data directory on the demotion candidate is configured correctly in repmgr.conf. + This is to ensure that &repmgr;, when remotely executed on the demotion candidate, can correctly verify + that PostgreSQL on the demotion candidate was shut down cleanly. GitHub #523. diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 1d639af5..a098a899 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -101,6 +101,7 @@ static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint); static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold); static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output); +static bool parse_data_directory_config(const char *node_check_output); /* * STANDBY CLONE @@ -3455,7 +3456,6 @@ do_standby_switchover(void) "(not set)"); } - log_hint("%s", hint.data); termPQExpBuffer(&hint); @@ -3465,12 +3465,70 @@ do_standby_switchover(void) exit(ERR_BAD_CONFIG); } + termPQExpBuffer(&command_output); + + /* + * Sanity-check remote "data_directory" is correctly configured in repmgr.conf. + * + * This is important as we'll need to be able to run "repmgr node status" on the data + * directory after the remote (demotion candidate) has shut down. + */ + + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str, &remote_node_record); + + /* + * --data-directory-config is available from repmgr 4.3; it will fail + * if the remote repmgr is an earlier version, but the version should match + * anyway. + */ + appendPQExpBufferStr(&remote_command_str, "node check --data-directory-config --optformat -LINFO 2>/dev/null"); + + initPQExpBuffer(&command_output); + command_success = remote_command(remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (command_success == false) + { + log_error(_("unable to execute \"%s node check --data-directory-config\" on \"%s\":"), + progname(), remote_host); + log_detail("%s", command_output.data); + + PQfinish(remote_conn); + PQfinish(local_conn); + + termPQExpBuffer(&command_output); + + exit(ERR_BAD_CONFIG); + } + + if (parse_data_directory_config(command_output.data) == false) + { + log_error(_("\"data_directory\" parameter in repmgr.conf on \"%s\" is incorrectly configured"), + remote_node_record.node_name); + + log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" to diagnose the issue"), + remote_node_record.node_name); + + PQfinish(remote_conn); + PQfinish(local_conn); + + termPQExpBuffer(&command_output); + + exit(ERR_BAD_CONFIG); + } + + termPQExpBuffer(&command_output); if (runtime_options.dry_run == true) { log_info(_("able to execute \"%s\" on remote host \"localhost\""), progname()); } - termPQExpBuffer(&command_output); + /* check remote repmgr has the data directory correctly configured */ @@ -7137,7 +7195,57 @@ parse_node_check_archiver(const char *node_check_output, int *files, int *thresh return status; } +static bool +parse_data_directory_config(const char *node_check_output) +{ + bool config_ok = true; + int c = 0, + argc_item = 0; + char **argv_array = NULL; + int optindex = 0; + + /* We're only interested in this option */ + struct option node_check_options[] = + { + {"configured-data-directory", required_argument, NULL, 'C'}, + {NULL, 0, NULL, 0} + }; + + /* Don't attempt to tokenise an empty string */ + if (!strlen(node_check_output)) + { + return false; + } + + argc_item = parse_output_to_argv(node_check_output, &argv_array); + + /* Reset getopt's optind variable */ + optind = 0; + + /* Prevent getopt from emitting errors */ + opterr = 0; + + while ((c = getopt_long(argc_item, argv_array, "C:", node_check_options, + &optindex)) != -1) + { + switch (c) + { + /* --configured-data-directory */ + case 'C': + { + /* we only care whether it's "OK" or not */ + if (strncmp(optarg, "OK", 2) != 0) + config_ok = false; + } + break; + } + } + + free_parsed_argv(&argv_array); + + return config_ok; +} void