diff --git a/HISTORY b/HISTORY index 7b905112..90c868eb 100644 --- a/HISTORY +++ b/HISTORY @@ -2,6 +2,7 @@ repmgr: add --version-number command line option (Ian) repmgr: add --terse option to "cluster show"; GitHub #521 (Ian) repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian) + repmgr: add "node check --data-directory-config"; GitHub #523 (Ian) repmgr: "standby switchover": improve handling of connection URIs when executing "node rejoin" on the demotion candidate; GitHub #525 (Ian) repmgrd: check binary and extension major versions match; GitHub #515 (Ian) diff --git a/dirutil.c b/dirutil.c index c2acdd3c..bdcccf54 100644 --- a/dirutil.c +++ b/dirutil.c @@ -198,9 +198,9 @@ mkdir_p(char *path, mode_t omode) bool -is_pg_dir(char *path) +is_pg_dir(const char *path) { - char dirpath[MAXPGPATH]; + char dirpath[MAXPGPATH] = ""; struct stat sb; /* test pgdata */ diff --git a/dirutil.h b/dirutil.h index 06bd3133..8198184a 100644 --- a/dirutil.h +++ b/dirutil.h @@ -39,7 +39,7 @@ extern bool set_dir_permissions(char *path); extern DataDirState check_dir(char *path); extern bool create_dir(char *path); -extern bool is_pg_dir(char *path); +extern bool is_pg_dir(const char *path); extern PgDirState is_pg_running(char *path); extern bool create_pg_dir(char *path, bool force); extern int rmdir_recursive(char *path); diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index 9b05830f..b3a8322b 100644 --- a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -50,6 +50,13 @@ + + + repmgr node check --data-directory-config + option added; this is to confirm &repmgr; is correctly configured. 
+ + + diff --git a/doc/repmgr-cluster-show.sgml b/doc/repmgr-cluster-show.sgml index 94e52d10..b702bf80 100644 --- a/doc/repmgr-cluster-show.sgml +++ b/doc/repmgr-cluster-show.sgml @@ -87,44 +87,44 @@ - - repmgr cluster show accepts an optional parameter --csv, which - outputs the replication cluster's status in a simple CSV format, suitable for - parsing by scripts, e.g.: - + + repmgr cluster show accepts an optional parameter --csv, which + outputs the replication cluster's status in a simple CSV format, suitable for + parsing by scripts, e.g.: + $ repmgr -f /etc/repmgr.conf cluster show --csv 1,-1,-1 2,0,0 3,0,1 - - - The columns have following meanings: - - - - node ID - - - - + + + The columns have following meanings: + + + + node ID + + + + availability (0 = available, -1 = unavailable) - - - - - recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown) - - - - - - + + + + + recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown) + + + + + + - Suppress display of the conninfo column. + Suppress display of the conninfo column. diff --git a/doc/repmgr-node-check.sgml b/doc/repmgr-node-check.sgml index 95ad2941..88f00ec6 100644 --- a/doc/repmgr-node-check.sgml +++ b/doc/repmgr-node-check.sgml @@ -86,6 +86,16 @@ + + + --data-directory-config: checks the data directory configured in + repmgr.conf matches the actual data directory. + This check is not directly related to replication, but is useful to verify &repmgr; + is correctly configured. 
+ + + + @@ -105,6 +115,7 @@ --nagios: generate output in a Nagios-compatible format + (for individual checks only) diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 14a138ba..3268c7f3 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -48,7 +48,7 @@ static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); - +static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); /* * NODE STATUS * @@ -82,6 +82,9 @@ do_node_status(void) int server_version_num = UNKNOWN_SERVER_VERSION_NUM; char server_version_str[MAXVERSIONSTR] = ""; + /* + * A database connection is *not* required for this check + */ if (runtime_options.is_shutdown_cleanly == true) { return _do_node_status_is_shutdown_cleanly(); @@ -653,7 +656,8 @@ _do_node_status_is_shutdown_cleanly(void) node_status = NODE_STATUS_DOWN; } - log_verbose(LOG_DEBUG, "node status determined as: %s", print_node_status(node_status)); + log_verbose(LOG_DEBUG, "node status determined as: %s", + print_node_status(node_status)); switch (node_status) { @@ -789,6 +793,16 @@ do_node_check(void) exit(return_code); } + if (runtime_options.data_directory_config == true) + { + return_code = do_node_check_data_directory(conn, + runtime_options.output_mode, + &node_info, + NULL); + PQfinish(conn); + exit(return_code); + } + if (runtime_options.output_mode == OM_NAGIOS) { @@ -821,6 +835,9 @@ do_node_check(void) if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK) issue_detected = true; + if 
(do_node_check_data_directory(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
+		issue_detected = true;
+
 	if (runtime_options.output_mode == OM_CSV)
 	{
 		appendPQExpBuffer(&output,
@@ -1442,11 +1459,9 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
 	switch (mode)
 	{
 		case OM_OPTFORMAT:
-			{
-				printf("--status=%s %s\n",
-					   output_check_status(status),
-					   details.data);
-			}
+			printf("--status=%s %s\n",
+				   output_check_status(status),
+				   details.data);
 			break;
 		case OM_NAGIOS:
 			printf("REPMGR_REPLICATION_LAG %s: %s\n",
@@ -1797,6 +1812,158 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 }
 
 
+/*
+ * Check that the data directory configured in repmgr.conf is the one the
+ * server is actually using.
+ *
+ * With a superuser connection the configured value is compared against the
+ * server's "data_directory" setting. Otherwise the check falls back to
+ * verifying that the configured directory looks like a PostgreSQL data
+ * directory.
+ *
+ * In text and CSV mode the result is recorded in "list_output" if one is
+ * provided; with a NULL "list_output", text output is printed directly and
+ * CSV output is rejected with ERR_BAD_CONFIG. "node_info" is currently
+ * unused. Returns the resulting check status.
+ */
+CheckStatus
+do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
+{
+	CheckStatus status = CHECK_STATUS_OK;
+	char		actual_data_directory[MAXPGPATH] = "";
+	PQExpBufferData details;
+
+	if (mode == OM_CSV && list_output == NULL)
+	{
+		log_error(_("--csv output not provided with --data-directory-config option"));
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	initPQExpBuffer(&details);
+	/*
+	 * Check actual data directory matches that in repmgr.conf; note this requires
+	 * a superuser connection
+	 */
+
+	if (is_superuser_connection(conn, NULL) == true)
+	{
+		/* we expect to have a database connection */
+		if (get_pg_setting(conn, "data_directory", actual_data_directory) == false)
+		{
+			appendPQExpBuffer(&details,
+							  _("unable to determine current \"data_directory\""));
+			status = CHECK_STATUS_UNKNOWN;
+		}
+		/* only compare once the server's setting has actually been retrieved */
+		else if (strncmp(actual_data_directory, config_file_options.data_directory, MAXPGPATH) != 0)
+		{
+			if (mode != OM_NAGIOS)
+			{
+				appendPQExpBuffer(&details,
+								  _("configured \"data_directory\" is \"%s\"; "),
+								  config_file_options.data_directory);
+			}
+
+			appendPQExpBuffer(&details,
+							  "actual data directory is \"%s\"",
+							  actual_data_directory);
+
+			status = CHECK_STATUS_CRITICAL;
+		}
+		else
+		{
+			appendPQExpBuffer(&details,
+							  _("configured \"data_directory\" is \"%s\""),
+							  config_file_options.data_directory);
+		}
+	}
+	/*
+	 * If no superuser connection available, sanity-check that the configuration directory looks
+	 * like a PostgreSQL directory and hope it's the right one.
+	 */
+	else
+	{
+		if (mode == OM_TEXT)
+		{
+			log_info(_("connection is not a superuser connection, falling back to simple check"));
+
+			/* XXX add -S/--superuser option */
+			if (PQserverVersion(conn) >= 100000)
+			{
+				log_hint(_("add the \"%s\" user to group \"pg_read_all_settings\""),
+						 PQuser(conn));
+			}
+		}
+
+		if (is_pg_dir(config_file_options.data_directory) == false)
+		{
+			if (mode == OM_NAGIOS)
+			{
+				appendPQExpBufferStr(&details,
+									 _("configured \"data_directory\" is not a PostgreSQL data directory"));
+			}
+			else
+			{
+				/* the server's value was never fetched here; report the configured path */
+				appendPQExpBuffer(&details,
+								  _("configured \"data_directory\" \"%s\" is not a PostgreSQL data directory"),
+								  config_file_options.data_directory);
+			}
+
+			status = CHECK_STATUS_CRITICAL;
+		}
+		else
+		{
+			/* avoid empty details in the success case, as in the superuser branch */
+			appendPQExpBuffer(&details,
+							  _("configured \"data_directory\" is \"%s\""),
+							  config_file_options.data_directory);
+		}
+	}
+
+	switch (mode)
+	{
+		case OM_OPTFORMAT:
+			printf("--configured-data-directory=%s\n",
+				   output_check_status(status));
+			break;
+		case OM_NAGIOS:
+			printf("REPMGR_DATA_DIRECTORY %s: %s",
+				   output_check_status(status),
+				   config_file_options.data_directory);
+
+			if (status == CHECK_STATUS_CRITICAL)
+			{
+				printf(" | %s", details.data);
+			}
+			puts("");
+			break;
+		case OM_CSV:
+		case OM_TEXT:
+			if (list_output != NULL)
+			{
+				check_status_list_set(list_output,
+									  "Configured data directory",
+									  status,
+									  details.data);
+			}
+			else
+			{
+				printf("%s (%s)\n",
+					   output_check_status(status),
+					   details.data);
+			}
+			break;
+		default:
+			break;
+	}
+
+	termPQExpBuffer(&details);
+
+	return status;
+}
+
 
 void
 do_node_service(void)
@@ -2924,8 +3091,8 @@ do_node_help(void)
 	puts("");
 	printf(_(" Configuration file required, runs on local node only.\n"));
 	puts("");
-	printf(_(" --csv emit output as CSV\n"));
-	printf(_(" --nagios emit output in Nagios format (individual status output only)\n"));
+	printf(_(" --csv emit output as CSV (not available
for individual check output)\n")); + printf(_(" --nagios emit output in Nagios format (individual check output only)\n")); puts(""); printf(_(" Following options check an individual status:\n")); printf(_(" --archive-ready number of WAL files ready for archiving\n")); @@ -2934,6 +3078,7 @@ do_node_help(void) printf(_(" --role check node has expected role\n")); printf(_(" --slots check for inactive replication slots\n")); printf(_(" --missing-slots check for missing replication slots\n")); + printf(_(" --data-directory-config check repmgr's data directory configuration\n")); puts(""); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 798a43ba..1d639af5 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -3472,6 +3472,10 @@ do_standby_switchover(void) } termPQExpBuffer(&command_output); + /* check remote repmgr has the data directory correctly configured */ + + // - add repmgr node check --data-directory + /* * populate local node record with current state of various replication-related * values, so we can check for sufficient walsenders and replication slots diff --git a/repmgr-client-global.h b/repmgr-client-global.h index b87180f4..a3dba466 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -111,6 +111,7 @@ typedef struct bool missing_slots; bool has_passfile; bool replication_connection; + bool data_directory_config; /* "node rejoin" options */ char config_files[MAXLEN]; @@ -161,7 +162,7 @@ typedef struct /* "node status" options */ \ false, \ /* "node check" options */ \ - false, false, false, false, false, false, false, false, \ + false, false, false, false, false, false, false, false, false, \ /* "node rejoin" options */ \ "", \ /* "node service" options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index 013fdaf0..2e31f7c3 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -497,6 +497,10 @@ main(int argc, char **argv) runtime_options.replication_connection = true; break; + case 
OPT_DATA_DIRECTORY_CONFIG: + runtime_options.data_directory_config = true; + break; + /*-------------------- * "node rejoin" options *-------------------- diff --git a/repmgr-client.h b/repmgr-client.h index 68063483..fe50f277 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -93,6 +93,7 @@ #define OPT_MISSING_SLOTS 1041 #define OPT_REPMGRD_NO_PAUSE 1042 #define OPT_VERSION_NUMBER 1043 +#define OPT_DATA_DIRECTORY_CONFIG 1044 /* deprecated since 3.3 */ @@ -177,6 +178,7 @@ static struct option long_options[] = {"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS}, {"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE}, {"replication-connection", no_argument, NULL, OPT_REPL_CONN}, + {"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG}, /* "node rejoin" options */ {"config-files", required_argument, NULL, OPT_CONFIG_FILES},