From 2499b42ef86d5ba3aceb4353fdebc9c488502af6 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 8 Aug 2017 00:37:20 +0900 Subject: [PATCH] switchover: check for pending archive files on the demotion candidate If the current primary (demotion candidate) still has any files to archive, it will delay the shutdown until all files are archived. If there is a substantial number of files, and/or the archive command executes slowly, this will probably lead to an unwelcome delay in the switchover process. --- configfile.c | 15 +++ configfile.h | 6 ++ dbutils.c | 83 +++++++++++++++ dbutils.h | 1 + repmgr-action-cluster.c | 6 +- repmgr-action-node.c | 94 ++++++++++++++++- repmgr-action-node.h | 2 + repmgr-action-standby.c | 225 +++++++++++++++++++++++++++++++++++++--- repmgr-client-global.h | 27 ++++- repmgr-client.c | 55 +++++++++- repmgr-client.h | 79 +++++++------- repmgr.h | 17 +-- 12 files changed, 543 insertions(+), 67 deletions(-) diff --git a/configfile.c b/configfile.c index 01bcdb0b..191bbea1 100644 --- a/configfile.c +++ b/configfile.c @@ -390,6 +390,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * else if (strcmp(name, "restore_command") == 0) strncpy(options->restore_command, value, MAXLEN); + /* node check settings */ + else if (strcmp(name, "archiver_lag_warning") == 0) + options->archiver_lag_warning = repmgr_atoi(value, name, error_list, 1); + else if (strcmp(name, "archiver_lag_critical") == 0) + options->archiver_lag_critical = repmgr_atoi(value, name, error_list, 1); + /* repmgrd settings */ else if (strcmp(name, "failover_mode") == 0) { @@ -604,6 +610,15 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * _("use \"barman_server\" for the name of the [server] section in the Barman configururation file")); } + + /* other sanity checks */ + + if (options->archiver_lag_warning >= options->archiver_lag_critical) + { + item_list_append(error_list, + _("\"archiver_lag_critical\" must be 
greater than \"archiver_lag_warning\"")); + } + } diff --git a/configfile.h b/configfile.h index 3059b7b9..f9794856 100644 --- a/configfile.h +++ b/configfile.h @@ -72,6 +72,10 @@ typedef struct char restore_command[MAXLEN]; TablespaceList tablespace_mapping; + /* node check settings */ + int archiver_lag_warning; + int archiver_lag_critical; + /* repmgrd settings */ failover_mode_opt failover_mode; char location[MAXLEN]; @@ -124,6 +128,8 @@ typedef struct "", "", "", DEFAULT_LOG_STATUS_INTERVAL, \ /* standby clone settings */ \ false, "", "", "", "", { NULL, NULL }, \ + /* node check settings */ \ + DEFAULT_ARCHIVER_LAG_WARNING, DEFAULT_ARCHIVER_LAG_CRITICAL, \ /* repmgrd settings */ \ FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \ DEFAULT_MONITORING_INTERVAL, \ diff --git a/dbutils.c b/dbutils.c index d9e24afb..2addc162 100644 --- a/dbutils.c +++ b/dbutils.c @@ -8,6 +8,8 @@ #include #include #include +#include <sys/stat.h> +#include <dirent.h> #include "repmgr.h" #include "dbutils.h" @@ -1293,6 +1295,87 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea } +int +get_ready_archive_files(PGconn *conn, const char *data_directory) +{ + char archive_status_dir[MAXPGPATH] = ""; + struct stat statbuf; + struct dirent *arcdir_ent; + DIR *arcdir; + + + int ready_count = 0; + + if (server_version_num == UNKNOWN_SERVER_VERSION_NUM) + server_version_num = get_server_version(conn, NULL); + + if (server_version_num >= 100000) + { + snprintf(archive_status_dir, MAXPGPATH, + "%s/pg_wal/archive_status", + data_directory); + } + else + { + snprintf(archive_status_dir, MAXPGPATH, + "%s/pg_xlog/archive_status", + data_directory); + } + + /* sanity-check directory path */ + if (stat(archive_status_dir, &statbuf) == -1) + { + log_error(_("unable to access archive_status directory \"%s\""), + archive_status_dir); + log_detail("%s", strerror(errno)); + /* XXX magic number*/ + return -1; + } + + arcdir = opendir(archive_status_dir); + + if (arcdir == NULL) + { + 
log_error(_("unable to open archive directory \"%s\""), + archive_status_dir); + log_detail("%s", strerror(errno)); + /* XXX magic number*/ + return -1; + } + + while ((arcdir_ent = readdir(arcdir)) != NULL) + { + struct stat statbuf; + char file_path[MAXPGPATH] = ""; + int basenamelen; + + snprintf(file_path, MAXPGPATH, + "%s/%s", + archive_status_dir, + arcdir_ent->d_name); + + /* skip non-files */ + if (stat(file_path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) + { + continue; + } + + basenamelen = (int) strlen(arcdir_ent->d_name) - 6; + + /* + * count anything ending in ".ready"; for a more precise implementation + * see: src/backend/postmaster/pgarch.c + */ + if (basenamelen >= 0 && strcmp(arcdir_ent->d_name + basenamelen, ".ready") == 0) + ready_count ++; + } + + closedir(arcdir); + + return ready_count; +} + + /* ================ */ /* result functions */ /* ================ */ diff --git a/dbutils.h b/dbutils.h index 71554312..0d8677cb 100644 --- a/dbutils.h +++ b/dbutils.h @@ -323,6 +323,7 @@ RecoveryType get_recovery_type(PGconn *conn); int get_primary_node_id(PGconn *conn); bool get_replication_info(PGconn *conn, ReplInfo *replication_info); bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); +int get_ready_archive_files(PGconn *conn, const char *data_directory); /* extension functions */ ExtensionStatus get_repmgr_extension_status(PGconn *conn); diff --git a/repmgr-action-cluster.c b/repmgr-action-cluster.c index d57bcf5a..d4802179 100644 --- a/repmgr-action-cluster.c +++ b/repmgr-action-cluster.c @@ -250,7 +250,7 @@ do_cluster_show(void) } - if (! runtime_options.csv) + if (runtime_options.output_mode == OM_TEXT) { for (i = 0; i < SHOW_HEADER_COUNT; i++) { @@ -283,7 +283,7 @@ do_cluster_show(void) for (cell = nodes.head; cell; cell = cell->next) { - if (runtime_options.csv) + if (runtime_options.output_mode == OM_CSV) { int connection_status = (PQstatus(conn) == CONNECTION_OK) ? 
0 : -1; int recovery_type = RECTYPE_UNKNOWN; @@ -580,7 +580,7 @@ do_cluster_matrix() n = build_cluster_matrix(&matrix_rec_list, &name_length); - if (runtime_options.csv == true) + if (runtime_options.output_mode == OM_CSV) { for (i = 0; i < n; i++) for (j = 0; j < n; j++) diff --git a/repmgr-action-node.c b/repmgr-action-node.c index f1b771af..69cc7f41 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -12,6 +12,7 @@ #include "repmgr.h" #include "controldata.h" #include "dirutil.h" +#include "dbutils.h" #include "repmgr-client-global.h" #include "repmgr-action-node.h" @@ -208,17 +209,17 @@ do_node_status(void) key_value_list_set( &node_status, "Last received LSN", - ""); + "(none)"); key_value_list_set( &node_status, "Last replayed LSN", - ""); + "(none)"); } initPQExpBuffer(&output); - if (runtime_options.csv == true) + if (runtime_options.output_mode == OM_CSV) { /* output header */ appendPQExpBuffer( @@ -379,8 +380,92 @@ void _do_node_status_is_shutdown(void) void do_node_check(void) { + PGconn *conn; + + if (strlen(config_file_options.conninfo)) + conn = establish_db_connection(config_file_options.conninfo, true); + else + conn = establish_db_connection_by_params(&source_conninfo, true); + + /* handle specific checks + * ====================== */ + if (runtime_options.archiver == true) + { + (void) do_node_check_archiver(conn, runtime_options.output_mode, NULL); + } + + PQfinish(conn); } +bool +do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output) +{ + bool own_buffer = false; + int ready_archive_files = 0; + PQExpBufferData buf; + bool check_ok = true; + + if (output == NULL) + { + initPQExpBuffer(&buf); + output = &buf; + own_buffer = true; + } + + ready_archive_files = get_ready_archive_files(conn, config_file_options.data_directory); + + if (ready_archive_files > config_file_options.archiver_lag_critical) + { + switch (mode) + { + case OM_OPTFORMAT: + appendPQExpBuffer( + output, + "--status=CRITICAL --files=%i 
--threshold=%i", + ready_archive_files, + config_file_options.archiver_lag_critical); + break; + default: + break; + } + } + else if (ready_archive_files > config_file_options.archiver_lag_warning) + { + switch (mode) + { + case OM_OPTFORMAT: + appendPQExpBuffer( + output, + "--status=WARNING --files=%i --threshold=%i", + ready_archive_files, + config_file_options.archiver_lag_warning); + break; + default: + break; + } + } + else + { + switch (mode) + { + case OM_OPTFORMAT: + appendPQExpBuffer( + output, + "--status=%s --files=%i", + (ready_archive_files < 0) ? "UNKNOWN" : "OK", ready_archive_files); + break; + default: + break; + } + } + if (own_buffer == true) + { + printf("%s\n", buf.data); + termPQExpBuffer(&buf); + } + + return check_ok; +} // --action=... // --check @@ -723,7 +808,8 @@ do_node_restore_config(void) exit(ERR_BAD_CONFIG); } - while ((arcdir_ent = readdir(arcdir)) != NULL) { + while ((arcdir_ent = readdir(arcdir)) != NULL) + { struct stat statbuf; char src_file_path[MAXPGPATH]; char dest_file_path[MAXPGPATH]; diff --git a/repmgr-action-node.h b/repmgr-action-node.h index 22878b1e..0c5e8f52 100644 --- a/repmgr-action-node.h +++ b/repmgr-action-node.h @@ -8,6 +8,8 @@ extern void do_node_status(void); extern void do_node_check(void); +extern bool do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output); + extern void do_node_archive_config(void); extern void do_node_restore_config(void); extern void do_node_service(void); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 7be9b4f1..247b2f26 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -48,6 +48,8 @@ static t_conninfo_param_list recovery_conninfo; static char recovery_conninfo_str[MAXLEN]; static char upstream_repluser[NAMEDATALEN]; +static int source_server_version_num = UNKNOWN_SERVER_VERSION_NUM; + static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER; static standy_clone_mode mode; @@ -79,6 +81,7 @@ static int get_tablespace_data_barman(char *, 
TablespaceDataList *); static char *make_barman_ssh_command(char *buf); static NodeStatus parse_node_status_is_shutdown(const char *node_status_output, XLogRecPtr *checkPoint); +static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold); /* * do_standby_clone() @@ -1341,11 +1344,14 @@ do_standby_follow(void) if (config_file_options.use_replication_slots) { - int server_version_num = get_server_version(primary_conn, NULL); + int primary_server_version_num = get_server_version(primary_conn, NULL); initPQExpBuffer(&event_details); - if (create_replication_slot(primary_conn, local_node_record.slot_name, server_version_num, &event_details) == false) + if (create_replication_slot(primary_conn, + local_node_record.slot_name, + primary_server_version_num, + &event_details) == false) { log_error("%s", event_details.data); @@ -1546,12 +1552,6 @@ do_standby_follow(void) * - currently only set up for two-node operation; any other * standbys will probably become downstream cascaded standbys * of the old primary once it's restarted - * - as we're executing repmgr remotely (on the old primary), - * we'll need the location of its configuration file; this - * can be provided explicitly with -C/--remote-config-file, - * otherwise repmgr will look in default locations on the - * remote server (starting with the same path as the local - * configuration file). 
* * TODO: * - make connection test timeouts/intervals configurable (see below) @@ -1711,8 +1711,8 @@ do_standby_switchover(void) termPQExpBuffer(&reason); } - PQfinish(local_conn); PQfinish(remote_conn); + PQfinish(local_conn); /* * Check that we can connect by SSH to the remote (current primary) server @@ -1728,6 +1728,64 @@ do_standby_switchover(void) exit(ERR_BAD_CONFIG); } + /* check replication status */ + { + bool command_success; + int files = 0; + int threshold = 0; + CheckStatus status; + + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str); + appendPQExpBuffer(&remote_command_str, + "node check --terse -LERROR --archiver --optformat"); + + initPQExpBuffer(&command_output); + + command_success = remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + status = parse_node_check_archiver(command_output.data, &files, &threshold); + log_debug("%i %i; '%s'", files, threshold, command_output.data); + termPQExpBuffer(&command_output); + if (status == CHECK_STATUS_CRITICAL) + { + if (runtime_options.force == false) + { + log_error(_("number of pending archive files on demotion candidate \"%s\" is critical"), + remote_node_record.node_name); + log_detail(_("%i pending archive files (critical threshold: %i)"), + files, threshold); + log_hint(_("PostgreSQL will not shut down until all files are archived; use -F/--force to continue anyway")); + exit(ERR_SWITCHOVER_FAIL); + } + else + { + log_warning(_("number of pending archive files on demotion candidate \"%s\" is critical"), + remote_node_record.node_name); + log_detail(_("%i pending archive files (critical threshold: %i)"), + files, threshold); + log_notice(_("-F/--force set, continuing with switchover")); + } + + } + else if (status == CHECK_STATUS_WARNING) + { + log_warning(_("number of pending archive files on demotion candidate \"%s\" is warning"), + remote_node_record.node_name); + log_detail(_("%i pending archive 
files (warning threshold: %i)"), + files, threshold); + log_hint(_("PostgreSQL will not shut down until all files are archived")); + } + } + + + /* Determine the remote's configuration file location */ /* -------------------------------------------------- */ @@ -1854,8 +1912,6 @@ do_standby_switchover(void) * after a certain time. */ - // TODO: check remote node for archive status etc. - initPQExpBuffer(&remote_command_str); initPQExpBuffer(&command_output); @@ -2111,9 +2167,9 @@ check_source_server() /* Verify that upstream node is a supported server version */ log_verbose(LOG_INFO, _("connected to source node, checking its state")); - server_version_num = check_server_version(source_conn, "primary", true, NULL); + source_server_version_num = check_server_version(source_conn, "primary", true, NULL); - check_upstream_config(source_conn, server_version_num, true); + check_upstream_config(source_conn, source_server_version_num, true); if (get_cluster_size(source_conn, cluster_size) == false) exit(ERR_DB_QUERY); @@ -2488,7 +2544,7 @@ initialise_direct_clone(t_node_info *node_record) PQExpBufferData event_details; initPQExpBuffer(&event_details); - if (create_replication_slot(privileged_conn, node_record->slot_name, server_version_num, &event_details) == false) + if (create_replication_slot(privileged_conn, node_record->slot_name, source_server_version_num, &event_details) == false) { log_error("%s", event_details.data); @@ -3541,3 +3597,144 @@ parse_node_status_is_shutdown(const char *node_status_output, XLogRecPtr *checkP return node_status; } + + + +static CheckStatus +parse_node_check_archiver(const char *node_check_output, int *files, int *threshold) +{ + int options_len = 0; + char *options_string = NULL; + char *options_string_ptr = NULL; + + CheckStatus status = CHECK_STATUS_UNKNOWN; + + + /* + * Add parsed options to this list, then copy to an array + * to pass to getopt + */ + ItemList option_argv = { NULL, NULL }; + + char *argv_item; + int c, 
argc_item = 1; + + char **argv_array; + ItemListCell *cell; + + int optindex = 0; + + /* We're only interested in these options */ + static struct option long_options[] = + { + {"status", required_argument, NULL, 'S'}, + {"files", required_argument, NULL, 'f'}, + {"threshold", required_argument, NULL, 't'}, + {NULL, 0, NULL, 0} + }; + + *files = 0; + *threshold = 0; + + /* Don't attempt to tokenise an empty string */ + if (!strlen(node_check_output)) + { + return status; + } + + options_len = strlen(node_check_output) + 1; + options_string = pg_malloc(options_len); + options_string_ptr = options_string; + + /* Copy the string before operating on it with strtok() */ + strncpy(options_string, node_check_output, options_len); + + /* Extract arguments into a list and keep a count of the total */ + while ((argv_item = strtok(options_string_ptr, " ")) != NULL) + { + item_list_append(&option_argv, argv_item); + + argc_item++; + + if (options_string_ptr != NULL) + options_string_ptr = NULL; + } + + /* + * Array of argument values to pass to getopt_long - this will need to + * include an empty string as the first value (normally this would be + * the program name) + */ + argv_array = pg_malloc0(sizeof(char *) * (argc_item + 2)); + + /* Insert a blank dummy program name at the start of the array */ + argv_array[0] = pg_malloc0(1); + + c = 1; + + /* + * Copy the previously extracted arguments from our list to the array + */ + for (cell = option_argv.head; cell; cell = cell->next) + { + int argv_len = strlen(cell->string) + 1; + + argv_array[c] = pg_malloc0(argv_len); + + strncpy(argv_array[c], cell->string, argv_len); + + c++; + } + + argv_array[c] = NULL; + + /* Reset getopt's optind variable */ + optind = 0; + + /* Prevent getopt from emitting errors */ + opterr = 0; + + while ((c = getopt_long(argc_item, argv_array, "f:S:t:", long_options, + &optindex)) != -1) + { + switch (c) + { + /* --files */ + case 'f': + *files = atoi(optarg); + break; + + case 't': + *threshold = 
atoi(optarg); + break; + + /* --status */ + case 'S': + { + if (strncmp(optarg, "OK", MAXLEN) == 0) + { + status = CHECK_STATUS_OK; + } + else if (strncmp(optarg, "WARNING", MAXLEN) == 0) + { + status = CHECK_STATUS_WARNING; + } + else if (strncmp(optarg, "CRITICAL", MAXLEN) == 0) + { + status = CHECK_STATUS_CRITICAL; + } + else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0) + { + status = CHECK_STATUS_UNKNOWN; + } + else + { + status = CHECK_STATUS_UNKNOWN; + } + } + break; + } + } + + return status; +} diff --git a/repmgr-client-global.h b/repmgr-client-global.h index d6bb2b3c..58e7891e 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -15,6 +15,13 @@ /* default value for "cluster event --limit"*/ #define CLUSTER_EVENT_LIMIT 20 +typedef enum { + OM_TEXT, + OM_CSV, + OM_NAGIOS, + OM_OPTFORMAT +} OutputMode; + typedef struct { /* configuration metadata */ @@ -39,6 +46,8 @@ typedef struct /* output options */ bool csv; + bool nagios; + bool optformat; /* standard connection options */ char dbname[MAXLEN]; @@ -81,6 +90,9 @@ typedef struct /* "node status" options */ bool is_shutdown; + /* "node check" options */ + bool archiver; + /* "node service" options */ char action[MAXLEN]; bool check; @@ -94,6 +106,7 @@ typedef struct /* following options for internal use */ char config_archive_dir[MAXPGPATH]; + OutputMode output_mode; } t_runtime_options; #define T_RUNTIME_OPTIONS_INITIALIZER { \ @@ -104,7 +117,7 @@ typedef struct /* logging options */ \ "", false, false, false, \ /* output options */ \ - false, \ + false, false, false, \ /* database connection options */ \ "", "", "", "", \ /* other connection options */ \ @@ -120,11 +133,14 @@ typedef struct "", false, false, \ /* "node status" options */ \ false, \ + /* "node check" options */ \ + false, \ /* "node service" options */ \ "", false, false, false, \ /* "cluster event" options */ \ false, "", CLUSTER_EVENT_LIMIT, \ - "/tmp" \ + /* Following options for internal use */ \ + "/tmp", OM_TEXT \ } @@ 
-144,6 +160,13 @@ typedef enum { } t_server_action; +typedef enum { + CHECK_STATUS_OK = 0, + CHECK_STATUS_WARNING, + CHECK_STATUS_CRITICAL, + CHECK_STATUS_UNKNOWN +} CheckStatus; + /* global configuration structures */ extern t_runtime_options runtime_options; diff --git a/repmgr-client.c b/repmgr-client.c index d8cf6df3..f1dd225a 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -15,6 +15,7 @@ * STANDBY REGISTER * STANDBY UNREGISTER * STANDBY PROMOTE + * STANDBY FOLLOW * STANDBY SWITCHOVER * * BDR REGISTER @@ -26,6 +27,7 @@ * CLUSTER MATRIX * * NODE STATUS + * NODE CHECK * * For internal use: * NODE ARCHIVE-CONFIG @@ -76,6 +78,7 @@ t_node_info target_node_info = T_NODE_INFO_INITIALIZER; static ItemList cli_errors = { NULL, NULL }; static ItemList cli_warnings = { NULL, NULL }; + int main(int argc, char **argv) { @@ -418,6 +421,12 @@ main(int argc, char **argv) runtime_options.is_shutdown = true; break; + /* "node check" options * + * --------------------- */ + case OPT_ARCHIVER: + runtime_options.archiver = true; + break; + /* "node service" options * * ---------------------- */ @@ -501,6 +510,14 @@ main(int argc, char **argv) runtime_options.csv = true; break; + case OPT_NAGIOS: + runtime_options.nagios = true; + break; + + case OPT_OPTFORMAT: + runtime_options.optformat = true; + break; + /* internal options */ case OPT_CONFIG_ARCHIVE_DIR: /* TODO: check this is an absolute path */ @@ -811,6 +828,22 @@ main(int argc, char **argv) print_item_list(&cli_warnings); } + /* post-processing following command line parameter checks + * ======================================================= */ + + if (runtime_options.csv == true) + { + runtime_options.output_mode = OM_CSV; + } + else if (runtime_options.nagios == true) + { + runtime_options.output_mode = OM_NAGIOS; + } + else if (runtime_options.optformat == true) + { + runtime_options.output_mode = OM_OPTFORMAT; + } + /* * The configuration file is not required for some actions (e.g. 
'standby clone'), * however if available we'll parse it anyway for options like 'log_level', @@ -822,6 +855,7 @@ main(int argc, char **argv) &config_file_options, argv[0]); + /* Some configuration file items can be overriden by command line options */ /* Command-line parameter -L/--log-level overrides any setting in config file*/ if (*runtime_options.log_level != '\0') @@ -1359,7 +1393,7 @@ check_cli_parameters(const int action) } } - if ( runtime_options.force_rewind == true) + if (runtime_options.force_rewind == true) { switch (action) { @@ -1372,7 +1406,26 @@ check_cli_parameters(const int action) } } + /* check only one of --csv, --nagios and --optformat used */ + { + int used_options = 0; + if (runtime_options.csv == true) + used_options ++; + + if (runtime_options.nagios == true) + used_options ++; + + if (runtime_options.optformat == true) + used_options ++; + + if (used_options > 1) + { + /* TODO: list which options were used */ + item_list_append(&cli_errors, + "only one of --csv, --nagios and --optformat can be used"); + } + } } diff --git a/repmgr-client.h b/repmgr-client.h index 9bdd2daf..5517a7b3 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -34,40 +34,43 @@ #define CLUSTER_EVENT 20 /* command line options without short versions */ -#define OPT_HELP 1 -#define OPT_CHECK_UPSTREAM_CONFIG 2 -#define OPT_RECOVERY_MIN_APPLY_DELAY 3 -#define OPT_COPY_EXTERNAL_CONFIG_FILES 4 -#define OPT_CONFIG_ARCHIVE_DIR 5 -#define OPT_PG_REWIND 6 -#define OPT_PWPROMPT 7 -#define OPT_CSV 8 -#define OPT_NODE 9 -#define OPT_NODE_ID 10 -#define OPT_NODE_NAME 11 -#define OPT_WITHOUT_BARMAN 12 -#define OPT_NO_UPSTREAM_CONNECTION 13 -#define OPT_REGISTER_WAIT 14 -#define OPT_LOG_TO_FILE 15 -#define OPT_UPSTREAM_CONNINFO 16 +#define OPT_HELP 1001 +#define OPT_CHECK_UPSTREAM_CONFIG 1002 +#define OPT_RECOVERY_MIN_APPLY_DELAY 1003 +#define OPT_COPY_EXTERNAL_CONFIG_FILES 1004 +#define OPT_CONFIG_ARCHIVE_DIR 1005 +#define OPT_PG_REWIND 1006 +#define OPT_PWPROMPT 1007 +#define 
OPT_CSV 1008 +#define OPT_NODE 1009 +#define OPT_NODE_ID 1010 +#define OPT_NODE_NAME 1011 +#define OPT_WITHOUT_BARMAN 1012 +#define OPT_NO_UPSTREAM_CONNECTION 1013 +#define OPT_REGISTER_WAIT 1014 +#define OPT_LOG_TO_FILE 1015 +#define OPT_UPSTREAM_CONNINFO 1016 /* replaces --no-conninfo-password */ -#define OPT_USE_RECOVERY_CONNINFO_PASSWORD 17 -#define OPT_REPLICATION_USER 18 -#define OPT_EVENT 19 -#define OPT_LIMIT 20 -#define OPT_ALL 21 -#define OPT_DRY_RUN 22 -#define OPT_UPSTREAM_NODE_ID 23 -#define OPT_ACTION 24 -#define OPT_LIST_ACTIONS 25 -#define OPT_CHECK 26 -#define OPT_CHECKPOINT 27 -#define OPT_IS_SHUTDOWN 28 -#define OPT_ALWAYS_PROMOTE 29 -#define OPT_FORCE_REWIND 30 +#define OPT_USE_RECOVERY_CONNINFO_PASSWORD 1017 +#define OPT_REPLICATION_USER 1018 +#define OPT_EVENT 1019 +#define OPT_LIMIT 1020 +#define OPT_ALL 1021 +#define OPT_DRY_RUN 1022 +#define OPT_UPSTREAM_NODE_ID 1023 +#define OPT_ACTION 1024 +#define OPT_LIST_ACTIONS 1025 +#define OPT_CHECK 1026 +#define OPT_CHECKPOINT 1027 +#define OPT_IS_SHUTDOWN 1028 +#define OPT_ALWAYS_PROMOTE 1029 +#define OPT_FORCE_REWIND 1030 +#define OPT_NAGIOS 1031 +#define OPT_ARCHIVER 1032 +#define OPT_OPTFORMAT 1033 /* deprecated since 3.3 */ -#define OPT_DATA_DIR 998 -#define OPT_NO_CONNINFO_PASSWORD 999 +#define OPT_DATA_DIR 999 +#define OPT_NO_CONNINFO_PASSWORD 998 static struct option long_options[] = @@ -104,6 +107,8 @@ static struct option long_options[] = /* output options */ {"csv", no_argument, NULL, OPT_CSV}, + {"nagios", no_argument, NULL, OPT_NAGIOS}, + {"optformat", no_argument, NULL, OPT_OPTFORMAT}, /* "standby clone" options */ {"copy-external-config-files", optional_argument, NULL, OPT_COPY_EXTERNAL_CONFIG_FILES}, @@ -128,6 +133,9 @@ static struct option long_options[] = /* "node status" options */ {"is-shutdown", no_argument, NULL, OPT_IS_SHUTDOWN }, +/* "node check" options */ + {"archiver", no_argument, NULL, OPT_ARCHIVER }, + /* "node service" options */ {"action", required_argument, NULL, 
OPT_ACTION}, {"check", no_argument, NULL, OPT_CHECK}, @@ -146,6 +154,9 @@ static struct option long_options[] = {"no-conninfo-password", no_argument, NULL, OPT_NO_CONNINFO_PASSWORD}, /* legacy alias for -D/--pgdata*/ {"data-dir", required_argument, NULL, OPT_DATA_DIR}, + /* --node-id */ + {"node", required_argument, NULL, OPT_NODE}, + /* not yet handled */ {"keep-history", required_argument, NULL, 'k'}, @@ -154,10 +165,6 @@ static struct option long_options[] = {"pg_rewind", optional_argument, NULL, OPT_PG_REWIND}, {"pwprompt", optional_argument, NULL, OPT_PWPROMPT}, - {"node", required_argument, NULL, OPT_NODE}, - {"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN}, - {"copy-external-config-files", optional_argument, NULL, OPT_COPY_EXTERNAL_CONFIG_FILES}, - {"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT}, {NULL, 0, NULL, 0} }; diff --git a/repmgr.h b/repmgr.h index ddb2f193..5d3d78b9 100644 --- a/repmgr.h +++ b/repmgr.h @@ -40,13 +40,16 @@ #define DEFAULT_LOCATION "default" #define DEFAULT_PRIORITY 100 -#define DEFAULT_RECONNECTION_ATTEMPTS 6 -#define DEFAULT_RECONNECTION_INTERVAL 10 -#define DEFAULT_MONITORING_INTERVAL 2 -#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 -#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 -#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 -#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 +#define DEFAULT_RECONNECTION_ATTEMPTS 6 /* number of attempts */ +#define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */ +#define DEFAULT_MONITORING_INTERVAL 2 /* seconds */ +#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */ +#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */ +#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */ +#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */ +#define DEFAULT_ARCHIVER_LAG_WARNING 16 /* WAL files */ +#define DEFAULT_ARCHIVER_LAG_CRITICAL 128 /* WAL files */ + #define FAILOVER_NODES_MAX_CHECK 50