diff --git a/README.md b/README.md index a0b8011d..fa7775e1 100644 --- a/README.md +++ b/README.md @@ -120,10 +120,14 @@ The following commands are available: * `--role`: checks if the node has the expected role * `--replication-lag"`: checks if the node is lagging by more than `replication_lag_warning` or `replication_lag_critical` seconds. - * `--archiver`: checks for WAL files which have not yet been archived + * `--archive-ready`: checks for WAL files which have not yet been archived * `--downstream`: checks that the expected downstream nodes are attached * `--slots`: checks there are no inactive replication slots + Individual checks can also be output in a Nagios-compatible format with + the option `--nagios`. + + * `cluster show` Displays information about each active node in the replication cluster. This diff --git a/configfile.c b/configfile.c index e51ba461..34cc3fdb 100644 --- a/configfile.c +++ b/configfile.c @@ -390,10 +390,10 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * strncpy(options->restore_command, value, MAXLEN); /* node check settings */ - else if (strcmp(name, "archiver_lag_warning") == 0) - options->archiver_lag_warning = repmgr_atoi(value, name, error_list, 1); - else if (strcmp(name, "archiver_lag_critcial") == 0) - options->archiver_lag_critical = repmgr_atoi(value, name, error_list, 1); + else if (strcmp(name, "archive_ready_warning") == 0) + options->archive_ready_warning = repmgr_atoi(value, name, error_list, 1); + else if (strcmp(name, "archive_ready_critical") == 0) + options->archive_ready_critical = repmgr_atoi(value, name, error_list, 1); else if (strcmp(name, "replication_lag_warning") == 0) options->replication_lag_warning = repmgr_atoi(value, name, error_list, 1); else if (strcmp(name, "replication_lag_critical") == 0) @@ -617,10 +617,10 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * /* other sanity checks */ - if (options->archiver_lag_warning >= 
options->archiver_lag_critical) + if (options->archive_ready_warning >= options->archive_ready_critical) { item_list_append(error_list, - _("\archiver_lag_critical\" must be greater than \"archiver_lag_warning\"")); + _("\"archive_ready_critical\" must be greater than \"archive_ready_warning\"")); } if( options->replication_lag_warning >= options->replication_lag_critical) diff --git a/configfile.h b/configfile.h index ca07fc92..c6d50d6a 100644 --- a/configfile.h +++ b/configfile.h @@ -77,8 +77,8 @@ typedef struct TablespaceList tablespace_mapping; /* node check settings */ - int archiver_lag_warning; - int archiver_lag_critical; + int archive_ready_warning; + int archive_ready_critical; int replication_lag_warning; int replication_lag_critical; @@ -135,7 +135,7 @@ typedef struct /* standby clone settings */ \ false, "", "", "", "", { NULL, NULL }, \ /* node check settings */ \ - DEFAULT_ARCHIVER_LAG_WARNING, DEFAULT_ARCHIVER_LAG_CRITICAL, \ + DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \ DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \ /* repmgrd settings */ \ FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \ diff --git a/repmgr-action-node.c b/repmgr-action-node.c index f4b262bc..93c2e111 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -29,7 +29,7 @@ static void _do_node_status_is_shutdown(void); static void _do_node_archive_config(void); static void _do_node_restore_config(void); -static CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_output); +static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output); static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output); static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, 
t_node_info *node_info, CheckStatusList *list_output); @@ -554,9 +554,9 @@ do_node_check(void) /* handle specific checks * ====================== */ - if (runtime_options.archiver == true) + if (runtime_options.archive_ready == true) { - (void) do_node_check_archiver(conn, runtime_options.output_mode, NULL); + (void) do_node_check_archive_ready(conn, runtime_options.output_mode, NULL); PQfinish(conn); return; } @@ -598,7 +598,7 @@ do_node_check(void) /* order functions are called is also output order */ (void) do_node_check_role(conn, runtime_options.output_mode, &node_info, &status_list); (void) do_node_check_replication_lag(conn, runtime_options.output_mode, &node_info, &status_list); - (void) do_node_check_archiver(conn, runtime_options.output_mode, &status_list); + (void) do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list); (void) do_node_check_downstream(conn, runtime_options.output_mode, &status_list); (void) do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list); @@ -722,7 +722,7 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS switch (mode) { case OM_NAGIOS: - printf("PG_SERVER_ROLE %s: %s\n", + printf("REPMGR_SERVER_ROLE %s: %s\n", output_check_status(status), details.data); break; @@ -786,7 +786,7 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check switch (mode) { case OM_NAGIOS: - printf("PG_INACTIVE_SLOTS %s: %s\n", + printf("REPMGR_INACTIVE_SLOTS %s: %s\n", output_check_status(status), details.data); break; @@ -814,7 +814,7 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check static CheckStatus -do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_output) +do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output) { int ready_archive_files = 0; CheckStatus status = CHECK_STATUS_UNKNOWN; @@ -822,7 +822,7 @@ do_node_check_archiver(PGconn *conn, OutputMode 
mode, CheckStatusList *list_outp if (mode == OM_CSV) { - log_error(_("--csv output not provided with --archiver option")); + log_error(_("--csv output not provided with --archive-ready option")); PQfinish(conn); exit(ERR_BAD_CONFIG); } @@ -831,7 +831,7 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_outp ready_archive_files = get_ready_archive_files(conn, config_file_options.data_directory); - if (ready_archive_files > config_file_options.archiver_lag_critical) + if (ready_archive_files > config_file_options.archive_ready_critical) { status = CHECK_STATUS_CRITICAL; @@ -841,26 +841,29 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_outp appendPQExpBuffer( &details, "--files=%i --threshold=%i", - ready_archive_files, config_file_options.archiver_lag_critical); + ready_archive_files, config_file_options.archive_ready_critical); break; case OM_NAGIOS: appendPQExpBuffer( &details, - "%i pending files (critical: %i)", - ready_archive_files, config_file_options.archiver_lag_critical); + "%i pending archive ready files | files=%i;%i;%i", + ready_archive_files, + ready_archive_files, + config_file_options.archive_ready_warning, + config_file_options.archive_ready_critical); break; case OM_TEXT: appendPQExpBuffer( &details, - "%i pending files, threshold: %i", - ready_archive_files, config_file_options.archiver_lag_critical); + "%i pending archive ready files, critical threshold: %i", + ready_archive_files, config_file_options.archive_ready_critical); break; default: break; } } - else if (ready_archive_files > config_file_options.archiver_lag_warning) + else if (ready_archive_files > config_file_options.archive_ready_warning) { status = CHECK_STATUS_WARNING; @@ -870,19 +873,23 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_outp appendPQExpBuffer( &details, "--files=%i --threshold=%i", - ready_archive_files, config_file_options.archiver_lag_warning); + ready_archive_files, 
config_file_options.archive_ready_warning); break; case OM_NAGIOS: appendPQExpBuffer( &details, - "%i pending files (warning: %i)", - ready_archive_files, config_file_options.archiver_lag_warning); + "%i pending archive ready files | files=%i;%i;%i", + ready_archive_files, + ready_archive_files, + config_file_options.archive_ready_warning, + config_file_options.archive_ready_critical); + break; case OM_TEXT: appendPQExpBuffer( &details, - "%i pending files (threshold: %i)", - ready_archive_files, config_file_options.archiver_lag_warning); + "%i pending archive ready files (threshold: %i)", + ready_archive_files, config_file_options.archive_ready_warning); break; default: @@ -920,10 +927,18 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_outp "--files=%i", ready_archive_files); break; case OM_NAGIOS: + appendPQExpBuffer( + &details, + "%i pending archive ready files | files=%i;%i;%i", + ready_archive_files, + ready_archive_files, + config_file_options.archive_ready_warning, + config_file_options.archive_ready_critical); + break; case OM_TEXT: appendPQExpBuffer( &details, - "%i pending files", ready_archive_files); + "%i pending archive ready files", ready_archive_files); break; default: @@ -941,7 +956,7 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_outp } break; case OM_NAGIOS: - printf("PG_ARCHIVER %s: %s\n", + printf("REPMGR_ARCHIVE_READY %s: %s\n", output_check_status(status), details.data); break; @@ -1126,7 +1141,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i } break; case OM_NAGIOS: - printf("PG_REPLICATION_LAG %s: %s\n", + printf("REPMGR_REPLICATION_LAG %s: %s\n", output_check_status(status), details.data); break; @@ -1235,7 +1250,7 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou switch (mode) { case OM_NAGIOS: - printf("PG_DOWNSTREAM_SERVERS %s: %s\n", + printf("REPMGR_DOWNSTREAM_SERVERS %s: %s\n", 
output_check_status(status), details.data); break; diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 709decf2..b8ae1296 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -1792,7 +1792,7 @@ do_standby_switchover(void) initPQExpBuffer(&remote_command_str); make_remote_repmgr_path(&remote_command_str, &remote_node_record); appendPQExpBuffer(&remote_command_str, - "node check --terse -LERROR --archiver --optformat"); + "node check --terse -LERROR --archive-ready --optformat"); initPQExpBuffer(&command_output); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 8c8957f3..43d39ee1 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -84,7 +84,7 @@ typedef struct bool is_shutdown; /* "node check" options */ - bool archiver; + bool archive_ready; bool downstream; bool replication_lag; bool role; diff --git a/repmgr-client.c b/repmgr-client.c index 44700e2b..59b665d7 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -422,8 +422,8 @@ main(int argc, char **argv) /* "node check" options * * --------------------- */ - case OPT_ARCHIVER: - runtime_options.archiver = true; + case OPT_ARCHIVE_READY: + runtime_options.archive_ready = true; break; case OPT_DOWNSTREAM: diff --git a/repmgr-client.h b/repmgr-client.h index db01dcf2..f6928e39 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -65,7 +65,7 @@ #define OPT_ALWAYS_PROMOTE 1029 #define OPT_FORCE_REWIND 1030 #define OPT_NAGIOS 1031 -#define OPT_ARCHIVER 1032 +#define OPT_ARCHIVE_READY 1032 #define OPT_OPTFORMAT 1033 #define OPT_REPLICATION_LAG 1034 #define OPT_CONFIG_FILES 1035 @@ -140,7 +140,7 @@ static struct option long_options[] = {"is-shutdown", no_argument, NULL, OPT_IS_SHUTDOWN }, /* "node check" options */ - {"archiver", no_argument, NULL, OPT_ARCHIVER }, + {"archive-ready", no_argument, NULL, OPT_ARCHIVE_READY }, {"downstream", no_argument, NULL, OPT_DOWNSTREAM }, {"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG }, {"role", 
no_argument, NULL, OPT_ROLE }, diff --git a/repmgr.conf.sample b/repmgr.conf.sample index c66a9b65..4df8208f 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -252,8 +252,8 @@ ssh_options='-q' # Options to append to "ssh" # Various warning/critical thresholds used by "repmgr node check". -#archiver_lag_warning=16 # repmgr node check --archiver -#archiver_lag_critical=128 # +#archive_ready_warning=16 # repmgr node check --archive-ready +#archive_ready_critical=128 # # Numbers of files pending archiving via PostgreSQL's # "archive_command" configuration parameter. If # files can't be archived fast enough, or the archive diff --git a/repmgr.h b/repmgr.h index c2d83d84..3b41e39d 100644 --- a/repmgr.h +++ b/repmgr.h @@ -54,8 +54,8 @@ #define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */ #define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */ #define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */ -#define DEFAULT_ARCHIVER_LAG_WARNING 16 /* WAL files */ -#define DEFAULT_ARCHIVER_LAG_CRITICAL 128 /* WAL files */ +#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */ +#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */ #define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */ #define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */