diff --git a/HISTORY b/HISTORY index 052c962a..2670eb7d 100644 --- a/HISTORY +++ b/HISTORY @@ -1,3 +1,6 @@ +4.1.0 2018-??-?? + repmgr: add "--missing-slots" check to "repmgr node check" + 4.0.6 2018-06-14 repmgr: (witness register) prevent registration of a witness server with the same name as an existing node (Ian) diff --git a/doc/repmgr-node-check.sgml b/doc/repmgr-node-check.sgml index 94f422d7..9a80e949 100644 --- a/doc/repmgr-node-check.sgml +++ b/doc/repmgr-node-check.sgml @@ -77,6 +77,12 @@ + + + --missing-slots: checks there are no missing replication slots + + + diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 5c55e14e..81cf72c0 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -47,6 +47,7 @@ static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, Check static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); +static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); /* * NODE STATUS @@ -493,6 +494,7 @@ do_node_status(void) log_hint(_("execute \"repmgr node check\" for more details")); } + clear_node_info_list(&missing_slots); key_value_list_free(&node_status); item_list_free(&warnings); PQfinish(conn); @@ -712,6 +714,17 @@ do_node_check(void) exit(return_code); } + if (runtime_options.missing_slots == true) + { + return_code = do_node_check_missing_slots(conn, + runtime_options.output_mode, + &node_info, + NULL); + PQfinish(conn); + exit(return_code); + } + + if (runtime_options.output_mode == OM_NAGIOS) { log_error(_("--nagios can only be used with a specific check")); @@ -730,6 +743,7 @@ do_node_check(void) (void) do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list); (void) do_node_check_downstream(conn, runtime_options.output_mode, &status_list); (void) do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list); + (void) do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list); if (runtime_options.output_mode == OM_CSV) { @@ -1583,6 +1597,130 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check } +static CheckStatus +do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output) +{ + CheckStatus status = CHECK_STATUS_OK; + PQExpBufferData details; + NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER; + + if (mode == OM_CSV && list_output == NULL) + { + log_error(_("--csv output not provided with --missing-slots option")); + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + initPQExpBuffer(&details); + + if (server_version_num < 90400) + { + appendPQExpBuffer(&details, + _("replication slots not available for this PostgreSQL version")); + } + else + { + get_downstream_nodes_with_missing_slot(conn, + config_file_options.node_id, + &missing_slots); + + if (missing_slots.node_count == 0) + { + appendPQExpBuffer(&details, + _("node has no missing replication slots")); + + } + else + { + NodeInfoListCell *missing_slot_cell = NULL; + bool first_element = true; + + status = CHECK_STATUS_CRITICAL; + + appendPQExpBuffer(&details, + _("%i replication slots are missing"), + missing_slots.node_count); + + if (missing_slots.node_count) + { + appendPQExpBuffer(&details, ": "); + + for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next) + { + if (first_element == true) + { + first_element = false; + } + else + { + appendPQExpBuffer(&details, ", "); + } + + appendPQExpBuffer(&details, "%s", missing_slot_cell->node_info->slot_name); + } + } + } + } + + switch (mode) + { + case OM_NAGIOS: + { + printf("REPMGR_MISSING_SLOTS %s: %s | missing_slots=%i", + output_check_status(status), + details.data, + missing_slots.node_count); + + if (missing_slots.node_count) + { + NodeInfoListCell *missing_slot_cell = NULL; + bool first_element = true; + + printf(";"); + + for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next) + { + if (first_element == true) + { + first_element = false; + } + else + { + printf(","); + } + printf("%s", missing_slot_cell->node_info->slot_name); + } + } + printf("\n"); + break; + } + case OM_CSV: + case OM_TEXT: + if (list_output != NULL) + { + check_status_list_set(list_output, + "Replication slots", + status, + details.data); + } + else + { + printf("%s (%s)\n", + output_check_status(status), + details.data); + } + default: + break; + } + + clear_node_info_list(&missing_slots); + + termPQExpBuffer(&details); + return status; +} + + + void do_node_service(void) { @@ -2664,6 +2802,7 @@ do_node_help(void) printf(_(" --replication-lag replication lag in seconds (standbys only)\n")); printf(_(" --role check node has expected role\n")); printf(_(" --slots check for inactive replication slots\n")); + printf(_(" --missing-slots check for missing replication slots\n")); puts(""); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index d0e78a08..fd6f73c8 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -106,6 +106,7 @@ typedef struct bool replication_lag; bool role; bool slots; + bool missing_slots; bool has_passfile; bool replication_connection; @@ -158,7 +159,7 @@ typedef struct /* "node status" options */ \ false, \ /* "node check" options */ \ - false, false, false, false, false, false, false, \ + false, false, false, false, false, false, false, false, \ /* "node join" options */ \ "", \ /* "node service" options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index bd3fecab..48069246 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -473,6 +473,10 @@ main(int argc, char **argv) runtime_options.slots = true; break; + case OPT_MISSING_SLOTS: + runtime_options.missing_slots = true; + break; + case OPT_HAS_PASSFILE: runtime_options.has_passfile = true; break; diff --git a/repmgr-client.h b/repmgr-client.h index f8bcc3b7..19d64fb2 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -87,6 +87,7 @@ #define OPT_REMOTE_NODE_ID 1038 #define OPT_RECOVERY_CONF_ONLY 1039 #define OPT_NO_WAIT 1040 +#define OPT_MISSING_SLOTS 1041 /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 @@ -164,6 +165,7 @@ static struct option long_options[] = {"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG}, {"role", no_argument, NULL, OPT_ROLE}, {"slots", no_argument, NULL, OPT_SLOTS}, + {"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS}, {"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE}, {"replication-connection", no_argument, NULL, OPT_REPL_CONN},