node check: add --missing-slots check

This enables an explicit check for slots which should exist (according
to the repmgr metadata) but which aren't present.
This commit is contained in:
Ian Barwick
2018-06-22 17:19:56 +09:00
parent dd7a4068d2
commit 080a29c33b
6 changed files with 156 additions and 1 deletions

View File

@@ -1,3 +1,6 @@
4.1.0 2018-??-??
repmgr: add "--missing-slots" check to "repmgr node check"
4.0.6 2018-06-14
repmgr: (witness register) prevent registration of a witness server with the
same name as an existing node (Ian)

View File

@@ -77,6 +77,12 @@
</simpara>
</listitem>
<listitem>
<simpara>
<literal>--missing-slots</literal>: checks there are no missing replication slots
</simpara>
</listitem>
</itemizedlist>
</para>
</refsect1>

View File

@@ -47,6 +47,7 @@ static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, Check
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
/*
* NODE STATUS
@@ -493,6 +494,7 @@ do_node_status(void)
log_hint(_("execute \"repmgr node check\" for more details"));
}
clear_node_info_list(&missing_slots);
key_value_list_free(&node_status);
item_list_free(&warnings);
PQfinish(conn);
@@ -712,6 +714,17 @@ do_node_check(void)
exit(return_code);
}
if (runtime_options.missing_slots == true)
{
return_code = do_node_check_missing_slots(conn,
runtime_options.output_mode,
&node_info,
NULL);
PQfinish(conn);
exit(return_code);
}
if (runtime_options.output_mode == OM_NAGIOS)
{
log_error(_("--nagios can only be used with a specific check"));
@@ -730,6 +743,7 @@ do_node_check(void)
(void) do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list);
(void) do_node_check_downstream(conn, runtime_options.output_mode, &status_list);
(void) do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list);
(void) do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list);
if (runtime_options.output_mode == OM_CSV)
{
@@ -1583,6 +1597,130 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
}
/*
 * do_node_check_missing_slots()
 *
 * Check for replication slots which should exist on this node according to
 * the repmgr metadata, but which are not present.
 *
 * Parameters:
 *   conn        - connection passed to get_downstream_nodes_with_missing_slot();
 *                 closed here only on the fatal "--csv" error path
 *   mode        - output format (OM_NAGIOS, OM_CSV or OM_TEXT)
 *   node_info   - currently unused; the node is identified via
 *                 config_file_options.node_id
 *   list_output - if not NULL, the result is added to this list (CSV/text
 *                 modes) instead of being printed directly
 *
 * Returns CHECK_STATUS_CRITICAL if at least one slot is missing, otherwise
 * CHECK_STATUS_OK (including when the server version predates replication
 * slots).
 *
 * Exits with ERR_BAD_CONFIG if CSV output is requested for this check on
 * its own (i.e. "mode" is OM_CSV and "list_output" is NULL).
 */
static CheckStatus
do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
	CheckStatus status = CHECK_STATUS_OK;
	PQExpBufferData details;
	NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER;
	NodeInfoListCell *cell = NULL;
	const char *separator = NULL;

	/* CSV output is only generated as part of the combined check report */
	if (mode == OM_CSV && list_output == NULL)
	{
		log_error(_("--csv output not provided with --missing-slots option"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	initPQExpBuffer(&details);

	if (server_version_num < 90400)
	{
		/* nothing can be missing here, so the status remains "OK" */
		appendPQExpBuffer(&details,
						  _("replication slots not available for this PostgreSQL version"));
	}
	else
	{
		get_downstream_nodes_with_missing_slot(conn,
											   config_file_options.node_id,
											   &missing_slots);

		if (missing_slots.node_count == 0)
		{
			appendPQExpBuffer(&details,
							  _("node has no missing replication slots"));
		}
		else
		{
			status = CHECK_STATUS_CRITICAL;

			appendPQExpBuffer(&details,
							  _("%i replication slots are missing"),
							  missing_slots.node_count);

			/* append the slot names as ": name1, name2, ..." */
			separator = ": ";

			for (cell = missing_slots.head; cell; cell = cell->next)
			{
				appendPQExpBuffer(&details, "%s%s",
								  separator,
								  cell->node_info->slot_name);
				separator = ", ";
			}
		}
	}

	switch (mode)
	{
		case OM_NAGIOS:
			printf("REPMGR_MISSING_SLOTS %s: %s | missing_slots=%i",
				   output_check_status(status),
				   details.data,
				   missing_slots.node_count);

			/* slot names follow the count as ";name1,name2,..." */
			separator = ";";

			for (cell = missing_slots.head; cell; cell = cell->next)
			{
				printf("%s%s", separator, cell->node_info->slot_name);
				separator = ",";
			}

			printf("\n");
			break;

		case OM_CSV:
		case OM_TEXT:
			if (list_output != NULL)
			{
				/*
				 * Use a label specific to this check: the combined report
				 * also contains the entry added by do_node_check_slots(),
				 * and the two must be distinguishable.
				 */
				check_status_list_set(list_output,
									  "Missing replication slots",
									  status,
									  details.data);
			}
			else
			{
				printf("%s (%s)\n",
					   output_check_status(status),
					   details.data);
			}
			break;

		default:
			break;
	}

	clear_node_info_list(&missing_slots);
	termPQExpBuffer(&details);

	return status;
}
void
do_node_service(void)
{
@@ -2664,6 +2802,7 @@ do_node_help(void)
printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
printf(_(" --role check node has expected role\n"));
printf(_(" --slots check for inactive replication slots\n"));
printf(_(" --missing-slots check for missing replication slots\n"));
puts("");

View File

@@ -106,6 +106,7 @@ typedef struct
bool replication_lag;
bool role;
bool slots;
bool missing_slots;
bool has_passfile;
bool replication_connection;
@@ -158,7 +159,7 @@ typedef struct
/* "node status" options */ \
false, \
/* "node check" options */ \
false, false, false, false, false, false, false, \
false, false, false, false, false, false, false, false, \
/* "node join" options */ \
"", \
/* "node service" options */ \

View File

@@ -473,6 +473,10 @@ main(int argc, char **argv)
runtime_options.slots = true;
break;
case OPT_MISSING_SLOTS:
runtime_options.missing_slots = true;
break;
case OPT_HAS_PASSFILE:
runtime_options.has_passfile = true;
break;

View File

@@ -87,6 +87,7 @@
#define OPT_REMOTE_NODE_ID 1038
#define OPT_RECOVERY_CONF_ONLY 1039
#define OPT_NO_WAIT 1040
#define OPT_MISSING_SLOTS 1041
/* deprecated since 3.3 */
#define OPT_DATA_DIR 999
@@ -164,6 +165,7 @@ static struct option long_options[] =
{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG},
{"role", no_argument, NULL, OPT_ROLE},
{"slots", no_argument, NULL, OPT_SLOTS},
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},