mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
node check: add --missing-slots check
This enables an explicit check for slots which should exist (according to the repmgr metadata) but which aren't present.
This commit is contained in:
3
HISTORY
3
HISTORY
@@ -1,3 +1,6 @@
|
||||
4.1.0 2018-??-??
|
||||
repmgr: add "--missing-slots" check to "repmgr node check"
|
||||
|
||||
4.0.6 2018-06-14
|
||||
repmgr: (witness register) prevent registration of a witness server with the
|
||||
same name as an existing node (Ian)
|
||||
|
||||
@@ -77,6 +77,12 @@
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--missing-slots</literal>: checks there are no missing replication slots
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
@@ -47,6 +47,7 @@ static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, Check
|
||||
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
|
||||
/*
|
||||
* NODE STATUS
|
||||
@@ -493,6 +494,7 @@ do_node_status(void)
|
||||
log_hint(_("execute \"repmgr node check\" for more details"));
|
||||
}
|
||||
|
||||
clear_node_info_list(&missing_slots);
|
||||
key_value_list_free(&node_status);
|
||||
item_list_free(&warnings);
|
||||
PQfinish(conn);
|
||||
@@ -712,6 +714,17 @@ do_node_check(void)
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.missing_slots == true)
|
||||
{
|
||||
return_code = do_node_check_missing_slots(conn,
|
||||
runtime_options.output_mode,
|
||||
&node_info,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
|
||||
if (runtime_options.output_mode == OM_NAGIOS)
|
||||
{
|
||||
log_error(_("--nagios can only be used with a specific check"));
|
||||
@@ -730,6 +743,7 @@ do_node_check(void)
|
||||
(void) do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list);
|
||||
(void) do_node_check_downstream(conn, runtime_options.output_mode, &status_list);
|
||||
(void) do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list);
|
||||
(void) do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list);
|
||||
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
{
|
||||
@@ -1583,6 +1597,130 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
|
||||
}
|
||||
|
||||
|
||||
static CheckStatus
|
||||
do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||
{
|
||||
CheckStatus status = CHECK_STATUS_OK;
|
||||
PQExpBufferData details;
|
||||
NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER;
|
||||
|
||||
if (mode == OM_CSV && list_output == NULL)
|
||||
{
|
||||
log_error(_("--csv output not provided with --missing-slots option"));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
initPQExpBuffer(&details);
|
||||
|
||||
if (server_version_num < 90400)
|
||||
{
|
||||
appendPQExpBuffer(&details,
|
||||
_("replication slots not available for this PostgreSQL version"));
|
||||
}
|
||||
else
|
||||
{
|
||||
get_downstream_nodes_with_missing_slot(conn,
|
||||
config_file_options.node_id,
|
||||
&missing_slots);
|
||||
|
||||
if (missing_slots.node_count == 0)
|
||||
{
|
||||
appendPQExpBuffer(&details,
|
||||
_("node has no missing replication slots"));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
NodeInfoListCell *missing_slot_cell = NULL;
|
||||
bool first_element = true;
|
||||
|
||||
status = CHECK_STATUS_CRITICAL;
|
||||
|
||||
appendPQExpBuffer(&details,
|
||||
_("%i replication slots are missing"),
|
||||
missing_slots.node_count);
|
||||
|
||||
if (missing_slots.node_count)
|
||||
{
|
||||
appendPQExpBuffer(&details, ": ");
|
||||
|
||||
for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
|
||||
{
|
||||
if (first_element == true)
|
||||
{
|
||||
first_element = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&details, ", ");
|
||||
}
|
||||
|
||||
appendPQExpBuffer(&details, "%s", missing_slot_cell->node_info->slot_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case OM_NAGIOS:
|
||||
{
|
||||
printf("REPMGR_MISSING_SLOTS %s: %s | missing_slots=%i",
|
||||
output_check_status(status),
|
||||
details.data,
|
||||
missing_slots.node_count);
|
||||
|
||||
if (missing_slots.node_count)
|
||||
{
|
||||
NodeInfoListCell *missing_slot_cell = NULL;
|
||||
bool first_element = true;
|
||||
|
||||
printf(";");
|
||||
|
||||
for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
|
||||
{
|
||||
if (first_element == true)
|
||||
{
|
||||
first_element = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(",");
|
||||
}
|
||||
printf("%s", missing_slot_cell->node_info->slot_name);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
break;
|
||||
}
|
||||
case OM_CSV:
|
||||
case OM_TEXT:
|
||||
if (list_output != NULL)
|
||||
{
|
||||
check_status_list_set(list_output,
|
||||
"Replication slots",
|
||||
status,
|
||||
details.data);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("%s (%s)\n",
|
||||
output_check_status(status),
|
||||
details.data);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
clear_node_info_list(&missing_slots);
|
||||
|
||||
termPQExpBuffer(&details);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
do_node_service(void)
|
||||
{
|
||||
@@ -2664,6 +2802,7 @@ do_node_help(void)
|
||||
printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
|
||||
printf(_(" --role check node has expected role\n"));
|
||||
printf(_(" --slots check for inactive replication slots\n"));
|
||||
printf(_(" --missing-slots check for missing replication slots\n"));
|
||||
|
||||
puts("");
|
||||
|
||||
|
||||
@@ -106,6 +106,7 @@ typedef struct
|
||||
bool replication_lag;
|
||||
bool role;
|
||||
bool slots;
|
||||
bool missing_slots;
|
||||
bool has_passfile;
|
||||
bool replication_connection;
|
||||
|
||||
@@ -158,7 +159,7 @@ typedef struct
|
||||
/* "node status" options */ \
|
||||
false, \
|
||||
/* "node check" options */ \
|
||||
false, false, false, false, false, false, false, \
|
||||
false, false, false, false, false, false, false, false, \
|
||||
/* "node join" options */ \
|
||||
"", \
|
||||
/* "node service" options */ \
|
||||
|
||||
@@ -473,6 +473,10 @@ main(int argc, char **argv)
|
||||
runtime_options.slots = true;
|
||||
break;
|
||||
|
||||
case OPT_MISSING_SLOTS:
|
||||
runtime_options.missing_slots = true;
|
||||
break;
|
||||
|
||||
case OPT_HAS_PASSFILE:
|
||||
runtime_options.has_passfile = true;
|
||||
break;
|
||||
|
||||
@@ -87,6 +87,7 @@
|
||||
#define OPT_REMOTE_NODE_ID 1038
|
||||
#define OPT_RECOVERY_CONF_ONLY 1039
|
||||
#define OPT_NO_WAIT 1040
|
||||
#define OPT_MISSING_SLOTS 1041
|
||||
|
||||
/* deprecated since 3.3 */
|
||||
#define OPT_DATA_DIR 999
|
||||
@@ -164,6 +165,7 @@ static struct option long_options[] =
|
||||
{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG},
|
||||
{"role", no_argument, NULL, OPT_ROLE},
|
||||
{"slots", no_argument, NULL, OPT_SLOTS},
|
||||
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
|
||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
||||
|
||||
|
||||
Reference in New Issue
Block a user