node check: implement CSV output

This is advertised in the --help output and placeholder code was in
place, but it wasn't actually implemented.
This commit is contained in:
Ian Barwick
2018-06-22 13:14:57 +09:00
committed by Ian Barwick
parent d26989bd12
commit 1e5f63792f
2 changed files with 384 additions and 328 deletions

View File

@@ -79,9 +79,26 @@
</itemizedlist> </itemizedlist>
</para> </para>
<para> </refsect1>
Individual checks can also be output in a Nagios-compatible format by additionally
providing the option <literal>--nagios</literal>. <refsect1>
</para> <title>Output format</title>
<para>
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<simpara>
<literal>--csv</literal>: generate output in CSV format (not available
for individual checks)
</simpara>
</listitem>
<listitem>
<simpara>
<literal>--nagios</literal>: generate output in a Nagios-compatible format
</simpara>
</listitem>
</itemizedlist>
</para>
</refsect1> </refsect1>
</refentry> </refentry>

View File

@@ -508,8 +508,7 @@ do_node_status(void)
* --last-checkpoint=... * --last-checkpoint=...
*/ */
static static void
void
_do_node_status_is_shutdown_cleanly(void) _do_node_status_is_shutdown_cleanly(void)
{ {
PGPing ping_status; PGPing ping_status;
@@ -630,7 +629,7 @@ do_node_check(void)
CheckStatusListCell *cell = NULL; CheckStatusListCell *cell = NULL;
/* internal */ /* for internal use */
if (runtime_options.has_passfile == true) if (runtime_options.has_passfile == true)
{ {
return_code = has_passfile() ? 0 : 1; return_code = has_passfile() ? 0 : 1;
@@ -718,7 +717,7 @@ do_node_check(void)
log_error(_("--nagios can only be used with a specific check")); log_error(_("--nagios can only be used with a specific check"));
log_hint(_("execute \"repmgr node --help\" for details")); log_hint(_("execute \"repmgr node --help\" for details"));
PQfinish(conn); PQfinish(conn);
return; exit(ERR_BAD_CONFIG);
} }
/* output general overview */ /* output general overview */
@@ -734,27 +733,46 @@ do_node_check(void)
if (runtime_options.output_mode == OM_CSV) if (runtime_options.output_mode == OM_CSV)
{ {
/* TODO */ appendPQExpBuffer(&output,
"\"Node name\",\"%s\"\n",
node_info.node_name);
appendPQExpBuffer(&output,
"\"Node ID\",\"%i\"\n",
node_info.node_id);
for (cell = status_list.head; cell; cell = cell->next)
{
appendPQExpBuffer(&output,
"\"%s\",\"%s\"",
cell->item,
output_check_status(cell->status));
if (strlen(cell->details))
{
appendPQExpBuffer(&output,
",\"%s\"",
cell->details);
}
appendPQExpBuffer(&output, "\n");
}
} }
else else
{ {
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"Node \"%s\":\n", "Node \"%s\":\n",
node_info.node_name); node_info.node_name);
for (cell = status_list.head; cell; cell = cell->next) for (cell = status_list.head; cell; cell = cell->next)
{ {
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\t%s: %s", "\t%s: %s",
cell->item, cell->item,
output_check_status(cell->status)); output_check_status(cell->status));
if (strlen(cell->details)) if (strlen(cell->details))
{ {
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
" (%s)", " (%s)",
cell->details); cell->details);
} }
@@ -771,194 +789,6 @@ do_node_check(void)
} }
/*
 * do_node_check_role()
 *
 * Compare the role recorded for this node (node_info->type) with the
 * recovery state reported by get_recovery_type() and report any mismatch.
 *
 * conn         connection to the node being checked
 * mode         requested output mode
 * node_info    this node's record; "type" and "node_name" are read
 * list_output  if not NULL, the result is stored here under the item name
 *              "Server role" instead of being printed (text/CSV modes)
 *
 * Returns CHECK_STATUS_OK, or CHECK_STATUS_CRITICAL if the registered role
 * and the running role disagree (or, for BDR, if the database is not a BDR
 * database or the node is not an active BDR node).
 *
 * CSV is only supported as part of an aggregated report, i.e. when the
 * caller supplies list_output; a stand-alone CSV request logs an error,
 * closes the connection and exits with ERR_BAD_CONFIG.
 */
static CheckStatus
do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
	CheckStatus status = CHECK_STATUS_OK;
	PQExpBufferData details;
	RecoveryType recovery_type = get_recovery_type(conn);

	/*
	 * Only refuse CSV when there is no list to collect into; rejecting it
	 * unconditionally would make the aggregated CSV report impossible.
	 */
	if (mode == OM_CSV && list_output == NULL)
	{
		log_error(_("--csv output not provided with --role option"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	initPQExpBuffer(&details);

	switch (node_info->type)
	{
		case PRIMARY:
			if (recovery_type == RECTYPE_STANDBY)
			{
				status = CHECK_STATUS_CRITICAL;
				appendPQExpBuffer(&details,
								  _("node is registered as primary but running as standby"));
			}
			else
			{
				appendPQExpBuffer(&details,
								  _("node is primary"));
			}
			break;

		case STANDBY:
			if (recovery_type == RECTYPE_PRIMARY)
			{
				status = CHECK_STATUS_CRITICAL;
				appendPQExpBuffer(&details,
								  _("node is registered as standby but running as primary"));
			}
			else
			{
				appendPQExpBuffer(&details,
								  _("node is standby"));
			}
			break;

		case WITNESS:
			if (recovery_type == RECTYPE_STANDBY)
			{
				status = CHECK_STATUS_CRITICAL;
				appendPQExpBuffer(&details,
								  _("node is registered as witness but running as standby"));
			}
			else
			{
				appendPQExpBuffer(&details,
								  _("node is witness"));
			}
			break;

		case BDR:
			{
				PQExpBufferData output;

				initPQExpBuffer(&output);

				/* is_bdr_db() supplies the explanation when it fails */
				if (is_bdr_db(conn, &output) == false)
				{
					status = CHECK_STATUS_CRITICAL;
					appendPQExpBuffer(&details,
									  "%s", output.data);
				}

				termPQExpBuffer(&output);

				/* only look at the node's BDR state if the database check passed */
				if (status == CHECK_STATUS_OK)
				{
					if (is_active_bdr_node(conn, node_info->node_name) == false)
					{
						status = CHECK_STATUS_CRITICAL;
						appendPQExpBuffer(&details,
										  _("node is not an active BDR node"));
					}
					else
					{
						appendPQExpBuffer(&details,
										  _("node is an active BDR node"));
					}
				}
			}
			break;

		default:
			/* any other node type: status stays OK and details stays empty */
			break;
	}

	switch (mode)
	{
		case OM_NAGIOS:
			printf("REPMGR_SERVER_ROLE %s: %s\n",
				   output_check_status(status),
				   details.data);
			break;

		case OM_CSV:
		case OM_TEXT:
			if (list_output != NULL)
			{
				check_status_list_set(list_output,
									  "Server role",
									  status,
									  details.data);
			}
			else
			{
				/* unreachable for OM_CSV: that combination exited above */
				printf("%s (%s)\n",
					   output_check_status(status),
					   details.data);
			}
			break;

		default:
			break;
	}

	termPQExpBuffer(&details);

	return status;
}
/*
 * do_node_check_slots()
 *
 * Report on the replication slot counts held in node_info.
 *
 * conn         connection to the node; only used here to close it before
 *              exiting on an unsupported output mode
 * mode         requested output mode
 * node_info    this node's record; "total_replication_slots" and
 *              "inactive_replication_slots" are read
 * list_output  if not NULL, the result is stored here under the item name
 *              "Replication slots" instead of being printed (text/CSV modes)
 *
 * Returns CHECK_STATUS_CRITICAL if at least one replication slot is
 * inactive, otherwise CHECK_STATUS_OK (including when the server version
 * predates replication slots or the node has none).
 *
 * CSV is only supported as part of an aggregated report, i.e. when the
 * caller supplies list_output; a stand-alone CSV request logs an error,
 * closes the connection and exits with ERR_BAD_CONFIG.
 */
static CheckStatus
do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
	CheckStatus status = CHECK_STATUS_OK;
	PQExpBufferData details;

	/*
	 * Without this guard a stand-alone CSV request would print nothing and
	 * still report success.
	 */
	if (mode == OM_CSV && list_output == NULL)
	{
		log_error(_("--csv output not provided with --slots option"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	initPQExpBuffer(&details);

	if (server_version_num < 90400)
	{
		appendPQExpBuffer(&details,
						  _("replication slots not available for this PostgreSQL version"));
	}
	else if (node_info->total_replication_slots == 0)
	{
		appendPQExpBuffer(&details,
						  _("node has no replication slots"));
	}
	else if (node_info->inactive_replication_slots == 0)
	{
		/* no inactive slots, so the active count equals the total */
		appendPQExpBuffer(&details,
						  _("%i of %i replication slots are active"),
						  node_info->total_replication_slots,
						  node_info->total_replication_slots);
	}
	else if (node_info->inactive_replication_slots > 0)
	{
		status = CHECK_STATUS_CRITICAL;

		appendPQExpBuffer(&details,
						  _("%i of %i replication slots are inactive"),
						  node_info->inactive_replication_slots,
						  node_info->total_replication_slots);
	}

	switch (mode)
	{
		case OM_NAGIOS:
			/* text after the "|" carries the total and inactive slot counts */
			printf("REPMGR_INACTIVE_SLOTS %s: %s | slots=%i;%i\n",
				   output_check_status(status),
				   details.data,
				   node_info->total_replication_slots,
				   node_info->inactive_replication_slots);
			break;

		case OM_CSV:
		case OM_TEXT:
			if (list_output != NULL)
			{
				check_status_list_set(list_output,
									  "Replication slots",
									  status,
									  details.data);
			}
			else
			{
				/* unreachable for OM_CSV: that combination exited above */
				printf("%s (%s)\n",
					   output_check_status(status),
					   details.data);
			}
			break;

		default:
			break;
	}

	termPQExpBuffer(&details);

	return status;
}
static void static void
do_node_check_replication_connection(void) do_node_check_replication_connection(void)
{ {
@@ -1025,6 +855,8 @@ do_node_check_replication_connection(void)
return; return;
} }
static CheckStatus static CheckStatus
do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output) do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
{ {
@@ -1032,7 +864,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
CheckStatus status = CHECK_STATUS_UNKNOWN; CheckStatus status = CHECK_STATUS_UNKNOWN;
PQExpBufferData details; PQExpBufferData details;
if (mode == OM_CSV) if (mode == OM_CSV && list_output == NULL)
{ {
log_error(_("--csv output not provided with --archive-ready option")); log_error(_("--csv output not provided with --archive-ready option"));
PQfinish(conn); PQfinish(conn);
@@ -1163,6 +995,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
output_check_status(status), output_check_status(status),
details.data); details.data);
break; break;
case OM_CSV:
case OM_TEXT: case OM_TEXT:
if (list_output != NULL) if (list_output != NULL)
{ {
@@ -1186,6 +1019,174 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
} }
/*
 * do_node_check_downstream()
 *
 * Check whether each downstream node registered for this node
 * (config_file_options.node_id) is currently attached to it.
 *
 * conn         connection to the node being checked
 * mode         requested output mode
 * list_output  if not NULL, the result is stored here under the item name
 *              "Downstream servers" instead of being printed (text/CSV modes)
 *
 * Returns CHECK_STATUS_CRITICAL if at least one non-witness downstream node
 * is not attached, otherwise CHECK_STATUS_OK.
 *
 * CSV is only supported when list_output is supplied; a stand-alone CSV
 * request logs an error, closes the connection and exits with
 * ERR_BAD_CONFIG.
 */
static CheckStatus
do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
{
	NodeInfoList child_nodes = T_NODE_INFO_LIST_INITIALIZER;
	NodeInfoListCell *child = NULL;
	ItemList	detached_names = {NULL, NULL};
	ItemList	attached_names = {NULL, NULL};
	int			n_expected = 0;
	int			n_detached = 0;
	CheckStatus result = CHECK_STATUS_OK;
	PQExpBufferData summary;

	if (list_output == NULL && mode == OM_CSV)
	{
		log_error(_("--csv output not provided with --downstream option"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	initPQExpBuffer(&summary);

	get_downstream_node_records(conn, config_file_options.node_id, &child_nodes);

	/* start from the full record count; witness nodes are subtracted below */
	n_expected = child_nodes.node_count;

	for (child = child_nodes.head; child != NULL; child = child->next)
	{
		ItemList   *bucket = &attached_names;

		/* witness nodes are neither counted nor probed */
		if (child->node_info->type == WITNESS)
		{
			n_expected--;
			continue;
		}

		if (is_downstream_node_attached(conn, child->node_info->node_name) == false)
		{
			n_detached++;
			bucket = &detached_names;
		}

		item_list_append_format(bucket,
								"%s (ID: %i)",
								child->node_info->node_name,
								child->node_info->node_id);
	}

	/* build the one-line summary */
	if (n_detached != 0)
	{
		result = CHECK_STATUS_CRITICAL;

		appendPQExpBuffer(&summary,
						  "%i of %i downstream nodes not attached",
						  n_detached,
						  n_expected);

		/* Nagios output lists the node names separately, after the "|" */
		if (mode != OM_NAGIOS)
		{
			ItemListCell *entry = NULL;
			const char *sep = "";

			appendPQExpBuffer(&summary, "; missing: ");

			for (entry = detached_names.head; entry != NULL; entry = entry->next)
			{
				appendPQExpBuffer(&summary, "%s%s", sep, entry->string);
				sep = ", ";
			}
		}
	}
	else if (n_expected == 0)
	{
		appendPQExpBuffer(&summary,
						  "this node has no downstream nodes");
	}
	else
	{
		/* n_detached is zero in this branch, so this prints "N of N" */
		appendPQExpBuffer(&summary,
						  "%i of %i downstream nodes attached",
						  n_expected - n_detached,
						  n_expected);
	}

	/* emit the result in the requested form */
	if (mode == OM_NAGIOS)
	{
		const int	n_attached = n_expected - n_detached;

		printf("REPMGR_DOWNSTREAM_SERVERS %s: %s | ",
			   output_check_status(result),
			   summary.data);

		if (n_detached)
		{
			ItemListCell *entry = NULL;
			const char *sep = "";

			printf("missing: ");

			for (entry = detached_names.head; entry != NULL; entry = entry->next)
			{
				printf("%s%s", sep, entry->string);
				sep = ", ";
			}
		}

		if (n_attached)
		{
			ItemListCell *entry = NULL;
			const char *sep = "";

			/* separate the two lists only when both are printed */
			if (n_detached)
				printf("; ");

			printf("attached: ");

			for (entry = attached_names.head; entry != NULL; entry = entry->next)
			{
				printf("%s%s", sep, entry->string);
				sep = ", ";
			}
		}

		printf("\n");
	}
	else if (mode == OM_CSV || mode == OM_TEXT)
	{
		if (list_output != NULL)
		{
			check_status_list_set(list_output,
								  "Downstream servers",
								  result,
								  summary.data);
		}
		else
		{
			printf("%s (%s)\n",
				   output_check_status(result),
				   summary.data);
		}
	}

	/*
	 * NOTE(review): detached_names and attached_names are not released in
	 * this function - confirm whether that is intentional.
	 */
	termPQExpBuffer(&summary);
	clear_node_info_list(&child_nodes);

	return result;
}
static CheckStatus static CheckStatus
do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output) do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{ {
@@ -1193,7 +1194,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
int lag_seconds = 0; int lag_seconds = 0;
PQExpBufferData details; PQExpBufferData details;
if (mode == OM_CSV) if (mode == OM_CSV && list_output == NULL)
{ {
log_error(_("--csv output not provided with --replication-lag option")); log_error(_("--csv output not provided with --replication-lag option"));
PQfinish(conn); PQfinish(conn);
@@ -1360,6 +1361,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
output_check_status(status), output_check_status(status),
details.data); details.data);
break; break;
case OM_CSV:
case OM_TEXT: case OM_TEXT:
if (list_output != NULL) if (list_output != NULL)
{ {
@@ -1385,146 +1387,108 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
static CheckStatus static CheckStatus
do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output) do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{ {
NodeInfoList downstream_nodes = T_NODE_INFO_LIST_INITIALIZER;
NodeInfoListCell *cell = NULL;
int missing_nodes_count = 0;
int expected_nodes_count = 0;
CheckStatus status = CHECK_STATUS_OK; CheckStatus status = CHECK_STATUS_OK;
ItemList missing_nodes = {NULL, NULL};
ItemList attached_nodes = {NULL, NULL};
PQExpBufferData details; PQExpBufferData details;
RecoveryType recovery_type = get_recovery_type(conn);
if (mode == OM_CSV && list_output == NULL)
{
log_error(_("--csv output not provided with --role option"));
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
initPQExpBuffer(&details); initPQExpBuffer(&details);
get_downstream_node_records(conn, config_file_options.node_id, &downstream_nodes); switch (node_info->type)
/* if a witness node is present, we'll need to remove this from the total */
expected_nodes_count = downstream_nodes.node_count;
for (cell = downstream_nodes.head; cell; cell = cell->next)
{ {
if (cell->node_info->type == WITNESS) case PRIMARY:
{ if (recovery_type == RECTYPE_STANDBY)
expected_nodes_count --;
continue;
}
if (is_downstream_node_attached(conn, cell->node_info->node_name) == false)
{
missing_nodes_count++;
item_list_append_format(&missing_nodes,
"%s (ID: %i)",
cell->node_info->node_name,
cell->node_info->node_id);
}
else
{
item_list_append_format(&attached_nodes,
"%s (ID: %i)",
cell->node_info->node_name,
cell->node_info->node_id);
}
}
if (missing_nodes_count == 0)
{
if (expected_nodes_count == 0)
appendPQExpBuffer(&details,
"this node has no downstream nodes");
else
appendPQExpBuffer(&details,
"%i of %i downstream nodes attached",
expected_nodes_count - missing_nodes_count,
expected_nodes_count);
}
else
{
ItemListCell *missing_cell = NULL;
bool first = true;
status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer(&details,
"%i of %i downstream nodes not attached",
missing_nodes_count,
expected_nodes_count);
if (mode != OM_NAGIOS)
{
appendPQExpBuffer(&details, "; missing: ");
for (missing_cell = missing_nodes.head; missing_cell; missing_cell = missing_cell->next)
{ {
if (first == false) status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer(&details, appendPQExpBuffer(&details,
", "); _("node is registered as primary but running as standby"));
else
first = false;
if (first == false)
appendPQExpBuffer(
&details,
"%s", missing_cell->string);
} }
} else
{
appendPQExpBuffer(&details,
_("node is primary"));
}
break;
case STANDBY:
if (recovery_type == RECTYPE_PRIMARY)
{
status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer(&details,
_("node is registered as standby but running as primary"));
}
else
{
appendPQExpBuffer(&details,
_("node is standby"));
}
break;
case WITNESS:
if (recovery_type == RECTYPE_STANDBY)
{
status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer(&details,
_("node is registered as witness but running as standby"));
}
else
{
appendPQExpBuffer(&details,
_("node is witness"));
}
break;
case BDR:
{
PQExpBufferData output;
initPQExpBuffer(&output);
if (is_bdr_db(conn, &output) == false)
{
status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer(&details,
"%s", output.data);
}
termPQExpBuffer(&output);
if (status == CHECK_STATUS_OK)
{
if (is_active_bdr_node(conn, node_info->node_name) == false)
{
status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer(&details,
_("node is not an active BDR node"));
}
else
{
appendPQExpBuffer(&details,
_("node is an active BDR node"));
}
}
}
default:
break;
} }
switch (mode) switch (mode)
{ {
case OM_NAGIOS: case OM_NAGIOS:
{ printf("REPMGR_SERVER_ROLE %s: %s\n",
printf("REPMGR_DOWNSTREAM_SERVERS %s: %s | ", output_check_status(status),
output_check_status(status), details.data);
details.data);
if (missing_nodes_count)
{
ItemListCell *missing_cell = NULL;
bool first = true;
printf("missing: ");
for (missing_cell = missing_nodes.head; missing_cell; missing_cell = missing_cell->next)
{
if (first == false)
printf(", ");
else
first = false;
if (first == false)
printf("%s", missing_cell->string);
}
}
if (expected_nodes_count - missing_nodes_count)
{
ItemListCell *attached_cell = NULL;
bool first = true;
if (missing_nodes_count)
printf("; ");
printf("attached: ");
for (attached_cell = attached_nodes.head; attached_cell; attached_cell = attached_cell->next)
{
if (first == false)
printf(", ");
else
first = false;
if (first == false)
printf("%s", attached_cell->string);
}
}
printf("\n");
}
break; break;
case OM_CSV:
case OM_TEXT: case OM_TEXT:
if (list_output != NULL) if (list_output != NULL)
{ {
check_status_list_set(list_output, check_status_list_set(list_output,
"Downstream servers", "Server role",
status, status,
details.data); details.data);
} }
@@ -1536,10 +1500,85 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
} }
default: default:
break; break;
} }
termPQExpBuffer(&details);
return status;
}
static CheckStatus
do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
CheckStatus status = CHECK_STATUS_OK;
PQExpBufferData details;
if (mode == OM_CSV && list_output == NULL)
{
log_error(_("--csv output not provided with --slots option"));
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
initPQExpBuffer(&details);
if (server_version_num < 90400)
{
appendPQExpBuffer(&details,
_("replication slots not available for this PostgreSQL version"));
}
else if (node_info->total_replication_slots == 0)
{
appendPQExpBuffer(&details,
_("node has no replication slots"));
}
else if (node_info->inactive_replication_slots == 0)
{
appendPQExpBuffer(&details,
_("%i of %i replication slots are active"),
node_info->total_replication_slots,
node_info->total_replication_slots);
}
else if (node_info->inactive_replication_slots > 0)
{
status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer(&details,
_("%i of %i replication slots are inactive"),
node_info->inactive_replication_slots,
node_info->total_replication_slots);
}
switch (mode)
{
case OM_NAGIOS:
printf("REPMGR_INACTIVE_SLOTS %s: %s | slots=%i;%i\n",
output_check_status(status),
details.data,
node_info->total_replication_slots,
node_info->inactive_replication_slots);
break;
case OM_CSV:
case OM_TEXT:
if (list_output != NULL)
{
check_status_list_set(list_output,
"Replication slots",
status,
details.data);
}
else
{
printf("%s (%s)\n",
output_check_status(status),
details.data);
}
default:
break;
}
termPQExpBuffer(&details); termPQExpBuffer(&details);
clear_node_info_list(&downstream_nodes);
return status; return status;
} }