"node check": initial general output

This commit is contained in:
Ian Barwick
2017-08-14 17:32:44 +09:00
parent 3b2158edbf
commit fa7d60cd51
5 changed files with 313 additions and 137 deletions

View File

@@ -29,6 +29,7 @@ static void _do_node_status_is_shutdown(void);
static void _do_node_archive_config(void); static void _do_node_archive_config(void);
static void _do_node_restore_config(void); static void _do_node_restore_config(void);
void void
do_node_status(void) do_node_status(void)
{ {
@@ -282,14 +283,6 @@ do_node_status(void)
"disabled"); "disabled");
} }
// if standby (and in recovery), show:
// upstream
// -> check if matches expected; parse recovery.conf for < 9.6 (must be superuser),
// otherwise use pg_stat_wal_receiver
// streaming/in archive recovery/disconnected
// last received
// last replayed
// lag if streaming, or if in recovery can compare with upstream
if (node_info.type == STANDBY) if (node_info.type == STANDBY)
{ {
@@ -511,17 +504,33 @@ void _do_node_status_is_shutdown(void)
return; return;
} }
/*
* Configuration file required
*/
void void
do_node_check(void) do_node_check(void)
{ {
PGconn *conn = NULL; PGconn *conn = NULL;
PQExpBufferData output;
t_node_info node_info = T_NODE_INFO_INITIALIZER;
CheckStatusList status_list = { NULL, NULL };
CheckStatusListCell *cell = NULL;
if (strlen(config_file_options.conninfo)) if (strlen(config_file_options.conninfo))
conn = establish_db_connection(config_file_options.conninfo, true); conn = establish_db_connection(config_file_options.conninfo, true);
else else
conn = establish_db_connection_by_params(&source_conninfo, true); conn = establish_db_connection_by_params(&source_conninfo, true);
if (get_node_record(conn, config_file_options.node_id, &node_info) != RECORD_FOUND)
{
log_error(_("no record found for node %i"), config_file_options.node_id);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
/* handle specific checks /* handle specific checks
* ====================== */ * ====================== */
if (runtime_options.archiver == true) if (runtime_options.archiver == true)
@@ -538,17 +547,59 @@ do_node_check(void)
return; return;
} }
/* output general overview */
initPQExpBuffer(&output);
//(void) do_node_check_role(conn, runtime_options.output_mode, &output);
(void) do_node_check_replication_lag(conn, runtime_options.output_mode, &status_list);
(void) do_node_check_archiver(conn, runtime_options.output_mode, &status_list);
if (runtime_options.output_mode == OM_CSV)
{
/* TODO */
}
else
{
appendPQExpBuffer(
&output,
"Node \"%s\":\n",
node_info.node_name);
for (cell = status_list.head; cell; cell = cell->next)
{
appendPQExpBuffer(
&output,
"\t%s: %s",
cell->item,
output_check_status(cell->status));
if (strlen(cell->details))
{
appendPQExpBuffer(
&output,
" (%s)",
cell->details);
}
appendPQExpBuffer(&output, "\n");
}
}
printf("%s", output.data);
termPQExpBuffer(&output);
check_status_list_free(&status_list);
PQfinish(conn); PQfinish(conn);
} }
CheckStatus CheckStatus
do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output) do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
{ {
bool own_buffer = false;
int ready_archive_files = 0; int ready_archive_files = 0;
PQExpBufferData buf;
CheckStatus status = CHECK_STATUS_UNKNOWN; CheckStatus status = CHECK_STATUS_UNKNOWN;
PQExpBufferData details;
if (mode == OM_CSV) if (mode == OM_CSV)
{ {
@@ -556,13 +607,7 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output)
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
if (output == NULL) initPQExpBuffer(&details);
{
initPQExpBuffer(&buf);
output = &buf;
own_buffer = true;
}
ready_archive_files = get_ready_archive_files(conn, config_file_options.data_directory); ready_archive_files = get_ready_archive_files(conn, config_file_options.data_directory);
@@ -574,24 +619,21 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"--status=CRITICAL --files=%i --threshold=%i", "--files=%i --threshold=%i",
ready_archive_files, ready_archive_files, config_file_options.archiver_lag_critical);
config_file_options.archiver_lag_critical);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"PG_ARCHIVER CRITICAL: %i pending files (critical: %i)", "%i pending files (critical: %i)",
ready_archive_files, ready_archive_files, config_file_options.archiver_lag_critical);
config_file_options.archiver_lag_critical);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"CRITICAL - %i pending files (threshold: %i)", "%i pending files, threshold: %i",
ready_archive_files, ready_archive_files, config_file_options.archiver_lag_critical);
config_file_options.archiver_lag_critical);
break; break;
default: default:
@@ -606,24 +648,21 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"--status=WARNING --files=%i --threshold=%i", "--files=%i --threshold=%i",
ready_archive_files, ready_archive_files, config_file_options.archiver_lag_warning);
config_file_options.archiver_lag_warning);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"PG_ARCHIVER WARNING: %i pending files (warning: %i)", "%i pending files (warning: %i)",
ready_archive_files, ready_archive_files, config_file_options.archiver_lag_warning);
config_file_options.archiver_lag_warning);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"WARNING - %i pending files (threshold: %i)", "%i pending files (threshold: %i)",
ready_archive_files, ready_archive_files, config_file_options.archiver_lag_warning);
config_file_options.archiver_lag_warning);
break; break;
default: default:
@@ -637,19 +676,12 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output)
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer(
output,
"--status=UNKNOWN");
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer(
output,
"PG_ARCHIVER UNKNOWN: unable to check archive_status directory");
break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"UNKNOWN - unable to check archive_status directory"); "unable to check archive_status directory");
break; break;
default: default:
@@ -664,21 +696,14 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"--status=OK --files=%i", "--files=%i", ready_archive_files);
ready_archive_files);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer(
output,
"PG_ARCHIVER OK: %i pending files",
ready_archive_files);
break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"OK - %i pending files", "%i pending files", ready_archive_files);
ready_archive_files);
break; break;
default: default:
@@ -686,23 +711,49 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output)
} }
} }
if (own_buffer == true) switch (mode)
{ {
printf("%s\n", buf.data); case OM_OPTFORMAT:
termPQExpBuffer(&buf); {
printf("--status=%s %s\n",
output_check_status(status),
details.data);
}
break;
case OM_NAGIOS:
printf("PG_ARCHIVER %s: %s\n",
output_check_status(status),
details.data);
break;
case OM_TEXT:
if (list_output != NULL)
{
check_status_list_set(list_output,
"WAL archiving",
status,
details.data);
}
else
{
printf("%s (%s)\n",
output_check_status(status),
details.data);
}
default:
break;
} }
termPQExpBuffer(&details);
return status; return status;
} }
CheckStatus CheckStatus
do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *output) do_node_check_replication_lag(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
{ {
CheckStatus status = CHECK_STATUS_UNKNOWN; CheckStatus status = CHECK_STATUS_UNKNOWN;
bool own_buffer = false;
PQExpBufferData buf;
int lag_seconds = 0; int lag_seconds = 0;
PQExpBufferData details;
if (mode == OM_CSV) if (mode == OM_CSV)
{ {
@@ -710,12 +761,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *ou
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
if (output == NULL) initPQExpBuffer(&details);
{
initPQExpBuffer(&buf);
output = &buf;
own_buffer = true;
}
lag_seconds = get_replication_lag_seconds(conn); lag_seconds = get_replication_lag_seconds(conn);
@@ -729,24 +775,21 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *ou
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"--status=CRITICAL --lag=%i --threshold=%i", "--lag=%i --threshold=%i",
lag_seconds, lag_seconds, config_file_options.replication_lag_critical);
config_file_options.replication_lag_critical);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"PG_REPLICATION_LAG CRITICAL: %i seconds (critical: %i)", "%i seconds (critical: %i)",
lag_seconds, lag_seconds, config_file_options.replication_lag_critical);
config_file_options.replication_lag_critical);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"CRITICAL - %i seconds (threshold: %i)", "%i seconds, threshold: %i)",
lag_seconds, lag_seconds, config_file_options.replication_lag_critical);
config_file_options.replication_lag_critical);
break; break;
default: default:
@@ -761,24 +804,21 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *ou
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"--status=WARNING --lag=%i --threshold=%i", "--lag=%i --threshold=%i",
lag_seconds, lag_seconds, config_file_options.replication_lag_warning);
config_file_options.replication_lag_warning);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"PG_REPLICATION_LAG WARNING: %i seconds (warning: %i)", "%i seconds (warning: %i)",
lag_seconds, lag_seconds, config_file_options.replication_lag_warning);
config_file_options.replication_lag_warning);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"WARNING - %i seconds (threshold: %i)", "%i seconds, threshold: %i)",
lag_seconds, lag_seconds, config_file_options.replication_lag_warning);
config_file_options.replication_lag_warning);
break; break;
default: default:
@@ -792,19 +832,12 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *ou
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer(
output,
"--status=UNKNOWN");
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer(
output,
"PG_REPLICATION_LAG UNKNOWN: unable to query replication lag");
break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"UNKNOWN - unable to query replication lag"); "unable to query replication lag");
break; break;
default: default:
@@ -819,20 +852,15 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *ou
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"--status=OK --files=%i", "--lag=%i",
lag_seconds); lag_seconds);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer(
output,
"PG_REPLICATION_LAG OK: %i seconds",
lag_seconds);
break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(
output, &details,
"OK - %i seconds", "%i seconds",
lag_seconds); lag_seconds);
break; break;
@@ -841,15 +869,40 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *ou
} }
} }
switch (mode)
if (own_buffer == true)
{ {
printf("%s\n", buf.data); case OM_OPTFORMAT:
termPQExpBuffer(&buf); {
printf("--status=%s %s\n",
output_check_status(status),
details.data);
}
break;
case OM_NAGIOS:
printf("PG_REPLICATION_LAG %s: %s\n",
output_check_status(status),
details.data);
break;
case OM_TEXT:
if (list_output != NULL)
{
check_status_list_set(list_output,
"Replication lag",
status,
details.data);
}
else
{
printf("%s (%s)\n",
output_check_status(status),
details.data);
}
default:
break;
} }
termPQExpBuffer(&details);
return status; return status;
} }
@@ -1572,3 +1625,5 @@ copy_file(const char *src_file, const char *dest_file)
return true; return true;
} }

View File

@@ -8,8 +8,11 @@
extern void do_node_status(void); extern void do_node_status(void);
extern void do_node_check(void); extern void do_node_check(void);
extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output); //extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output);
extern CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *output); extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
extern CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
extern void do_node_rejoin(void); extern void do_node_rejoin(void);
extern void do_node_service(void); extern void do_node_service(void);

View File

@@ -159,13 +159,6 @@ typedef enum {
} t_server_action; } t_server_action;
typedef enum {
CHECK_STATUS_OK = 0,
CHECK_STATUS_WARNING,
CHECK_STATUS_CRITICAL,
CHECK_STATUS_UNKNOWN
} CheckStatus;
/* global configuration structures */ /* global configuration structures */
extern t_runtime_options runtime_options; extern t_runtime_options runtime_options;

View File

@@ -130,8 +130,8 @@ item_list_append_format(ItemList *item_list, const char *format, ...)
void void
item_list_free(ItemList *item_list) item_list_free(ItemList *item_list)
{ {
ItemListCell *cell; ItemListCell *cell = NULL;
ItemListCell *next_cell; ItemListCell *next_cell = NULL;
cell = item_list->head; cell = item_list->head;
@@ -155,9 +155,9 @@ key_value_list_set(KeyValueList *item_list, const char *key, const char *value)
void void
key_value_list_set_format(KeyValueList *item_list, const char *key, const char *value, ...) key_value_list_set_format(KeyValueList *item_list, const char *key, const char *value, ...)
{ {
KeyValueListCell *cell; KeyValueListCell *cell = NULL;
va_list arglist; va_list arglist;
int keylen; int keylen = 0;
cell = (KeyValueListCell *) pg_malloc0(sizeof(KeyValueListCell)); cell = (KeyValueListCell *) pg_malloc0(sizeof(KeyValueListCell));
@@ -194,7 +194,7 @@ key_value_list_set_format(KeyValueList *item_list, const char *key, const char *
void void
key_value_list_set_output_mode (KeyValueList *item_list, const char *key, OutputMode mode) key_value_list_set_output_mode (KeyValueList *item_list, const char *key, OutputMode mode)
{ {
KeyValueListCell *cell; KeyValueListCell *cell = NULL;
for (cell = item_list->head; cell; cell = cell->next) for (cell = item_list->head; cell; cell = cell->next)
{ {
@@ -229,6 +229,95 @@ key_value_list_free(KeyValueList *item_list)
} }
/*
 * check_status_list_set()
 *
 * Append a check result whose details text is already a finished string.
 *
 * The text is forwarded to check_status_list_set_format() behind a literal
 * "%s" format, so any '%' characters it contains are copied verbatim rather
 * than being interpreted as conversion specifiers.
 */
void
check_status_list_set(CheckStatusList *list, const char *item, CheckStatus status, const char *details)
{
	check_status_list_set_format(list,
								 item,
								 status,
								 "%s",
								 details);
}
/*
 * check_status_list_set_format()
 *
 * Create a new cell and append it to the tail of "list".
 *
 * list    - list to append to; its head/tail pointers are updated
 * item    - name of the check; a private copy is stored in the cell
 * status  - result of the check
 * details - printf-style format string, followed by its arguments; the
 *           formatted text is written into a MAXLEN-byte buffer owned by
 *           the cell
 *
 * The cell and both of its strings are released by check_status_list_free().
 */
void
check_status_list_set_format(CheckStatusList *list, const char *item, CheckStatus status, const char *details, ...)
{
	CheckStatusListCell *cell = NULL;
	va_list		arglist;
	size_t		itemlen = 0;

	cell = (CheckStatusListCell *) pg_malloc0(sizeof(CheckStatusListCell));

	/*
	 * NOTE(review): pg_malloc0() may already terminate on allocation
	 * failure, which would make this check redundant - confirm before
	 * removing it.
	 */
	if (cell == NULL)
	{
		log_error(_("unable to allocate memory; terminating."));
		exit(ERR_BAD_CONFIG);
	}

	itemlen = strlen(item);

	cell->item = pg_malloc0(itemlen + 1);
	cell->details = pg_malloc0(MAXLEN);
	cell->status = status;

	/* a freshly created cell is always the last one in the list */
	cell->next = NULL;

	/*
	 * Copy the terminating NUL along with the name, so the stored string is
	 * terminated regardless of what the allocator left in the buffer.
	 */
	memcpy(cell->item, item, itemlen + 1);

	va_start(arglist, details);
	(void) xvsnprintf(cell->details, MAXLEN, details, arglist);
	va_end(arglist);

	/* link after the current tail, or start the list if it is empty */
	if (list->tail)
		list->tail->next = cell;
	else
		list->head = cell;

	list->tail = cell;

	return;
}
/*
 * check_status_list_free()
 *
 * Release every cell in "list", together with the "item" and "details"
 * strings each cell owns.
 *
 * The CheckStatusList structure itself is not freed. Its head and tail are
 * reset to NULL afterwards, so the caller is left with a valid empty list
 * instead of pointers to freed cells; this makes reusing the list, or
 * calling this function a second time, harmless. A NULL "list" is ignored.
 */
void
check_status_list_free(CheckStatusList *list)
{
	CheckStatusListCell *cell = NULL;
	CheckStatusListCell *next_cell = NULL;

	if (list == NULL)
		return;

	cell = list->head;

	while (cell != NULL)
	{
		/* fetch the successor before the current cell is released */
		next_cell = cell->next;

		pfree(cell->item);
		pfree(cell->details);
		pfree(cell);

		cell = next_cell;
	}

	/* do not leave pointers to freed cells behind */
	list->head = NULL;
	list->tail = NULL;
}
/*
 * output_check_status()
 *
 * Return the upper-case label used when printing a CheckStatus value.
 *
 * The returned pointer refers to a string literal and must not be freed.
 * Any value not matched by one of the case labels is reported as "UNKNOWN".
 */
const char *
output_check_status(CheckStatus status)
{
	/* fallback label, also used for CHECK_STATUS_UNKNOWN itself */
	const char *label = "UNKNOWN";

	switch (status)
	{
		case CHECK_STATUS_OK:
			label = "OK";
			break;
		case CHECK_STATUS_WARNING:
			label = "WARNING";
			break;
		case CHECK_STATUS_CRITICAL:
			label = "CRITICAL";
			break;
		case CHECK_STATUS_UNKNOWN:
			break;
	}

	return label;
}
/* /*
* Escape a string for use as a parameter in recovery.conf * Escape a string for use as a parameter in recovery.conf
* Caller must free returned value * Caller must free returned value

View File

@@ -19,6 +19,14 @@
#define MAXLEN_STR STR(MAXLEN) #define MAXLEN_STR STR(MAXLEN)
/*
 * Result of an individual node check.
 *
 * output_check_status() converts these values into their display labels.
 * The numbering starts at zero and follows declaration order.
 */
typedef enum {
	CHECK_STATUS_OK = 0,
	CHECK_STATUS_WARNING = 1,
	CHECK_STATUS_CRITICAL = 2,
	CHECK_STATUS_UNKNOWN = 3
} CheckStatus;
typedef enum { typedef enum {
OM_NOT_SET = -1, OM_NOT_SET = -1,
OM_TEXT, OM_TEXT,
@@ -54,6 +62,22 @@ typedef struct KeyValueList
} KeyValueList; } KeyValueList;
/*
 * One entry in a CheckStatusList: the result of a single named check.
 *
 * Cells are created by check_status_list_set_format() and released, together
 * with the strings they own, by check_status_list_free().
 */
typedef struct CheckStatusListCell
{
/* following cell in the list; list traversal stops when this is NULL */
struct CheckStatusListCell *next;
/* name of the check; an allocated copy of the string passed in by the caller */
char *item;
/* outcome of the check */
CheckStatus status;
/* formatted details text, held in an allocated buffer of MAXLEN bytes */
char *details;
} CheckStatusListCell;
/*
 * Singly-linked list of check results.
 *
 * An empty list has both members set to NULL.
 */
typedef struct CheckStatusList
{
/* first cell; traversal and freeing start here */
CheckStatusListCell *head;
/* last cell; new cells are linked after it */
CheckStatusListCell *tail;
} CheckStatusList;
extern int extern int
maxlen_snprintf(char *str, const char *format,...) maxlen_snprintf(char *str, const char *format,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3))); __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
@@ -88,6 +112,18 @@ key_value_list_get(KeyValueList *item_list, const char *key);
extern void extern void
key_value_list_free(KeyValueList *item_list); key_value_list_free(KeyValueList *item_list);
extern void
check_status_list_set(CheckStatusList *list, const char *item, CheckStatus status, const char *details);
extern void
check_status_list_set_format(CheckStatusList *list, const char *item, CheckStatus status, const char *details, ...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 4, 5)));
extern void
check_status_list_free(CheckStatusList *list);
extern const char * output_check_status(CheckStatus status);
extern char * extern char *
escape_recovery_conf_value(const char *src); escape_recovery_conf_value(const char *src);