Add "repmgr node status"

Outputs an overview of a node's status, and emits warnings if any
issues are detected.
This commit is contained in:
Ian Barwick
2017-07-25 00:12:16 +09:00
parent 93c35618a2
commit 8a2e4db1bc
8 changed files with 290 additions and 11 deletions

View File

@@ -19,6 +19,8 @@ SCRIPTS_built = repmgr4 repmgrd4
PG_CPPFLAGS = -std=gnu89 -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS) PG_CPPFLAGS = -std=gnu89 -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS)
SHLIB_LINK = $(libpq) SHLIB_LINK = $(libpq)
HEADERS = $(wildcard *.h)
OBJS = \ OBJS = \
repmgr.o repmgr.o
@@ -27,7 +29,7 @@ include Makefile.global
$(info Building against PostgreSQL $(MAJORVERSION)) $(info Building against PostgreSQL $(MAJORVERSION))
REPMGR_CLIENT_OBJS = repmgr-client.o \ REPMGR_CLIENT_OBJS = repmgr-client.o \
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o \ repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
configfile.o log.o strutil.o dbutils.o dirutil.o compat.o controldata.o configfile.o log.o strutil.o dbutils.o dirutil.o compat.o controldata.o
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o
@@ -39,6 +41,8 @@ repmgr4: $(REPMGR_CLIENT_OBJS)
repmgrd4: $(REPMGRD_OBJS) repmgrd4: $(REPMGRD_OBJS)
$(CC) $(CFLAGS) $(REPMGRD_OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) $(CC) $(CFLAGS) $(REPMGRD_OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
$(REPMGR_CLIENT_OBJS): $(HEADERS)
$(REPMGRD_OBJS): $(HEADERS)
# Ensure Makefiles are up-to-date (should we move this to Makefile.global?) # Ensure Makefiles are up-to-date (should we move this to Makefile.global?)
Makefile: Makefile.in config.status configure Makefile: Makefile.in config.status configure
@@ -56,6 +60,7 @@ additional-clean:
rm -f repmgr-action-primary.o rm -f repmgr-action-primary.o
rm -f repmgr-action-standby.o rm -f repmgr-action-standby.o
rm -f repmgr-action-bdr.o rm -f repmgr-action-bdr.o
rm -f repmgr-action-node.o
rm -f repmgr-action-cluster.o rm -f repmgr-action-cluster.o
rm -f repmgrd.o rm -f repmgrd.o
rm -f repmgrd-physical.o rm -f repmgrd-physical.o

View File

@@ -2005,6 +2005,7 @@ delete_node_record(PGconn *conn, int node)
log_verbose(LOG_DEBUG, "delete_node_record():\n %s", query.data); log_verbose(LOG_DEBUG, "delete_node_record():\n %s", query.data);
res = PQexec(conn, query.data); res = PQexec(conn, query.data);
termPQExpBuffer(&query);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
@@ -2019,6 +2020,43 @@ delete_node_record(PGconn *conn, int node)
return true; return true;
} }
/*
 * get_node_replication_stats()
 *
 * Queries the server behind "conn" for its replication-related settings
 * and counters, and stores them in the corresponding fields of "node_info":
 *
 *   max_wal_senders             - current_setting('max_wal_senders')
 *   attached_wal_receivers      - row count of pg_stat_replication
 *   max_replication_slots       - current_setting('max_replication_slots')
 *   active_replication_slots    - pg_replication_slots rows with active = TRUE
 *   inactive_replication_slots  - pg_replication_slots rows with active = FALSE
 *
 * If the query fails or returns no row, a warning is logged and "node_info"
 * is left untouched, so the caller sees whatever values it had beforehand.
 */
void
get_node_replication_stats(PGconn *conn, t_node_info *node_info)
{
	PQExpBufferData query;
	PGresult   *res;

	initPQExpBuffer(&query);

	appendPQExpBuffer(
		&query,
		" SELECT current_setting('max_wal_senders')::INT AS max_wal_senders, "
		" (SELECT COUNT(*) FROM pg_catalog.pg_stat_replication) AS attached_wal_receivers, "
		" current_setting('max_replication_slots')::INT AS max_replication_slots, "
		" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = TRUE) AS active_replication_slots, "
		" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = FALSE) AS inactive_replication_slots ");

	res = PQexec(conn, query.data);
	termPQExpBuffer(&query);

	/* also guard against an empty result set before calling PQgetvalue() */
	if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) < 1)
	{
		log_warning(_("unable to retrieve node replication statistics:\n  %s"),
					PQerrorMessage(conn));
		PQclear(res);
		return;
	}

	node_info->max_wal_senders = atoi(PQgetvalue(res, 0, 0));
	node_info->attached_wal_receivers = atoi(PQgetvalue(res, 0, 1));
	node_info->max_replication_slots = atoi(PQgetvalue(res, 0, 2));
	node_info->active_replication_slots = atoi(PQgetvalue(res, 0, 3));
	node_info->inactive_replication_slots = atoi(PQgetvalue(res, 0, 4));

	/* the result must be released on the success path too, otherwise it leaks */
	PQclear(res);

	return;
}
void void
clear_node_info_list(NodeInfoList *nodes) clear_node_info_list(NodeInfoList *nodes)

View File

@@ -84,6 +84,12 @@ typedef struct s_node_info
PGconn *conn; PGconn *conn;
/* for ad-hoc use e.g. when working with a list of nodes */ /* for ad-hoc use e.g. when working with a list of nodes */
char details[MAXLEN]; char details[MAXLEN];
/* various statistics */
int max_wal_senders;
int attached_wal_receivers;
int max_replication_slots;
int active_replication_slots;
int inactive_replication_slots;
} t_node_info; } t_node_info;
@@ -104,7 +110,8 @@ typedef struct s_node_info
RECTYPE_UNKNOWN, \ RECTYPE_UNKNOWN, \
MS_NORMAL, \ MS_NORMAL, \
NULL, \ NULL, \
"" \ "", \
-1, -1, -1, -1, -1 \
} }
@@ -306,6 +313,8 @@ bool update_node_record_conn_priority(PGconn *conn, t_configuration_options *op
void clear_node_info_list(NodeInfoList *nodes); void clear_node_info_list(NodeInfoList *nodes);
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
/* event functions */ /* event functions */
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details); bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
bool create_event_notification(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details); bool create_event_notification(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);

184
repmgr-action-node.c Normal file
View File

@@ -0,0 +1,184 @@
/*
* repmgr-action-node.c
*
* Implements actions available for any kind of node
*
* Copyright (c) 2ndQuadrant, 2010-2017
*/
#include "repmgr.h"
#include "repmgr-client-global.h"
#include "repmgr-action-node.h"
void
do_node_status(void)
{
PGconn *conn;
int target_node_id = UNKNOWN_NODE_ID;
t_node_info node_info = T_NODE_INFO_INITIALIZER;
char server_version[MAXLEN];
char cluster_size[MAXLEN];
PQExpBufferData output;
ItemList warnings = { NULL, NULL };
RecoveryType recovery_type;
if (strlen(config_file_options.conninfo))
conn = establish_db_connection(config_file_options.conninfo, true);
else
conn = establish_db_connection_by_params(&source_conninfo, true);
if (runtime_options.node_id != UNKNOWN_NODE_ID)
target_node_id = runtime_options.node_id;
else
target_node_id = config_file_options.node_id;
/* Check node exists and is really a standby */
if (get_node_record(conn, target_node_id, &node_info) != RECORD_FOUND)
{
log_error(_("no record found for node %i"), target_node_id);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
(void) get_server_version(conn, server_version);
if (get_cluster_size(conn, cluster_size) == false)
strncpy(cluster_size, _("unknown"), MAXLEN);
recovery_type = get_recovery_type(conn);
get_node_replication_stats(conn, &node_info);
initPQExpBuffer(&output);
appendPQExpBuffer(
&output,
"Node \"%s\":\n",
node_info.node_name);
appendPQExpBuffer(
&output,
"%cPostgreSQL version: %s\n",
'\t', server_version);
appendPQExpBuffer(
&output,
"%cTotal data size: %s\n",
'\t', cluster_size);
appendPQExpBuffer(
&output,
"%cConninfo: \"%s\"\n",
'\t', node_info.conninfo);
appendPQExpBuffer(
&output,
"%cRole: \"%s\"\n",
'\t', get_node_type_string(node_info.type));
switch (node_info.type)
{
case PRIMARY:
if (recovery_type == RECTYPE_STANDBY)
{
item_list_append(
&warnings,
_("- node is registered as primary but running as standby"));
}
break;
case STANDBY:
if (recovery_type == RECTYPE_PRIMARY)
{
item_list_append(
&warnings,
_("- node is registered as standby but running as primary"));
}
break;
case BDR:
default:
break;
}
if (node_info.max_wal_senders >= 0)
{
appendPQExpBuffer(
&output,
"%cReplication connections: %i (of maximal %i)\n",
'\t',
node_info.attached_wal_receivers,
node_info.max_wal_senders);
}
else if (node_info.max_wal_senders == 0)
{
appendPQExpBuffer(
&output,
"%cReplication connections: disabled\n",
'\t');
}
if (node_info.max_replication_slots > 0)
{
appendPQExpBuffer(
&output,
"%cReplication slots: %i (of maximal %i)",
'\t',
node_info.active_replication_slots + node_info.inactive_replication_slots,
node_info.max_replication_slots);
if (node_info.inactive_replication_slots > 0)
{
appendPQExpBuffer(
&output,
"; %i inactive",
node_info.inactive_replication_slots);
item_list_append_format(
&warnings,
_("- node has %i inactive replication slots"),
node_info.inactive_replication_slots);
}
}
else if (node_info.max_replication_slots == 0)
{
appendPQExpBuffer(
&output,
"%cReplication slots: disabled\n",
'\t');
}
/*
appendPQExpBuffer(
&output,
"%c: \"%s\"\n",
'\t', );
appendPQExpBuffer(
&output,
"%c: \"%s\"\n",
'\t', node_info.);
*/
puts(output.data);
termPQExpBuffer(&output);
if (warnings.head != NULL && runtime_options.terse == false)
{
log_warning(_("following issue(s) were detected:"));
print_item_list(&warnings);
log_hint(_("execute \"repmgr node check\" for more details"));
}
}
/*
 * do_node_check()
 *
 * Entry point for the "node check" action. The function currently has
 * no statements, so calling it has no effect.
 */
void
do_node_check(void)
{
	/* nothing to do */
	return;
}

12
repmgr-action-node.h Normal file
View File

@@ -0,0 +1,12 @@
/*
* repmgr-action-node.h
* Copyright (c) 2ndQuadrant, 2010-2017
*/
/* Include guard: the declarations below are processed once per translation unit */
#ifndef _REPMGR_ACTION_NODE_H_
#define _REPMGR_ACTION_NODE_H_
/*
 * Handlers for the node-level client actions. Both take no arguments and
 * return nothing.
 */
extern void do_node_status(void);
extern void do_node_check(void);
#endif /* _REPMGR_ACTION_NODE_H_ */

View File

@@ -1389,8 +1389,8 @@ do_standby_follow(void)
* replication slots are in use and we want to delete the old slot. * replication slots are in use and we want to delete the old slot.
*/ */
record_status = get_node_record(primary_conn, record_status = get_node_record(primary_conn,
config_file_options.node_id, config_file_options.node_id,
&local_node_record); &local_node_record);
if (record_status != RECORD_FOUND) if (record_status != RECORD_FOUND)
{ {

View File

@@ -16,6 +16,9 @@
* STANDBY UNREGISTER * STANDBY UNREGISTER
* STANDBY PROMOTE * STANDBY PROMOTE
* *
* BDR REGISTER
* BDR UNREGISTER
*
* CLUSTER SHOW * CLUSTER SHOW
* CLUSTER EVENT * CLUSTER EVENT
*/ */
@@ -30,6 +33,7 @@
#include "repmgr-action-primary.h" #include "repmgr-action-primary.h"
#include "repmgr-action-standby.h" #include "repmgr-action-standby.h"
#include "repmgr-action-bdr.h" #include "repmgr-action-bdr.h"
#include "repmgr-action-node.h"
#include "repmgr-action-cluster.h" #include "repmgr-action-cluster.h"
@@ -588,6 +592,10 @@ main(int argc, char **argv)
action = PRIMARY_REGISTER; action = PRIMARY_REGISTER;
else if (strcasecmp(repmgr_action, "UNREGISTER") == 0) else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
action = PRIMARY_UNREGISTER; action = PRIMARY_UNREGISTER;
else if (strcasecmp(repmgr_action, "CHECK") == 0)
action = NODE_CHECK;
else if (strcasecmp(repmgr_action, "STATUS") == 0)
action = NODE_STATUS;
} }
else if (strcasecmp(repmgr_node_type, "STANDBY") == 0) else if (strcasecmp(repmgr_node_type, "STANDBY") == 0)
@@ -608,6 +616,10 @@ main(int argc, char **argv)
action = STANDBY_ARCHIVE_CONFIG; action = STANDBY_ARCHIVE_CONFIG;
else if (strcasecmp(repmgr_action, "RESTORE-CONFIG") == 0) else if (strcasecmp(repmgr_action, "RESTORE-CONFIG") == 0)
action = STANDBY_RESTORE_CONFIG; action = STANDBY_RESTORE_CONFIG;
else if (strcasecmp(repmgr_action, "CHECK") == 0)
action = NODE_CHECK;
else if (strcasecmp(repmgr_action, "STATUS") == 0)
action = NODE_STATUS;
} }
else if (strcasecmp(repmgr_node_type, "BDR") == 0) else if (strcasecmp(repmgr_node_type, "BDR") == 0)
#else #else
@@ -618,6 +630,18 @@ main(int argc, char **argv)
action = BDR_REGISTER; action = BDR_REGISTER;
else if (strcasecmp(repmgr_action, "UNREGISTER") == 0) else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
action = BDR_UNREGISTER; action = BDR_UNREGISTER;
else if (strcasecmp(repmgr_action, "CHECK") == 0)
action = NODE_CHECK;
else if (strcasecmp(repmgr_action, "STATUS") == 0)
action = NODE_STATUS;
}
else if (strcasecmp(repmgr_node_type, "NODE") == 0)
{
if (strcasecmp(repmgr_action, "CHECK") == 0)
action = NODE_CHECK;
else if (strcasecmp(repmgr_action, "STATUS") == 0)
action = NODE_STATUS;
} }
else if (strcasecmp(repmgr_node_type, "CLUSTER") == 0) else if (strcasecmp(repmgr_node_type, "CLUSTER") == 0)
@@ -929,6 +953,11 @@ main(int argc, char **argv)
do_bdr_unregister(); do_bdr_unregister();
break; break;
/* NODE */
case NODE_STATUS:
do_node_status();
break;
/* CLUSTER */ /* CLUSTER */
case CLUSTER_SHOW: case CLUSTER_SHOW:
do_cluster_show(); do_cluster_show();

View File

@@ -22,13 +22,15 @@
#define STANDBY_SWITCHOVER 8 #define STANDBY_SWITCHOVER 8
#define STANDBY_ARCHIVE_CONFIG 9 #define STANDBY_ARCHIVE_CONFIG 9
#define STANDBY_RESTORE_CONFIG 10 #define STANDBY_RESTORE_CONFIG 10
#define CLUSTER_SHOW 11 #define BDR_REGISTER 11
#define CLUSTER_CLEANUP 12 #define BDR_UNREGISTER 12
#define CLUSTER_MATRIX 13 #define NODE_STATUS 13
#define CLUSTER_CROSSCHECK 14 #define NODE_CHECK 14
#define CLUSTER_EVENT 15 #define CLUSTER_SHOW 15
#define BDR_REGISTER 16 #define CLUSTER_CLEANUP 16
#define BDR_UNREGISTER 17 #define CLUSTER_MATRIX 17
#define CLUSTER_CROSSCHECK 18
#define CLUSTER_EVENT 19
/* command line options without short versions */ /* command line options without short versions */
#define OPT_HELP 1 #define OPT_HELP 1