From d9cb38c7f003514319d36a9752505d37afb93728 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 30 Mar 2020 17:54:24 +0900 Subject: [PATCH] node check: add --upstream option We have a --downstream option to check for attached nodes, but it would be useful to have a corresponding --upstream option too. A following patch will adapt the behaviour of this option when executed on the primary node. --- HISTORY | 1 + doc/repmgr-node-check.xml | 6 +++ repmgr-action-node.c | 95 +++++++++++++++++++++++++++++++++++++++ repmgr-client-global.h | 3 +- repmgr-client.c | 4 ++ repmgr-client.h | 38 ++++++++-------- 6 files changed, 128 insertions(+), 19 deletions(-) diff --git a/HISTORY b/HISTORY index 6850338f..f3170115 100644 --- a/HISTORY +++ b/HISTORY @@ -5,6 +5,7 @@ repmgr: ensure postgresql.auto.conf is created with correct permissions (Ian) repmgr: minimize requirement to check upstream data directory location during "standby clone" (Ian) + repmgr: add --upstream option to "node check" repmgr: report error code on follow/rejoin failure due to non-available replication slot (Ian) repmgr: ensure "node rejoin" checks for available replication slots (Ian) diff --git a/doc/repmgr-node-check.xml b/doc/repmgr-node-check.xml index 5c6a549f..093f8001 100644 --- a/doc/repmgr-node-check.xml +++ b/doc/repmgr-node-check.xml @@ -82,6 +82,12 @@ + + + : checks that the node is attached to its expected upstream + + + : checks there are no inactive physical replication slots diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 6163e710..1987e7d9 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -44,6 +44,7 @@ static void _do_node_restore_config(void); static void do_node_check_replication_connection(void); static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output); static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output); +static CheckStatus do_node_check_upstream(PGconn 
*conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
 static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
 static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
 static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
@@ -791,6 +792,15 @@ do_node_check(void)
 		exit(return_code);
 	}
 
+	if (runtime_options.upstream == true)
+	{
+		return_code = do_node_check_upstream(conn,
+											 runtime_options.output_mode,
+											 &node_info,
+											 NULL);
+		PQfinish(conn);
+		exit(return_code);
+	}
 
 	if (runtime_options.replication_lag == true)
 	{
@@ -878,6 +888,9 @@ do_node_check(void)
 	if (do_node_check_downstream(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
 		issue_detected = true;
 
+	if (do_node_check_upstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
+		issue_detected = true;
+
 	if (do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
 		issue_detected = true;
 
@@ -1336,6 +1349,87 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
 }
 
 
+/*
+ * Check that this node is attached to its expected upstream node.
+ *
+ * Fetches the record for node_info->upstream_node_id using "conn", connects to
+ * that node and checks whether config_file_options.node_name is attached to it.
+ * Returns CHECK_STATUS_OK if attached, otherwise CHECK_STATUS_CRITICAL; exits
+ * with ERR_BAD_CONFIG for CSV mode without a list, or a missing upstream record.
+ */
+static CheckStatus
+do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
+{
+	PGconn *upstream_conn = NULL;
+	t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
+	PQExpBufferData details;
+	CheckStatus status = CHECK_STATUS_OK;
+
+	if (mode == OM_CSV && list_output == NULL)
+	{
+		log_error(_("--csv output not provided with --upstream option"));
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	if (get_node_record(conn, node_info->upstream_node_id, &upstream_node_info) != RECORD_FOUND)
+	{
+		log_error(_("no record found for upstream node %i"), node_info->upstream_node_id);
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	initPQExpBuffer(&details);
+
+	upstream_conn = establish_db_connection(upstream_node_info.conninfo, true);
+
+	/* check our node is connected */
+	if (is_downstream_node_attached(upstream_conn, config_file_options.node_name) != NODE_ATTACHED)
+	{
+		appendPQExpBuffer(&details,
+						  _("node \"%s\" (ID: %i) is not attached to expected upstream node \"%s\" (ID: %i)"),
+						  node_info->node_name, node_info->node_id,
+						  upstream_node_info.node_name, upstream_node_info.node_id);
+		status = CHECK_STATUS_CRITICAL;
+	}
+	else
+	{
+		appendPQExpBuffer(&details,
+						  _("node \"%s\" (ID: %i) is attached to expected upstream node \"%s\" (ID: %i)"),
+						  node_info->node_name, node_info->node_id,
+						  upstream_node_info.node_name, upstream_node_info.node_id);
+	}
+
+	/* the upstream connection is only needed for the attachment check */
+	PQfinish(upstream_conn);
+
+	switch (mode)
+	{
+		case OM_NAGIOS:
+			printf("REPMGR_UPSTREAM_SERVER %s: %s | ", output_check_status(status), details.data);
+			/* do not fall through and emit the text-mode output as well */
+			break;
+		case OM_TEXT:
+			if (list_output != NULL)
+			{
+				check_status_list_set(list_output, "Upstream connection", status, details.data);
+			}
+			else
+			{
+				printf("%s (%s)\n", output_check_status(status), details.data);
+			}
+			break;
+		/* NOTE(review): OM_CSV reaches default, so list_output gets no entry - confirm intended */
+		default:
+			break;
+	}
+
+	termPQExpBuffer(&details);
+
+	return status;
+}
+
+
 static CheckStatus
 do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
 {
@@ -3254,6 +3348,7 @@ do_node_help(void)
 	printf(_(" Following options check an individual status:\n"));
 	printf(_(" --archive-ready number of WAL files ready for archiving\n"));
 	printf(_(" --downstream whether all downstream nodes are connected\n"));
+	printf(_(" --upstream whether the node is connected to its upstream\n"));
 	printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
 	printf(_(" --role check node has expected role\n"));
 	printf(_(" --slots check for inactive replication slots\n"));
diff --git a/repmgr-client-global.h b/repmgr-client-global.h
index a10e7da2..bd8318be 100644
--- a/repmgr-client-global.h
+++ b/repmgr-client-global.h
@@ -111,6 +111,7 @@ typedef struct /* "node check" options */ bool archive_ready; bool downstream; + bool upstream; bool replication_lag; bool role; bool slots; @@ -171,7 +172,7 @@ typedef struct /* "node status" options */ \ false, \ /* "node check" options */ \ - false, false, false, false, false, false, false, false, false, false, \ + false, false, false, false, false, false, false, false, false, false, false, \ /* "node rejoin" options */ \ "", \ /* "node service" options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index 3d5fc7c9..0e39bda4 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -505,6 +505,10 @@ main(int argc, char **argv) runtime_options.downstream = true; break; + case OPT_UPSTREAM: + runtime_options.upstream = true; + break; + case OPT_REPLICATION_LAG: runtime_options.replication_lag = true; break; diff --git a/repmgr-client.h b/repmgr-client.h index 8131e0c1..55a95461 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -81,24 +81,25 @@ #define OPT_SIBLINGS_FOLLOW 1028 #define OPT_ROLE 1029 #define OPT_DOWNSTREAM 1030 -#define OPT_SLOTS 1031 -#define OPT_CONFIG_ARCHIVE_DIR 1032 -#define OPT_HAS_PASSFILE 1033 -#define OPT_WAIT_START 1034 -#define OPT_REPL_CONN 1035 -#define OPT_REMOTE_NODE_ID 1036 -#define OPT_REPLICATION_CONF_ONLY 1037 -#define OPT_NO_WAIT 1038 -#define OPT_MISSING_SLOTS 1039 -#define OPT_REPMGRD_NO_PAUSE 1040 -#define OPT_VERSION_NUMBER 1041 -#define OPT_DATA_DIRECTORY_CONFIG 1042 -#define OPT_COMPACT 1043 -#define OPT_DISABLE_WAL_RECEIVER 1044 -#define OPT_ENABLE_WAL_RECEIVER 1045 -#define OPT_DETAIL 1046 -#define OPT_REPMGRD_FORCE_UNPAUSE 1047 -#define OPT_REPLICATION_CONFIG_OWNER 1048 +#define OPT_UPSTREAM 1031 +#define OPT_SLOTS 1032 +#define OPT_CONFIG_ARCHIVE_DIR 1033 +#define OPT_HAS_PASSFILE 1034 +#define OPT_WAIT_START 1035 +#define OPT_REPL_CONN 1036 +#define OPT_REMOTE_NODE_ID 1037 +#define OPT_REPLICATION_CONF_ONLY 1038 +#define OPT_NO_WAIT 1039 +#define OPT_MISSING_SLOTS 1040 +#define 
OPT_REPMGRD_NO_PAUSE 1041 +#define OPT_VERSION_NUMBER 1042 +#define OPT_DATA_DIRECTORY_CONFIG 1043 +#define OPT_COMPACT 1044 +#define OPT_DISABLE_WAL_RECEIVER 1045 +#define OPT_ENABLE_WAL_RECEIVER 1046 +#define OPT_DETAIL 1047 +#define OPT_REPMGRD_FORCE_UNPAUSE 1048 +#define OPT_REPLICATION_CONFIG_OWNER 1049 /* deprecated since 4.0 */ #define OPT_CHECK_UPSTREAM_CONFIG 999 @@ -179,6 +180,7 @@ static struct option long_options[] = /* "node check" options */ {"archive-ready", no_argument, NULL, OPT_ARCHIVE_READY}, {"downstream", no_argument, NULL, OPT_DOWNSTREAM}, + {"upstream", no_argument, NULL, OPT_UPSTREAM}, {"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG}, {"role", no_argument, NULL, OPT_ROLE}, {"slots", no_argument, NULL, OPT_SLOTS},