From 10ef30096caac599a1c05a5723aa8e7535c392ec Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 14 Aug 2017 22:57:09 +0900 Subject: [PATCH] "node check": add server role check --- dbutils.c | 41 +++++++++++---- dbutils.h | 2 +- repmgr-action-bdr.c | 4 +- repmgr-action-node.c | 116 +++++++++++++++++++++++++++++++++++++++-- repmgr-action-node.h | 4 -- repmgr-client-global.h | 3 +- repmgr-client.c | 4 ++ repmgr-client.h | 2 + repmgrd-bdr.c | 2 +- 9 files changed, 156 insertions(+), 22 deletions(-) diff --git a/dbutils.c b/dbutils.c index 10837a3f..3661c3f5 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1038,9 +1038,16 @@ get_recovery_type(PGconn *conn) PQerrorMessage(conn)); recovery_type = RECTYPE_UNKNOWN; } - else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0) + else if (PQntuples(res) == 1) { - recovery_type = RECTYPE_STANDBY; + if (strcmp(PQgetvalue(res, 0, 0), "f") == 0) + { + recovery_type = RECTYPE_PRIMARY; + } + else + { + recovery_type = RECTYPE_STANDBY; + } } PQclear(res); @@ -1462,19 +1469,23 @@ get_replication_lag_seconds(PGconn *conn) " AS lag_seconds"); res = PQexec(conn, query.data); + log_verbose(LOG_DEBUG, "get_replication_lag_seconds():\n%s", query.data); termPQExpBuffer(&query); - log_verbose(LOG_DEBUG, "get_node_record():\n%s", query.data); - - if (PQresultStatus(res) != PGRES_TUPLES_OK || !PQntuples(res)) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_warning("%s", PQerrorMessage(conn)); + log_warning("%s", PQerrorMessage(conn)); PQclear(res); /* XXX magic number */ return -1; } + if (!PQntuples(res)) + { + return -1; + } + lag_seconds = atoi(PQgetvalue(res, 0, 0)); PQclear(res); @@ -3576,7 +3587,7 @@ get_last_wal_receive_location(PGconn *conn) /* ============= */ bool -is_bdr_db(PGconn *conn) +is_bdr_db(PGconn *conn, PQExpBufferData *output) { PQExpBufferData query; PGresult *res = NULL; @@ -3604,7 +3615,13 @@ is_bdr_db(PGconn *conn) if (is_bdr_db == false) { - log_warning(_("BDR extension is not available for this database")); + const char *warning = _("BDR extension is not available for this database"); + + if (output != NULL) + appendPQExpBuffer(output, "%s", warning); + else + log_warning("%s", warning); + return is_bdr_db; } @@ -3620,11 +3637,15 @@ is_bdr_db(PGconn *conn) if (is_bdr_db == false) { - log_warning(_("BDR extension available for this database, but the database is not configured for BDR")); + const char *warning = _("BDR extension available for this database, but the database is not configured for BDR"); + if (output != NULL) + appendPQExpBuffer(output, "%s", warning); + else + log_warning("%s", warning); } - PQclear(res); + PQclear(res); return is_bdr_db; } diff --git a/dbutils.h b/dbutils.h index 38139d91..b6a29499 100644 --- a/dbutils.h +++ b/dbutils.h @@ -441,7 +441,7 @@ void get_node_replication_stats(PGconn *conn, t_node_info *node_info); /* BDR functions */ void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list); RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info); -bool is_bdr_db(PGconn *conn); +bool is_bdr_db(PGconn *conn, PQExpBufferData *output); bool is_active_bdr_node(PGconn *conn, const char *node_name); bool is_bdr_repmgr(PGconn *conn); bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set); diff --git a/repmgr-action-bdr.c b/repmgr-action-bdr.c index f564b799..a318542f 100644 --- a/repmgr-action-bdr.c +++ b/repmgr-action-bdr.c @@ -42,7 +42,7 @@ do_bdr_register(void) conn = establish_db_connection(config_file_options.conninfo, true); - if (!is_bdr_db(conn)) + if (!is_bdr_db(conn, NULL)) { log_error(_("database \"%s\" is not BDR-enabled"), dbname); log_hint(_("when using repmgr with BDR, the repmgr schema must be stored in the BDR database")); @@ -336,7 +336,7 @@ do_bdr_unregister(void) conn = establish_db_connection(config_file_options.conninfo, true); - if (!is_bdr_db(conn)) + if (!is_bdr_db(conn, NULL)) { log_error(_("database \"%s\" is not BDR-enabled"), dbname); exit(ERR_BAD_CONFIG); diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 92c125a1..f1a0e109 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -29,6 +29,10 @@ static void _do_node_status_is_shutdown(void); static void _do_node_archive_config(void); static void _do_node_restore_config(void); +static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); +static CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_output); +static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, CheckStatusList *list_output); + void do_node_status(void) @@ -547,11 +551,18 @@ do_node_check(void) return; } + if (runtime_options.role == true) + { + (void) do_node_check_role(conn, runtime_options.output_mode, &node_info, NULL); + PQfinish(conn); + return; + } + /* output general overview */ initPQExpBuffer(&output); - //(void) do_node_check_role(conn, runtime_options.output_mode, &output); + (void) do_node_check_role(conn, runtime_options.output_mode, &node_info, &status_list); (void) do_node_check_replication_lag(conn, runtime_options.output_mode, &status_list); (void) do_node_check_archiver(conn, runtime_options.output_mode, &status_list); @@ -594,7 +605,104 @@ do_node_check(void) PQfinish(conn); } -CheckStatus + +static CheckStatus +do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output) +{ + + CheckStatus status = CHECK_STATUS_OK; + PQExpBufferData details; + RecoveryType recovery_type = get_recovery_type(conn); + + if (mode == OM_CSV) + { + log_error(_("--csv output not provided with --role option")); + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + initPQExpBuffer(&details); + + switch (node_info->type) + { + case PRIMARY: + if (recovery_type == RECTYPE_STANDBY) + { + status = CHECK_STATUS_CRITICAL; + appendPQExpBuffer( + &details, + _("node is registered as primary but running as standby")); + } + break; + case STANDBY: + if (recovery_type == RECTYPE_PRIMARY) + { + status = CHECK_STATUS_CRITICAL; + appendPQExpBuffer( + &details, + _("node is registered as standby but running as primary")); + } + break; + case BDR: + { + PQExpBufferData output; + + initPQExpBuffer(&output); + if (is_bdr_db(conn, &output) == false) + { + status = CHECK_STATUS_CRITICAL; + appendPQExpBuffer( + &details, + "%s", output.data); + } + termPQExpBuffer(&output); + + if (status == CHECK_STATUS_OK) + { + if (is_active_bdr_node(conn, node_info->node_name) == false) + { + status = CHECK_STATUS_CRITICAL; + appendPQExpBuffer( + &details, + _("node is not an active BDR node")); + } + } + } + default: + break; + } + + switch (mode) + { + case OM_NAGIOS: + printf("PG_SERVER_ROLE %s: %s\n", + output_check_status(status), + details.data); + break; + case OM_TEXT: + if (list_output != NULL) + { + check_status_list_set(list_output, + "Server role", + status, + details.data); + } + else + { + printf("%s (%s)\n", + output_check_status(status), + details.data); + } + default: + break; + } + + termPQExpBuffer(&details); + return status; + +} + +static CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_output) { int ready_archive_files = 0; @@ -604,6 +712,7 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_outp if (mode == OM_CSV) { log_error(_("--csv output not provided with --archiver option")); + PQfinish(conn); exit(ERR_BAD_CONFIG); } @@ -748,7 +857,7 @@ do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_outp } -CheckStatus +static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, CheckStatusList *list_output) { CheckStatus status = CHECK_STATUS_UNKNOWN; @@ -758,6 +867,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, CheckStatusList *li if (mode == OM_CSV) { log_error(_("--csv output not provided with --replication-lag option")); + PQfinish(conn); exit(ERR_BAD_CONFIG); } diff --git a/repmgr-action-node.h b/repmgr-action-node.h index 4689d19d..2cb874d1 100644 --- a/repmgr-action-node.h +++ b/repmgr-action-node.h @@ -8,10 +8,6 @@ extern void do_node_status(void); extern void do_node_check(void); -//extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output); -extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, CheckStatusList *list_output); - -extern CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, CheckStatusList *list_output); extern void do_node_rejoin(void); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 1b327feb..b7ade5c7 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -86,6 +86,7 @@ typedef struct /* "node check" options */ bool archiver; bool replication_lag; + bool role; /* "node join" options */ char config_files[MAXLEN]; @@ -131,7 +132,7 @@ typedef struct /* "node status" options */ \ false, \ /* "node check" options */ \ - false, false, \ + false, false, false, \ /* "node join" options */ \ "", \ /* "node service" options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index 27c9aaec..7d8af941 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -430,6 +430,10 @@ main(int argc, char **argv) runtime_options.replication_lag = true; break; + case OPT_ROLE: + runtime_options.role = true; + break; + /* "node join" options * * ------------------- */ case OPT_CONFIG_FILES: diff --git a/repmgr-client.h b/repmgr-client.h index 7a74eb45..5297a85e 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -70,6 +70,7 @@ #define OPT_REPLICATION_LAG 1034 #define OPT_CONFIG_FILES 1035 #define OPT_SIBLINGS_FOLLOW 1036 +#define OPT_ROLE 1037 /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 #define OPT_NO_CONNINFO_PASSWORD 998 @@ -139,6 +140,7 @@ static struct option long_options[] = /* "node check" options */ {"archiver", no_argument, NULL, OPT_ARCHIVER }, {"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG }, + {"role", no_argument, NULL, OPT_ROLE }, /* "node join" options */ {"config-files", required_argument, NULL, OPT_CONFIG_FILES }, diff --git a/repmgrd-bdr.c b/repmgrd-bdr.c index 4e901c92..69c26f66 100644 --- a/repmgrd-bdr.c +++ b/repmgrd-bdr.c @@ -59,7 +59,7 @@ monitor_bdr(void) */ log_info(_("connected to database, checking for BDR")); - if (!is_bdr_db(local_conn)) + if (!is_bdr_db(local_conn, NULL)) { log_error(_("database is not BDR-enabled")); exit(ERR_BAD_CONFIG);