diff --git a/dbutils.c b/dbutils.c
index 64e438ee..d8d7f404 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -6008,6 +6008,43 @@ is_wal_replay_paused(PGconn *conn, bool check_pending_wal)
return is_paused;
}
+/* repmgrd status functions */
+
+/*
+ * get_repmgrd_status()
+ *
+ * Ask the server "conn" is connected to for the state of repmgrd, using the
+ * SQL functions repmgr.repmgrd_is_running() and repmgr.repmgrd_is_paused(),
+ * and translate the answer into a CheckStatus:
+ *
+ *   CHECK_STATUS_OK        repmgrd is running
+ *   CHECK_STATUS_WARNING   repmgrd is running but paused
+ *   CHECK_STATUS_CRITICAL  repmgrd is not running
+ *   CHECK_STATUS_UNKNOWN   the query failed or returned an unexpected value,
+ *                          so nothing can be said about repmgrd
+ *
+ * A failed query is reported through log_db_error().
+ */
+CheckStatus
+get_repmgrd_status(PGconn *conn)
+{
+	PQExpBufferData query;
+	PGresult   *res = NULL;
+
+	/* nothing is known about repmgrd until the query has answered */
+	CheckStatus repmgrd_status = CHECK_STATUS_UNKNOWN;
+
+	initPQExpBuffer(&query);
+
+	/* the query yields 0 (running), 1 (running but paused) or 2 (not running) */
+	appendPQExpBufferStr(&query,
+						 " SELECT "
+						 " CASE "
+						 " WHEN repmgr.repmgrd_is_running() "
+						 " THEN "
+						 " CASE "
+						 " WHEN repmgr.repmgrd_is_paused() THEN 1 ELSE 0 "
+						 " END "
+						 " ELSE 2 "
+						 " END AS repmgrd_status");
+	res = PQexec(conn, query.data);
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		log_db_error(conn, query.data, _("unable to execute repmgrd status query"));
+	}
+	else
+	{
+		/*
+		 * Map the query's codes to enum constants explicitly, so the result
+		 * does not depend on the numeric values of the CheckStatus members.
+		 */
+		switch (atoi(PQgetvalue(res, 0, 0)))
+		{
+			case 0:
+				repmgrd_status = CHECK_STATUS_OK;
+				break;
+			case 1:
+				repmgrd_status = CHECK_STATUS_WARNING;
+				break;
+			case 2:
+				repmgrd_status = CHECK_STATUS_CRITICAL;
+				break;
+			default:
+				/* unexpected value: leave the status as UNKNOWN */
+				break;
+		}
+	}
+
+	termPQExpBuffer(&query);
+	PQclear(res);
+	return repmgrd_status;
+}
+
/* miscellaneous debugging functions */
diff --git a/dbutils.h b/dbutils.h
index 62c21d9e..e2783cea 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -602,6 +602,9 @@ int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
+/* repmgrd status functions */
+CheckStatus get_repmgrd_status(PGconn *conn);
+
/* miscellaneous debugging functions */
const char *print_node_status(NodeStatus node_status);
const char *print_pqping_status(PGPing ping_status);
diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml
index b8f88db0..b2139ed5 100644
--- a/doc/appendix-release-notes.xml
+++ b/doc/appendix-release-notes.xml
@@ -69,6 +69,14 @@
This makes it clearer what &repmgr; is trying to do.
+
+
+
+ repmgr node check:
+ option --repmgrd added to check &repmgrd;
+ status.
+
+
diff --git a/doc/repmgr-node-check.xml b/doc/repmgr-node-check.xml
index 55f88cea..63f152d5 100644
--- a/doc/repmgr-node-check.xml
+++ b/doc/repmgr-node-check.xml
@@ -125,12 +125,29 @@
is correctly configured.
-
-
+
+
+ repmgrd
+
+ A separate check is available to verify whether &repmgrd; is running.
+ This is not included in the general output, as this does not
+ per se constitute a check of the node's replication status.
+
+
+
+
+ --repmgrd: checks whether &repmgrd; is running.
+ If &repmgrd; is running but paused, status 1
+ (WARNING) is returned.
+
+
+
+
+
Additional checks
diff --git a/doc/repmgrd-configuration.xml b/doc/repmgrd-configuration.xml
index 31a4eea8..920ab6c0 100644
--- a/doc/repmgrd-configuration.xml
+++ b/doc/repmgrd-configuration.xml
@@ -1079,6 +1079,29 @@ REPMGRD_OPTS="--daemonize=false"
+
+
+ repmgrd daemon monitoring
+
+ repmgrd
+ monitoring
+
+
+ monitoring
+ repmgrd
+
+
+
+ The command repmgr service status
+ provides an overview of the &repmgrd; daemon status (including pause status)
+ on all nodes in the cluster.
+
+
+ From &repmgr; 5.3, repmgr node check --repmgrd
+ can be used to check the status of &repmgrd; (including pause status)
+ on the local node.
+
+
diff --git a/repmgr-action-node.c b/repmgr-action-node.c
index e69d7425..f709ca63 100644
--- a/repmgr-action-node.c
+++ b/repmgr-action-node.c
@@ -35,6 +35,7 @@
static bool copy_file(const char *src_file, const char *dest_file);
static void format_archive_dir(PQExpBufferData *archive_dir);
static t_server_action parse_server_action(const char *action);
+static const char *output_repmgrd_status(CheckStatus status);
static void exit_optformat_error(const char *error, int errcode);
@@ -52,9 +53,11 @@ static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
+static CheckStatus do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_db_connection(PGconn *conn, OutputMode mode);
+
/*
* NODE STATUS
*
@@ -941,6 +944,16 @@ do_node_check(void)
exit(return_code);
}
+ if (runtime_options.repmgrd == true)
+ {
+ return_code = do_node_check_repmgrd(conn,
+ runtime_options.output_mode,
+ &node_info,
+ NULL);
+ PQfinish(conn);
+ exit(return_code);
+ }
+
if (runtime_options.replication_config_owner == true)
{
return_code = do_node_check_replication_config_owner(conn,
@@ -2024,7 +2037,6 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
return status;
}
-
CheckStatus
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
@@ -2159,6 +2171,53 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
return status;
}
+/*
+ * do_node_check_repmgrd()
+ *
+ * Obtain the repmgrd status with get_repmgrd_status() and report it in the
+ * requested output mode.
+ *
+ * In CSV and text modes the result is recorded in "list_output" when one is
+ * supplied; otherwise it is printed to stdout. CSV output is only produced
+ * through the list, so OM_CSV with a NULL "list_output" is rejected: an error
+ * is logged, "conn" is closed and the process exits with ERR_BAD_CONFIG.
+ *
+ * "node_info" is not used by this check.
+ *
+ * Returns the CheckStatus obtained from get_repmgrd_status().
+ */
+CheckStatus
+do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
+{
+	CheckStatus status = CHECK_STATUS_OK;
+
+	if (mode == OM_CSV && list_output == NULL)
+	{
+		log_error(_("--csv output not provided with --repmgrd option"));
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	status = get_repmgrd_status(conn);
+
+	switch (mode)
+	{
+		case OM_OPTFORMAT:
+			printf("--repmgrd=%s\n",
+				   output_check_status(status));
+			break;
+
+		case OM_NAGIOS:
+			printf("REPMGRD %s: %s\n",
+				   output_check_status(status),
+				   output_repmgrd_status(status));
+			break;
+
+		case OM_CSV:
+		case OM_TEXT:
+			if (list_output != NULL)
+			{
+				check_status_list_set(list_output,
+									  "repmgrd",
+									  status,
+									  output_repmgrd_status(status));
+			}
+			else
+			{
+				printf("%s (%s)\n",
+					   output_check_status(status),
+					   output_repmgrd_status(status));
+			}
+			/* terminate this arm explicitly rather than falling into "default" */
+			break;
+
+		default:
+			break;
+	}
+
+	return status;
+}
+
/*
* This is not included in the general list output
*/
@@ -3570,6 +3629,25 @@ copy_file(const char *src_file, const char *dest_file)
}
+/*
+ * output_repmgrd_status()
+ *
+ * Return a fixed, human-readable description of the repmgrd state
+ * represented by "status". The result is always a string literal.
+ * Any value other than the four CheckStatus members handled below
+ * yields "UNKNOWN".
+ */
+static const char *
+output_repmgrd_status(CheckStatus status)
+{
+	const char *description = "UNKNOWN";
+
+	if (status == CHECK_STATUS_OK)
+	{
+		description = "repmgrd running";
+	}
+	else if (status == CHECK_STATUS_WARNING)
+	{
+		description = "repmgrd running but paused";
+	}
+	else if (status == CHECK_STATUS_CRITICAL)
+	{
+		description = "repmgrd not running";
+	}
+	else if (status == CHECK_STATUS_UNKNOWN)
+	{
+		description = "repmgrd status unknown";
+	}
+
+	return description;
+}
+
+
void
do_node_help(void)
{
@@ -3612,6 +3690,7 @@ do_node_help(void)
printf(_(" --role check node has expected role\n"));
printf(_(" --slots check for inactive replication slots\n"));
printf(_(" --missing-slots check for missing replication slots\n"));
+ printf(_(" --repmgrd check if repmgrd is running\n"));
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
puts("");
diff --git a/repmgr-client-global.h b/repmgr-client-global.h
index fe9a3968..c16bb9ea 100644
--- a/repmgr-client-global.h
+++ b/repmgr-client-global.h
@@ -120,6 +120,7 @@ typedef struct
bool missing_slots;
bool has_passfile;
bool replication_connection;
+ bool repmgrd;
bool data_directory_config;
bool replication_config_owner;
bool db_connection;
@@ -175,7 +176,7 @@ typedef struct
/* "node status" options */ \
false, \
/* "node check" options */ \
- false, false, false, false, false, false, false, false, false, false, false, false, \
+ false, false, false, false, false, false, false, false, false, false, false, false, false, \
/* "node rejoin" options */ \
"", \
/* "node service" options */ \
diff --git a/repmgr-client.c b/repmgr-client.c
index acea3524..fb554a73 100644
--- a/repmgr-client.c
+++ b/repmgr-client.c
@@ -549,6 +549,10 @@ main(int argc, char **argv)
runtime_options.data_directory_config = true;
break;
+ case OPT_REPMGRD:
+ runtime_options.repmgrd = true;
+ break;
+
case OPT_REPLICATION_CONFIG_OWNER:
runtime_options.replication_config_owner = true;
break;
diff --git a/repmgr-client.h b/repmgr-client.h
index 8488b640..8ce3451b 100644
--- a/repmgr-client.h
+++ b/repmgr-client.h
@@ -100,6 +100,7 @@
#define OPT_DB_CONNECTION 1047
#define OPT_VERIFY_BACKUP 1048
#define OPT_RECOVERY_MIN_APPLY_DELAY 1049
+#define OPT_REPMGRD 1050
/* These options are for internal use only */
#define OPT_CONFIG_ARCHIVE_DIR 2001
@@ -193,6 +194,7 @@ static struct option long_options[] =
{"role", no_argument, NULL, OPT_ROLE},
{"slots", no_argument, NULL, OPT_SLOTS},
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
+ {"repmgrd", no_argument, NULL, OPT_REPMGRD},
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
diff --git a/strutil.c b/strutil.c
index c9b1e462..a9f37f74 100644
--- a/strutil.c
+++ b/strutil.c
@@ -369,7 +369,6 @@ check_status_list_free(CheckStatusList *list)
}
-
const char *
output_check_status(CheckStatus status)
{