mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 14:46:29 +00:00
Add --repmgrd option to "repmgr node check"
This provides a simple way for checking whether the node's repmgrd is running. GitHub #719.
This commit is contained in:
37
dbutils.c
37
dbutils.c
@@ -6008,6 +6008,43 @@ is_wal_replay_paused(PGconn *conn, bool check_pending_wal)
|
||||
return is_paused;
|
||||
}
|
||||
|
||||
/* repmgrd status functions */
|
||||
|
||||
CheckStatus
|
||||
get_repmgrd_status(PGconn *conn)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
CheckStatus repmgrd_status = CHECK_STATUS_CRITICAL;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBufferStr(&query,
|
||||
" SELECT "
|
||||
" CASE "
|
||||
" WHEN repmgr.repmgrd_is_running() "
|
||||
" THEN "
|
||||
" CASE "
|
||||
" WHEN repmgr.repmgrd_is_paused() THEN 1 ELSE 0 "
|
||||
" END "
|
||||
" ELSE 2 "
|
||||
" END AS repmgrd_status");
|
||||
res = PQexec(conn, query.data);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_db_error(conn, query.data, _("unable to execute repmgrd status query"));
|
||||
}
|
||||
else
|
||||
{
|
||||
repmgrd_status = atoi(PQgetvalue(res, 0, 0));
|
||||
}
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
PQclear(res);
|
||||
return repmgrd_status;
|
||||
}
|
||||
|
||||
|
||||
/* miscellaneous debugging functions */
|
||||
|
||||
|
||||
@@ -602,6 +602,9 @@ int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
||||
|
||||
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
||||
|
||||
/* repmgrd status functions */
|
||||
CheckStatus get_repmgrd_status(PGconn *conn);
|
||||
|
||||
/* miscellaneous debugging functions */
|
||||
const char *print_node_status(NodeStatus node_status);
|
||||
const char *print_pqping_status(PGPing ping_status);
|
||||
|
||||
@@ -69,6 +69,14 @@
|
||||
This makes it clearer what &repmgr; is trying to do.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<link linkend="repmgr-node-check">repmgr node check</link>:
|
||||
option <option>--repmgrd</option> added to check &repmgrd;
|
||||
status.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
@@ -125,12 +125,29 @@
|
||||
is correctly configured.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>repmgrd</title>
|
||||
<para>
|
||||
A separate check is available to verify whether &repmgrd; is running.
|
||||
This is not included in the general output, as this does not
|
||||
per se constitute a check of the node's replication status.
|
||||
</para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
<option>--repmgrd</option>: checks whether &repmgrd; is running.
|
||||
If &repmgrd; is running but paused, status <literal>1</literal>
|
||||
(<literal>WARNING</literal>) is returned.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Additional checks</title>
|
||||
<para>
|
||||
|
||||
@@ -1079,6 +1079,29 @@ REPMGRD_OPTS="--daemonize=false"
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="repmgrd-daemon-monitoring">
|
||||
<title>repmgrd daemon monitoring</title>
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>monitoring</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>monitoring</primary>
|
||||
<secondary>repmgrd</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
The command <command><link linkend="repmgr-service-status">repmgr service status</link></command>
|
||||
provides an overview of the &repmgrd; daemon status (including pause status)
|
||||
on all nodes in the cluster.
|
||||
</para>
|
||||
<para>
|
||||
From &repmgr; 5.3, <command><link linkend="repmgr-node-check">repmgr node check --repmgrd</link></command>
|
||||
can be used to check the status of &repmgrd; (including pause status)
|
||||
on the local node.
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-connection-settings">
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
static bool copy_file(const char *src_file, const char *dest_file);
|
||||
static void format_archive_dir(PQExpBufferData *archive_dir);
|
||||
static t_server_action parse_server_action(const char *action);
|
||||
static const char *output_repmgrd_status(CheckStatus status);
|
||||
|
||||
static void exit_optformat_error(const char *error, int errcode);
|
||||
|
||||
@@ -52,9 +53,11 @@ static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info
|
||||
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_db_connection(PGconn *conn, OutputMode mode);
|
||||
|
||||
|
||||
/*
|
||||
* NODE STATUS
|
||||
*
|
||||
@@ -941,6 +944,16 @@ do_node_check(void)
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.repmgrd == true)
|
||||
{
|
||||
return_code = do_node_check_repmgrd(conn,
|
||||
runtime_options.output_mode,
|
||||
&node_info,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.replication_config_owner == true)
|
||||
{
|
||||
return_code = do_node_check_replication_config_owner(conn,
|
||||
@@ -2024,7 +2037,6 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
CheckStatus
|
||||
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||
{
|
||||
@@ -2159,6 +2171,53 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
|
||||
return status;
|
||||
}
|
||||
|
||||
CheckStatus
|
||||
do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||
{
|
||||
CheckStatus status = CHECK_STATUS_OK;
|
||||
|
||||
if (mode == OM_CSV && list_output == NULL)
|
||||
{
|
||||
log_error(_("--csv output not provided with --repmgrd option"));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
status = get_repmgrd_status(conn);
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
printf("--repmgrd=%s\n",
|
||||
output_check_status(status));
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
printf("REPMGRD %s: %s\n",
|
||||
output_check_status(status),
|
||||
output_repmgrd_status(status));
|
||||
|
||||
break;
|
||||
case OM_CSV:
|
||||
case OM_TEXT:
|
||||
if (list_output != NULL)
|
||||
{
|
||||
check_status_list_set(list_output,
|
||||
"repmgrd",
|
||||
status,
|
||||
output_repmgrd_status(status));
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("%s (%s)\n",
|
||||
output_check_status(status),
|
||||
output_repmgrd_status(status));
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is not included in the general list output
|
||||
*/
|
||||
@@ -3570,6 +3629,25 @@ copy_file(const char *src_file, const char *dest_file)
|
||||
}
|
||||
|
||||
|
||||
static const char *
|
||||
output_repmgrd_status(CheckStatus status)
|
||||
{
|
||||
switch (status)
|
||||
{
|
||||
case CHECK_STATUS_OK:
|
||||
return "repmgrd running";
|
||||
case CHECK_STATUS_WARNING:
|
||||
return "repmgrd running but paused";
|
||||
case CHECK_STATUS_CRITICAL:
|
||||
return "repmgrd not running";
|
||||
case CHECK_STATUS_UNKNOWN:
|
||||
return "repmgrd status unknown";
|
||||
}
|
||||
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_node_help(void)
|
||||
{
|
||||
@@ -3612,6 +3690,7 @@ do_node_help(void)
|
||||
printf(_(" --role check node has expected role\n"));
|
||||
printf(_(" --slots check for inactive replication slots\n"));
|
||||
printf(_(" --missing-slots check for missing replication slots\n"));
|
||||
printf(_(" --repmgrd check if repmgrd is running\n"));
|
||||
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
||||
|
||||
puts("");
|
||||
|
||||
@@ -120,6 +120,7 @@ typedef struct
|
||||
bool missing_slots;
|
||||
bool has_passfile;
|
||||
bool replication_connection;
|
||||
bool repmgrd;
|
||||
bool data_directory_config;
|
||||
bool replication_config_owner;
|
||||
bool db_connection;
|
||||
@@ -175,7 +176,7 @@ typedef struct
|
||||
/* "node status" options */ \
|
||||
false, \
|
||||
/* "node check" options */ \
|
||||
false, false, false, false, false, false, false, false, false, false, false, false, \
|
||||
false, false, false, false, false, false, false, false, false, false, false, false, false, \
|
||||
/* "node rejoin" options */ \
|
||||
"", \
|
||||
/* "node service" options */ \
|
||||
|
||||
@@ -549,6 +549,10 @@ main(int argc, char **argv)
|
||||
runtime_options.data_directory_config = true;
|
||||
break;
|
||||
|
||||
case OPT_REPMGRD:
|
||||
runtime_options.repmgrd = true;
|
||||
break;
|
||||
|
||||
case OPT_REPLICATION_CONFIG_OWNER:
|
||||
runtime_options.replication_config_owner = true;
|
||||
break;
|
||||
|
||||
@@ -100,6 +100,7 @@
|
||||
#define OPT_DB_CONNECTION 1047
|
||||
#define OPT_VERIFY_BACKUP 1048
|
||||
#define OPT_RECOVERY_MIN_APPLY_DELAY 1049
|
||||
#define OPT_REPMGRD 1050
|
||||
|
||||
/* These options are for internal use only */
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 2001
|
||||
@@ -193,6 +194,7 @@ static struct option long_options[] =
|
||||
{"role", no_argument, NULL, OPT_ROLE},
|
||||
{"slots", no_argument, NULL, OPT_SLOTS},
|
||||
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
|
||||
{"repmgrd", no_argument, NULL, OPT_REPMGRD},
|
||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
||||
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
|
||||
|
||||
Reference in New Issue
Block a user