mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Add --repmgrd option to "repmgr node check"
This provides a simple way for checking whether the node's repmgrd is running. GitHub #719.
This commit is contained in:
37
dbutils.c
37
dbutils.c
@@ -6008,6 +6008,43 @@ is_wal_replay_paused(PGconn *conn, bool check_pending_wal)
|
|||||||
return is_paused;
|
return is_paused;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* repmgrd status functions */
|
||||||
|
|
||||||
|
CheckStatus
|
||||||
|
get_repmgrd_status(PGconn *conn)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
CheckStatus repmgrd_status = CHECK_STATUS_CRITICAL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
|
appendPQExpBufferStr(&query,
|
||||||
|
" SELECT "
|
||||||
|
" CASE "
|
||||||
|
" WHEN repmgr.repmgrd_is_running() "
|
||||||
|
" THEN "
|
||||||
|
" CASE "
|
||||||
|
" WHEN repmgr.repmgrd_is_paused() THEN 1 ELSE 0 "
|
||||||
|
" END "
|
||||||
|
" ELSE 2 "
|
||||||
|
" END AS repmgrd_status");
|
||||||
|
res = PQexec(conn, query.data);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_db_error(conn, query.data, _("unable to execute repmgrd status query"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
repmgrd_status = atoi(PQgetvalue(res, 0, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
PQclear(res);
|
||||||
|
return repmgrd_status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* miscellaneous debugging functions */
|
/* miscellaneous debugging functions */
|
||||||
|
|
||||||
|
|||||||
@@ -602,6 +602,9 @@ int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
|||||||
|
|
||||||
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
||||||
|
|
||||||
|
/* repmgrd status functions */
|
||||||
|
CheckStatus get_repmgrd_status(PGconn *conn);
|
||||||
|
|
||||||
/* miscellaneous debugging functions */
|
/* miscellaneous debugging functions */
|
||||||
const char *print_node_status(NodeStatus node_status);
|
const char *print_node_status(NodeStatus node_status);
|
||||||
const char *print_pqping_status(PGPing ping_status);
|
const char *print_pqping_status(PGPing ping_status);
|
||||||
|
|||||||
@@ -69,6 +69,14 @@
|
|||||||
This makes it clearer what &repmgr; is trying to do.
|
This makes it clearer what &repmgr; is trying to do.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-node-check">repmgr node check</link>:
|
||||||
|
option <option>--repmgrd</option> added to check &repmgrd;
|
||||||
|
status.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|||||||
@@ -125,12 +125,29 @@
|
|||||||
is correctly configured.
|
is correctly configured.
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>repmgrd</title>
|
||||||
|
<para>
|
||||||
|
A separate check is available to verify whether &repmgrd; is running.
|
||||||
|
This is not included in the general output, as this does not
|
||||||
|
per se constitute a check of the node's replication status.
|
||||||
|
</para>
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<option>--repmgrd</option>: checks whether &repmgrd; is running.
|
||||||
|
If &repmgrd; is running but paused, status <literal>1</literal>
|
||||||
|
(<literal>WARNING</literal>) is returned.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Additional checks</title>
|
<title>Additional checks</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -1079,6 +1079,29 @@ REPMGRD_OPTS="--daemonize=false"
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="repmgrd-daemon-monitoring">
|
||||||
|
<title>repmgrd daemon monitoring</title>
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgrd</primary>
|
||||||
|
<secondary>monitoring</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<indexterm>
|
||||||
|
<primary>monitoring</primary>
|
||||||
|
<secondary>repmgrd</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The command <command><link linkend="repmgr-service-status">repmgr service status</link></command>
|
||||||
|
provides an overview of the &repmgrd; daemon status (including pause status)
|
||||||
|
on all nodes in the cluster.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
From &repmgr; 5.3, <command><link linkend="repmgr-node-check">repmgr node check --repmgrd</link></command>
|
||||||
|
can be used to check the status of &repmgrd; (including pause status)
|
||||||
|
on the local node.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="repmgrd-connection-settings">
|
<sect1 id="repmgrd-connection-settings">
|
||||||
|
|||||||
@@ -35,6 +35,7 @@
|
|||||||
static bool copy_file(const char *src_file, const char *dest_file);
|
static bool copy_file(const char *src_file, const char *dest_file);
|
||||||
static void format_archive_dir(PQExpBufferData *archive_dir);
|
static void format_archive_dir(PQExpBufferData *archive_dir);
|
||||||
static t_server_action parse_server_action(const char *action);
|
static t_server_action parse_server_action(const char *action);
|
||||||
|
static const char *output_repmgrd_status(CheckStatus status);
|
||||||
|
|
||||||
static void exit_optformat_error(const char *error, int errcode);
|
static void exit_optformat_error(const char *error, int errcode);
|
||||||
|
|
||||||
@@ -52,9 +53,11 @@ static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info
|
|||||||
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
|
static CheckStatus do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_db_connection(PGconn *conn, OutputMode mode);
|
static CheckStatus do_node_check_db_connection(PGconn *conn, OutputMode mode);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NODE STATUS
|
* NODE STATUS
|
||||||
*
|
*
|
||||||
@@ -941,6 +944,16 @@ do_node_check(void)
|
|||||||
exit(return_code);
|
exit(return_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (runtime_options.repmgrd == true)
|
||||||
|
{
|
||||||
|
return_code = do_node_check_repmgrd(conn,
|
||||||
|
runtime_options.output_mode,
|
||||||
|
&node_info,
|
||||||
|
NULL);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(return_code);
|
||||||
|
}
|
||||||
|
|
||||||
if (runtime_options.replication_config_owner == true)
|
if (runtime_options.replication_config_owner == true)
|
||||||
{
|
{
|
||||||
return_code = do_node_check_replication_config_owner(conn,
|
return_code = do_node_check_replication_config_owner(conn,
|
||||||
@@ -2024,7 +2037,6 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CheckStatus
|
CheckStatus
|
||||||
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
{
|
{
|
||||||
@@ -2159,6 +2171,53 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CheckStatus
|
||||||
|
do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
|
{
|
||||||
|
CheckStatus status = CHECK_STATUS_OK;
|
||||||
|
|
||||||
|
if (mode == OM_CSV && list_output == NULL)
|
||||||
|
{
|
||||||
|
log_error(_("--csv output not provided with --repmgrd option"));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
status = get_repmgrd_status(conn);
|
||||||
|
switch (mode)
|
||||||
|
{
|
||||||
|
case OM_OPTFORMAT:
|
||||||
|
printf("--repmgrd=%s\n",
|
||||||
|
output_check_status(status));
|
||||||
|
break;
|
||||||
|
case OM_NAGIOS:
|
||||||
|
printf("REPMGRD %s: %s\n",
|
||||||
|
output_check_status(status),
|
||||||
|
output_repmgrd_status(status));
|
||||||
|
|
||||||
|
break;
|
||||||
|
case OM_CSV:
|
||||||
|
case OM_TEXT:
|
||||||
|
if (list_output != NULL)
|
||||||
|
{
|
||||||
|
check_status_list_set(list_output,
|
||||||
|
"repmgrd",
|
||||||
|
status,
|
||||||
|
output_repmgrd_status(status));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("%s (%s)\n",
|
||||||
|
output_check_status(status),
|
||||||
|
output_repmgrd_status(status));
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is not included in the general list output
|
* This is not included in the general list output
|
||||||
*/
|
*/
|
||||||
@@ -3570,6 +3629,25 @@ copy_file(const char *src_file, const char *dest_file)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static const char *
|
||||||
|
output_repmgrd_status(CheckStatus status)
|
||||||
|
{
|
||||||
|
switch (status)
|
||||||
|
{
|
||||||
|
case CHECK_STATUS_OK:
|
||||||
|
return "repmgrd running";
|
||||||
|
case CHECK_STATUS_WARNING:
|
||||||
|
return "repmgrd running but paused";
|
||||||
|
case CHECK_STATUS_CRITICAL:
|
||||||
|
return "repmgrd not running";
|
||||||
|
case CHECK_STATUS_UNKNOWN:
|
||||||
|
return "repmgrd status unknown";
|
||||||
|
}
|
||||||
|
|
||||||
|
return "UNKNOWN";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
do_node_help(void)
|
do_node_help(void)
|
||||||
{
|
{
|
||||||
@@ -3612,6 +3690,7 @@ do_node_help(void)
|
|||||||
printf(_(" --role check node has expected role\n"));
|
printf(_(" --role check node has expected role\n"));
|
||||||
printf(_(" --slots check for inactive replication slots\n"));
|
printf(_(" --slots check for inactive replication slots\n"));
|
||||||
printf(_(" --missing-slots check for missing replication slots\n"));
|
printf(_(" --missing-slots check for missing replication slots\n"));
|
||||||
|
printf(_(" --repmgrd check if repmgrd is running\n"));
|
||||||
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|||||||
@@ -120,6 +120,7 @@ typedef struct
|
|||||||
bool missing_slots;
|
bool missing_slots;
|
||||||
bool has_passfile;
|
bool has_passfile;
|
||||||
bool replication_connection;
|
bool replication_connection;
|
||||||
|
bool repmgrd;
|
||||||
bool data_directory_config;
|
bool data_directory_config;
|
||||||
bool replication_config_owner;
|
bool replication_config_owner;
|
||||||
bool db_connection;
|
bool db_connection;
|
||||||
@@ -175,7 +176,7 @@ typedef struct
|
|||||||
/* "node status" options */ \
|
/* "node status" options */ \
|
||||||
false, \
|
false, \
|
||||||
/* "node check" options */ \
|
/* "node check" options */ \
|
||||||
false, false, false, false, false, false, false, false, false, false, false, false, \
|
false, false, false, false, false, false, false, false, false, false, false, false, false, \
|
||||||
/* "node rejoin" options */ \
|
/* "node rejoin" options */ \
|
||||||
"", \
|
"", \
|
||||||
/* "node service" options */ \
|
/* "node service" options */ \
|
||||||
|
|||||||
@@ -549,6 +549,10 @@ main(int argc, char **argv)
|
|||||||
runtime_options.data_directory_config = true;
|
runtime_options.data_directory_config = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPT_REPMGRD:
|
||||||
|
runtime_options.repmgrd = true;
|
||||||
|
break;
|
||||||
|
|
||||||
case OPT_REPLICATION_CONFIG_OWNER:
|
case OPT_REPLICATION_CONFIG_OWNER:
|
||||||
runtime_options.replication_config_owner = true;
|
runtime_options.replication_config_owner = true;
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -100,6 +100,7 @@
|
|||||||
#define OPT_DB_CONNECTION 1047
|
#define OPT_DB_CONNECTION 1047
|
||||||
#define OPT_VERIFY_BACKUP 1048
|
#define OPT_VERIFY_BACKUP 1048
|
||||||
#define OPT_RECOVERY_MIN_APPLY_DELAY 1049
|
#define OPT_RECOVERY_MIN_APPLY_DELAY 1049
|
||||||
|
#define OPT_REPMGRD 1050
|
||||||
|
|
||||||
/* These options are for internal use only */
|
/* These options are for internal use only */
|
||||||
#define OPT_CONFIG_ARCHIVE_DIR 2001
|
#define OPT_CONFIG_ARCHIVE_DIR 2001
|
||||||
@@ -193,6 +194,7 @@ static struct option long_options[] =
|
|||||||
{"role", no_argument, NULL, OPT_ROLE},
|
{"role", no_argument, NULL, OPT_ROLE},
|
||||||
{"slots", no_argument, NULL, OPT_SLOTS},
|
{"slots", no_argument, NULL, OPT_SLOTS},
|
||||||
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
|
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
|
||||||
|
{"repmgrd", no_argument, NULL, OPT_REPMGRD},
|
||||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||||
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
||||||
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
|
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
|
||||||
|
|||||||
Reference in New Issue
Block a user