mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgr: make "node check" and "node status" return ERR_NODE_STATUS when appropriate
If any issue is detected (and "node check" is not being executed with a specific individual check), "ERR_NODE_STATUS" is returned.
This commit is contained in:
30
HISTORY
30
HISTORY
@@ -1,28 +1,30 @@
|
||||
4.1.0 2018-??-??
|
||||
repmgr: add "--missing-slots" check to "repmgr node check" (Ian)
|
||||
repmgr: add "--missing-slots" check to "repmgr node check" (Ian)
|
||||
repmgr: improve command line error handling; GitHub #464 (Ian)
|
||||
repmgr: fix "standby register --wait-sync" when no timeout provided (Ian)
|
||||
repmgr: "cluster show" returns non-zero value if an issue encountered;
|
||||
GitHub #456 (Ian)
|
||||
repmgr: "node check" and "node status" returns non-zero value if an issue
|
||||
encountered (Ian)
|
||||
repmgr: "node status" returns non-zero value if an issue encountered (Ian)
|
||||
repmgrd: create a PID file by default; GitHub #457 (Ian)
|
||||
repmgrd: daemonize process by default; GitHub #458 (Ian)
|
||||
|
||||
4.0.6 2018-06-14
|
||||
repmgr: (witness register) prevent registration of a witness server with the
|
||||
same name as an existing node (Ian)
|
||||
repmgr: (standby follow) check node has actually connected to new primary
|
||||
before reporting success; GitHub #444 (Ian)
|
||||
repmgr: (standby clone) improve handling of external configuration file copying,
|
||||
including consideration in --dry-run check; GitHub #443 (Ian)
|
||||
repmgr: (standby clone) don't require presence of "user" parameter in
|
||||
conninfo string; GitHub #437 (Ian)
|
||||
repmgr: (standby clone) improve documentation of --recovery-conf-only
|
||||
mode; GitHub #438 (Ian)
|
||||
repmgr: (node rejoin) fix bug when parsing --config-files parameter;
|
||||
GitHub #442 (Ian)
|
||||
repmgr: when using --dry-run, force log level to INFO to ensure output
|
||||
will always be displayed; GitHub #441 (Ian)
|
||||
same name as an existing node (Ian)
|
||||
repmgr: (standby follow) check node has actually connected to new primary
|
||||
before reporting success; GitHub #444 (Ian)
|
||||
repmgr: (standby clone) improve handling of external configuration file copying,
|
||||
including consideration in --dry-run check; GitHub #443 (Ian)
|
||||
repmgr: (standby clone) don't require presence of "user" parameter in
|
||||
conninfo string; GitHub #437 (Ian)
|
||||
repmgr: (standby clone) improve documentation of --recovery-conf-only
|
||||
mode; GitHub #438 (Ian)
|
||||
repmgr: (node rejoin) fix bug when parsing --config-files parameter;
|
||||
GitHub #442 (Ian)
|
||||
repmgr: when using --dry-run, force log level to INFO to ensure output
|
||||
will always be displayed; GitHub #441 (Ian)
|
||||
repmgr: (cluster matrix/crosscheck) return non-zero exit code if node
|
||||
connection issues detected; GitHub #447 (Ian)
|
||||
repmgrd: ensure local node is counted as quorum member; GitHub #439 (Ian)
|
||||
|
||||
@@ -34,11 +34,11 @@
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>repmgrd enhancements</title>
|
||||
<title>repmgr enhancements</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgr</application>: always exit with an error if an unrecognised
|
||||
command line option is provided. This matches the behaviour of other PostgreSQL
|
||||
@@ -48,10 +48,20 @@
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-cluster-show">repmgr cluster-show</link></command>
|
||||
returns non-zero exit code if node status issues detected (GitHub #456).
|
||||
<command><link linkend="repmgr-cluster-show">repmgr cluster-show</link></command>,
|
||||
<command><link linkend="repmgr-node-check">repmgr node check</link></command> and
|
||||
<command><link linkend="repmgr-node-status">repmgr node status</link></command>
|
||||
return non-zero exit code if node status issues detected. (GitHub #456).
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>repmgrd enhancements</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
|
||||
@@ -142,4 +142,11 @@
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
@@ -61,7 +61,9 @@
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--archive-ready</literal>: checks for WAL files which have not yet been archived
|
||||
<literal>--archive-ready</literal>: checks for WAL files which have not yet been archived,
|
||||
and returns <literal>WARNING</literal> or <literal>CRITICAL</literal> if the number
|
||||
exceeds <varname>archive_ready_warning</varname> or <varname>archive_ready_critical</varname> respectively.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
@@ -107,4 +109,80 @@
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
|
||||
<para>
|
||||
When executing <command>repmgr node check</command> with one of the individual
|
||||
checks listed above, &repmgr; will emit one of the following Nagios-style exit codes
|
||||
(even if <literal>--nagios</literal> is not supplied):
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>0</literal>: OK
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>1</literal>: WARNING
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>2</literal>: ERROR
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>3</literal>: UNKNOWN
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
|
||||
|
||||
<para>
|
||||
Following exit codes can be emitted by <command>repmgr status check</command>
|
||||
if no individual check was specified.
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
No issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-node-status">, <xref linkend="repmgr-cluster-show">
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
@@ -84,7 +84,8 @@
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
See <xref linkend="repmgr-node-check"> to diagnose issues.
|
||||
See <xref linkend="repmgr-node-check"> to diagnose issues and <xref linkend="repmgr-cluster-show">
|
||||
for an overview of all nodes in the cluster.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
@@ -170,11 +170,17 @@ do_node_status(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
/* "archive_mode" is not "off", i.e. one of "on", "always" */
|
||||
bool enabled = true;
|
||||
PQExpBufferData archiving_status;
|
||||
char archive_command[MAXLEN] = "";
|
||||
|
||||
initPQExpBuffer(&archiving_status);
|
||||
|
||||
/*
|
||||
* if the node is a standby, and "archive_mode" is "on", archiving will
|
||||
* actually be disabled.
|
||||
*/
|
||||
if (recovery_type == RECTYPE_STANDBY)
|
||||
{
|
||||
if (guc_set(conn, "archive_mode", "=", "on"))
|
||||
@@ -642,6 +648,7 @@ do_node_check(void)
|
||||
CheckStatusList status_list = {NULL, NULL};
|
||||
CheckStatusListCell *cell = NULL;
|
||||
|
||||
bool issue_detected = false;
|
||||
|
||||
/* for internal use */
|
||||
if (runtime_options.has_passfile == true)
|
||||
@@ -750,12 +757,23 @@ do_node_check(void)
|
||||
initPQExpBuffer(&output);
|
||||
|
||||
/* order functions are called is also output order */
|
||||
(void) do_node_check_role(conn, runtime_options.output_mode, &node_info, &status_list);
|
||||
(void) do_node_check_replication_lag(conn, runtime_options.output_mode, &node_info, &status_list);
|
||||
(void) do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list);
|
||||
(void) do_node_check_downstream(conn, runtime_options.output_mode, &status_list);
|
||||
(void) do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list);
|
||||
(void) do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list);
|
||||
if (do_node_check_role(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||
issue_detected = true;
|
||||
|
||||
if (do_node_check_replication_lag(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||
issue_detected = true;
|
||||
|
||||
if (do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
|
||||
issue_detected = true;
|
||||
|
||||
if (do_node_check_downstream(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
|
||||
issue_detected = true;
|
||||
|
||||
if (do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||
issue_detected = true;
|
||||
|
||||
if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||
issue_detected = true;
|
||||
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
{
|
||||
@@ -812,6 +830,11 @@ do_node_check(void)
|
||||
check_status_list_free(&status_list);
|
||||
|
||||
PQfinish(conn);
|
||||
|
||||
if (issue_detected == true)
|
||||
{
|
||||
exit(ERR_NODE_STATUS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user