repmgr: have "cluster show" exit with a non-zero value if issues detected

If any issues are detected (e.g. node not reachable, unexpected node status
etc.), "repmgr cluster show" returns exit code 25 ("ERR_NODE_STATUS").

Note that exit code 25 was introduced recently as "ERR_CLUSTER_CHECK",
however it makes sense to use this to indicate issues detected by any
command which can detect node issues.

Addresses GitHub #456.
This commit is contained in:
Ian Barwick
2018-07-05 10:47:31 +09:00
parent 29de052dd8
commit 4c7c681a14
7 changed files with 71 additions and 7 deletions

View File

@@ -2,6 +2,8 @@
repmgr: add "--missing-slots" check to "repmgr node check" (Ian) repmgr: add "--missing-slots" check to "repmgr node check" (Ian)
repmgr: improve command line error handling; GitHub #464 (Ian) repmgr: improve command line error handling; GitHub #464 (Ian)
repmgr: fix "standby register --wait-sync" when no timeout provided (Ian) repmgr: fix "standby register --wait-sync" when no timeout provided (Ian)
repmgr: "cluster show" returns non-zero value if an issue encountered;
GitHub #456 (Ian)
repmgrd: create a PID file by default; GitHub #457 (Ian) repmgrd: create a PID file by default; GitHub #457 (Ian)
repmgrd: daemonize process by default; GitHub #458 (Ian) repmgrd: daemonize process by default; GitHub #458 (Ian)

View File

@@ -46,6 +46,12 @@
</para> </para>
</listitem> </listitem>
<listitem>
<para>
<command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>
returns non-zero exit code if node status issues detected (GitHub #456).
</para>
</listitem>
<listitem> <listitem>
<para> <para>
@@ -76,7 +82,7 @@
<listitem> <listitem>
<para> <para>
<application>repmgr</application>: fix <command><link linkend="repmgr-standby-register">repmgr standby register--wait-sync </link></command> <application>repmgr</application>: fix <command><link linkend="repmgr-standby-register">repmgr standby register --wait-sync</link></command>
when no timeout provided. when no timeout provided.
</para> </para>
</listitem> </listitem>

View File

@@ -56,7 +56,7 @@
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<term><option>ERR_CLUSTER_CHECK (25)</option></term> <term><option>ERR_NODE_STATUS (25)</option></term>
<listitem> <listitem>
<para> <para>
One or more nodes could not be reached. One or more nodes could not be reached.

View File

@@ -116,7 +116,7 @@
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<term><option>ERR_CLUSTER_CHECK (25)</option></term> <term><option>ERR_NODE_STATUS (25)</option></term>
<listitem> <listitem>
<para> <para>
One or more nodes could not be reached. One or more nodes could not be reached.

View File

@@ -113,4 +113,33 @@
</para> </para>
</refsect1> </refsect1>
<refsect1>
<title>Exit codes</title>
<para>
The following exit codes can be emitted by <command>repmgr cluster show</command>:
</para>
<variablelist>
<varlistentry>
<term><option>SUCCESS (0)</option></term>
<listitem>
<para>
No issues were detected.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>ERR_NODE_STATUS (25)</option></term>
<listitem>
<para>
One or more issues were detected.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
</refentry> </refentry>

View File

@@ -46,6 +46,6 @@
#define ERR_SWITCHOVER_INCOMPLETE 22 #define ERR_SWITCHOVER_INCOMPLETE 22
#define ERR_FOLLOW_FAIL 23 #define ERR_FOLLOW_FAIL 23
#define ERR_REJOIN_FAIL 24 #define ERR_REJOIN_FAIL 24
#define ERR_CLUSTER_CHECK 25 #define ERR_NODE_STATUS 25
#endif /* _ERRCODE_H_ */ #endif /* _ERRCODE_H_ */

View File

@@ -83,6 +83,7 @@ do_cluster_show(void)
int i = 0; int i = 0;
ItemList warnings = {NULL, NULL}; ItemList warnings = {NULL, NULL};
bool success = false; bool success = false;
bool error_found = false;
/* Connect to local database to obtain cluster connection data */ /* Connect to local database to obtain cluster connection data */
log_verbose(LOG_INFO, _("connecting to database")); log_verbose(LOG_INFO, _("connecting to database"));
@@ -218,6 +219,7 @@ do_cluster_show(void)
else else
{ {
appendPQExpBuffer(&details, "- failed"); appendPQExpBuffer(&details, "- failed");
error_found = true;
} }
} }
} }
@@ -281,6 +283,7 @@ do_cluster_show(void)
else else
{ {
appendPQExpBuffer(&details, "- failed"); appendPQExpBuffer(&details, "- failed");
error_found = true;
} }
} }
} }
@@ -292,17 +295,27 @@ do_cluster_show(void)
if (cell->node_info->node_status == NODE_STATUS_UP) if (cell->node_info->node_status == NODE_STATUS_UP)
{ {
if (cell->node_info->active == true) if (cell->node_info->active == true)
{
appendPQExpBuffer(&details, "* running"); appendPQExpBuffer(&details, "* running");
}
else else
{
appendPQExpBuffer(&details, "! running"); appendPQExpBuffer(&details, "! running");
error_found = true;
}
} }
/* node is unreachable */ /* node is unreachable */
else else
{ {
if (cell->node_info->active == true) if (cell->node_info->active == true)
{
appendPQExpBuffer(&details, "? unreachable"); appendPQExpBuffer(&details, "? unreachable");
}
else else
{
appendPQExpBuffer(&details, "- failed"); appendPQExpBuffer(&details, "- failed");
error_found = true;
}
} }
} }
break; break;
@@ -310,6 +323,7 @@ do_cluster_show(void)
{ {
/* this should never happen */ /* this should never happen */
appendPQExpBuffer(&details, "? unknown node type"); appendPQExpBuffer(&details, "? unknown node type");
error_found = true;
} }
break; break;
} }
@@ -414,7 +428,6 @@ do_cluster_show(void)
PQfinish(conn); PQfinish(conn);
/* emit any warnings */ /* emit any warnings */
if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV) if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV)
{ {
ItemListCell *cell = NULL; ItemListCell *cell = NULL;
@@ -425,6 +438,20 @@ do_cluster_show(void)
printf(_(" - %s\n"), cell->string); printf(_(" - %s\n"), cell->string);
} }
} }
/*
* If warnings were noted, even if they're not displayed (e.g. in --csv mode),
* that means something's not right so we need to emit a non-zero exit code.
*/
if (warnings.head != NULL)
{
error_found = true;
}
if (error_found == true)
{
exit(ERR_NODE_STATUS);
}
} }
@@ -696,7 +723,7 @@ do_cluster_crosscheck(void)
if (error_found == true) if (error_found == true)
{ {
exit(ERR_CLUSTER_CHECK); exit(ERR_NODE_STATUS);
} }
} }
@@ -786,7 +813,7 @@ do_cluster_matrix()
if (error_found == true) if (error_found == true)
{ {
exit(ERR_CLUSTER_CHECK); exit(ERR_NODE_STATUS);
} }
} }