daemon status: make output more like that of "cluster show"

In particular make any issues with unexpected server state more
obvious.
This commit is contained in:
Ian Barwick
2019-04-25 14:44:41 +09:00
parent da24896fd5
commit 9fe2fa2daf
11 changed files with 397 additions and 279 deletions

View File

@@ -1,3 +1,6 @@
4.4 2019-??-??
repmgr: improve "daemon status" output (Ian)
4.3.1 2019-??-?? 4.3.1 2019-??-??
repmgr: ensure BDR2-specific functionality cannot be used on repmgr: ensure BDR2-specific functionality cannot be used on
BDR3 and later (Ian) BDR3 and later (Ian)

View File

@@ -2447,8 +2447,7 @@ get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info)
initPQExpBuffer(&query); initPQExpBuffer(&query);
appendPQExpBuffer(&query, appendPQExpBuffer(&query,
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, " " SELECT " REPMGR_NODES_COLUMNS_WITH_UPSTREAM
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached "
" FROM repmgr.nodes n " " FROM repmgr.nodes n "
" LEFT JOIN repmgr.nodes un " " LEFT JOIN repmgr.nodes un "
" ON un.node_id = n.upstream_node_id" " ON un.node_id = n.upstream_node_id"
@@ -2747,8 +2746,7 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list)
initPQExpBuffer(&query); initPQExpBuffer(&query);
appendPQExpBufferStr(&query, appendPQExpBufferStr(&query,
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, " " SELECT " REPMGR_NODES_COLUMNS_WITH_UPSTREAM
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached "
" FROM repmgr.nodes n " " FROM repmgr.nodes n "
" LEFT JOIN repmgr.nodes un " " LEFT JOIN repmgr.nodes un "
" ON un.node_id = n.upstream_node_id" " ON un.node_id = n.upstream_node_id"

View File

@@ -30,6 +30,10 @@
#include "voting.h" #include "voting.h"
#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name, NULL AS attached " #define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name, NULL AS attached "
/*
 * Column list for SELECTs on repmgr.nodes (aliased "n") which also need the
 * upstream node's name: upstream_node_name is taken from "un.node_name", so
 * the query using this list must provide a relation aliased "un".
 */
#define REPMGR_NODES_COLUMNS_WITH_UPSTREAM "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached "
#define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''" #define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''"
#define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name" #define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name"

View File

@@ -1,3 +1,4 @@
<appendix id="appendix-release-notes"> <appendix id="appendix-release-notes">
<title>Release notes</title> <title>Release notes</title>
<indexterm> <indexterm>
@@ -18,6 +19,26 @@
<sect1 id="release-4.4"> <sect1 id="release-4.4">
<title>Release 4.4</title> <title>Release 4.4</title>
<para><emphasis>???, 2019</emphasis></para> <para><emphasis>???, 2019</emphasis></para>
<sect2>
<title>repmgr client enhancements</title>
<para>
<itemizedlist>
<listitem>
<para>
<link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>:
make output similar to that of
<link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>
for consistency and to make it easier to identify nodes not in the expected
state.
</para>
</listitem>
</itemizedlist>
</para>
</sect2>
</sect1> </sect1>
<sect1 id="release-4.3.1"> <sect1 id="release-4.3.1">

View File

@@ -52,33 +52,31 @@
<para> <para>
<application>repmgrd</application> running normally on all nodes: <application>repmgrd</application> running normally on all nodes:
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+----------+---------+---------+-------+---------+-------------------- ----+-------+---------+-----------+----------+---------+-------+---------+--------------------
1 | node1 | primary | 100 | running | running | 71987 | no | n/a 1 | node1 | primary | * running | | running | 96563 | no | n/a
2 | node2 | standby | 100 | running | running | 71996 | no | 1 second(s) ago 2 | node2 | standby | running | node1 | running | 96572 | no | 1 second(s) ago
3 | node3 | standby | 100 | running | running | 72042 | no | 1 second(s) ago 3 | node3 | standby | running | node1 | running | 96584 | no | 0 second(s) ago</programlisting>
</programlisting>
</para> </para>
<para> <para>
<application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">): <application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">):
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+----------+---------+---------+-------+---------+-------------------- ----+-------+---------+-----------+----------+---------+-------+---------+--------------------
1 | node1 | primary | 100 | running | running | 71987 | yes | n/a 1 | node1 | primary | * running | | running | 96563 | yes | n/a
2 | node2 | standby | 100 | running | running | 71996 | yes | 0 second(s) ago 2 | node2 | standby | running | node1 | running | 96572 | yes | 1 second(s) ago
3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago 3 | node3 | standby | running | node1 | running | 96584 | yes | 0 second(s) ago</programlisting>
</programlisting>
</para> </para>
<para> <para>
<application>repmgrd</application> not running on one node: <application>repmgrd</application> not running on one node:
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+----------+---------+-------------+-------+---------+-------------------- ----+-------+---------+-----------+----------+-------------+-------+---------+--------------------
1 | node1 | primary | 100 | running | running | 71987 | yes | n/a 1 | node1 | primary | * running | | running | 96563 | yes | n/a
2 | node2 | standby | 100 | running | not running | n/a | n/a | n/a 2 | node2 | standby | running | node1 | not running | n/a | n/a | n/a
3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago</programlisting> 3 | node3 | standby | running | node1 | running | 96584 | yes | 0 second(s) ago</programlisting>
</para> </para>
</refsect1> </refsect1>
@@ -96,9 +94,9 @@
parsing by scripts, e.g.: parsing by scripts, e.g.:
<programlisting> <programlisting>
$ repmgr -f /etc/repmgr.conf daemon status --csv $ repmgr -f /etc/repmgr.conf daemon status --csv
1,node1,primary,1,1,5722,1,100,-1 1,node1,primary,1,1,5722,1,100,-1,default
2,node2,standby,1,0,-1,1,100,1 2,node2,standby,1,0,-1,1,100,1,default
3,node3,standby,1,1,5779,1,100,1</programlisting> 3,node3,standby,1,1,5779,1,100,1,default</programlisting>
</para> </para>
<para> <para>
The columns have following meanings: The columns have following meanings:
@@ -157,9 +155,25 @@
</simpara> </simpara>
</listitem> </listitem>
<listitem>
<simpara>
node location
</simpara>
</listitem>
</itemizedlist> </itemizedlist>
</para> </para>
</listitem> </listitem>
</varlistentry>
<varlistentry>
<term><option>--detail</option></term>
<listitem>
<para>
Display additional information (<literal>location</literal>, <literal>priority</literal>)
about the &repmgr; configuration.
</para>
</listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
@@ -175,8 +189,6 @@
</refsect1> </refsect1>
<refsect1> <refsect1>
<title>See also</title> <title>See also</title>
<para> <para>

View File

@@ -184,231 +184,10 @@ do_cluster_show(void)
initPQExpBuffer(&details); initPQExpBuffer(&details);
/* if (format_node_status(cell->node_info, &details, &warnings) == false)
* TODO: count nodes marked as "? unreachable" and add a hint about error_found = true;
* the other cluster commands for better determining whether
* unreachable.
*/
switch (cell->node_info->type)
{
case PRIMARY:
{
/* node is reachable */
if (cell->node_info->node_status == NODE_STATUS_UP)
{
if (cell->node_info->active == true)
{
switch (cell->node_info->recovery_type)
{
case RECTYPE_PRIMARY:
appendPQExpBufferStr(&details, "* running");
break;
case RECTYPE_STANDBY:
appendPQExpBufferStr(&details, "! running as standby");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is registered as primary but running as standby",
cell->node_info->node_name, cell->node_info->node_id);
break;
case RECTYPE_UNKNOWN:
appendPQExpBufferStr(&details, "! unknown");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) has unknown replication status",
cell->node_info->node_name, cell->node_info->node_id);
break;
}
}
else
{
if (cell->node_info->recovery_type == RECTYPE_PRIMARY)
{
appendPQExpBufferStr(&details, "! running");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
cell->node_info->node_name, cell->node_info->node_id);
}
else
{
appendPQExpBufferStr(&details, "! running as standby");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
cell->node_info->node_name, cell->node_info->node_id);
}
}
}
/* node is up but cannot connect */
else if (cell->node_info->node_status == NODE_STATUS_REJECTED)
{
if (cell->node_info->active == true)
{
appendPQExpBufferStr(&details, "? running");
}
else
{
appendPQExpBufferStr(&details, "! running");
error_found = true;
}
}
/* node is unreachable */
else
{
/* node is unreachable but marked active */
if (cell->node_info->active == true)
{
appendPQExpBufferStr(&details, "? unreachable");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
cell->node_info->node_name, cell->node_info->node_id);
}
/* node is unreachable and marked as inactive */
else
{
appendPQExpBufferStr(&details, "- failed");
error_found = true;
}
}
}
break;
case STANDBY:
{
/* node is reachable */
if (cell->node_info->node_status == NODE_STATUS_UP)
{
if (cell->node_info->active == true)
{
switch (cell->node_info->recovery_type)
{
case RECTYPE_STANDBY:
appendPQExpBufferStr(&details, " running");
break;
case RECTYPE_PRIMARY:
appendPQExpBufferStr(&details, "! running as primary");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is registered as standby but running as primary",
cell->node_info->node_name, cell->node_info->node_id);
break;
case RECTYPE_UNKNOWN:
appendPQExpBufferStr(&details, "! unknown");
item_list_append_format(
&warnings,
"node \"%s\" (ID: %i) has unknown replication status",
cell->node_info->node_name, cell->node_info->node_id);
break;
}
}
else
{
if (cell->node_info->recovery_type == RECTYPE_STANDBY)
{
appendPQExpBufferStr(&details, "! running");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
cell->node_info->node_name, cell->node_info->node_id);
}
else
{
appendPQExpBufferStr(&details, "! running as primary");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
cell->node_info->node_name, cell->node_info->node_id);
}
}
/* warn about issue with paused WAL replay */
if (is_wal_replay_paused(cell->node_info->conn, true))
{
item_list_append_format(&warnings,
_("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"),
cell->node_info->node_name, cell->node_info->node_id);
}
}
/* node is up but cannot connect */
else if (cell->node_info->node_status == NODE_STATUS_REJECTED)
{
if (cell->node_info->active == true)
{
appendPQExpBufferStr(&details, "? running");
}
else
{
appendPQExpBufferStr(&details, "! running");
error_found = true;
}
}
/* node is unreachable */
else
{
/* node is unreachable but marked active */
if (cell->node_info->active == true)
{
appendPQExpBufferStr(&details, "? unreachable");
item_list_append_format(&warnings,
"node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
cell->node_info->node_name, cell->node_info->node_id);
}
else
{
appendPQExpBufferStr(&details, "- failed");
error_found = true;
}
}
}
break;
case WITNESS:
case BDR:
{
/* node is reachable */
if (cell->node_info->node_status == NODE_STATUS_UP)
{
if (cell->node_info->active == true)
{
appendPQExpBufferStr(&details, "* running");
}
else
{
appendPQExpBufferStr(&details, "! running");
error_found = true;
}
}
/* node is up but cannot connect */
else if (cell->node_info->node_status == NODE_STATUS_REJECTED)
{
if (cell->node_info->active == true)
{
appendPQExpBufferStr(&details, "? rejected");
}
else
{
appendPQExpBufferStr(&details, "! failed");
error_found = true;
}
}
/* node is unreachable */
else
{
if (cell->node_info->active == true)
{
appendPQExpBufferStr(&details, "? unreachable");
}
else
{
appendPQExpBufferStr(&details, "- failed");
error_found = true;
}
}
}
break;
case UNKNOWN:
{
/* this should never happen */
appendPQExpBufferStr(&details, "? unknown node type");
error_found = true;
}
break;
}
strncpy(cell->node_info->details, details.data, MAXLEN); strncpy(cell->node_info->details, details.data, MAXLEN);
termPQExpBuffer(&details); termPQExpBuffer(&details);
PQfinish(cell->node_info->conn); PQfinish(cell->node_info->conn);

View File

@@ -43,15 +43,17 @@ typedef enum
STATUS_ID = 0, STATUS_ID = 0,
STATUS_NAME, STATUS_NAME,
STATUS_ROLE, STATUS_ROLE,
STATUS_PRIORITY,
STATUS_PG, STATUS_PG,
STATUS_RUNNING, STATUS_UPSTREAM_NAME,
STATUS_LOCATION,
STATUS_PRIORITY,
STATUS_REPMGRD,
STATUS_PID, STATUS_PID,
STATUS_PAUSED, STATUS_PAUSED,
STATUS_UPSTREAM_LAST_SEEN STATUS_UPSTREAM_LAST_SEEN
} StatusHeader; } StatusHeader;
#define STATUS_HEADER_COUNT 9 #define STATUS_HEADER_COUNT 11
struct ColHeader headers_status[STATUS_HEADER_COUNT]; struct ColHeader headers_status[STATUS_HEADER_COUNT];
@@ -91,14 +93,17 @@ do_daemon_status(void)
strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN); strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN);
strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN); strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN);
strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN); strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN);
strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN);
strncpy(headers_status[STATUS_UPSTREAM_NAME].title, _("Upstream"), MAXLEN);
/* following only displayed with the --detail option */
strncpy(headers_status[STATUS_LOCATION].title, _("Location"), MAXLEN);
if (runtime_options.compact == true) if (runtime_options.compact == true)
strncpy(headers_status[STATUS_PRIORITY].title, _("Prio."), MAXLEN); strncpy(headers_status[STATUS_PRIORITY].title, _("Prio."), MAXLEN);
else else
strncpy(headers_status[STATUS_PRIORITY].title, _("Priority"), MAXLEN); strncpy(headers_status[STATUS_PRIORITY].title, _("Priority"), MAXLEN);
strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN); strncpy(headers_status[STATUS_REPMGRD].title, _("repmgrd"), MAXLEN);
strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN);
strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN); strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN);
strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN); strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN);
@@ -107,19 +112,23 @@ do_daemon_status(void)
else else
strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstream last seen"), MAXLEN); strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstream last seen"), MAXLEN);
for (i = 0; i < STATUS_HEADER_COUNT; i++) for (i = 0; i < STATUS_HEADER_COUNT; i++)
{ {
headers_status[i].max_length = strlen(headers_status[i].title); headers_status[i].max_length = strlen(headers_status[i].title);
headers_status[i].display = true; headers_status[i].display = true;
} }
if (runtime_options.detail == false)
{
headers_status[STATUS_LOCATION].display = false;
headers_status[STATUS_PRIORITY].display = false;
}
i = 0; i = 0;
for (cell = nodes.head; cell; cell = cell->next) for (cell = nodes.head; cell; cell = cell->next)
{ {
int j; int j;
PQExpBufferData buf;
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo)); repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
repmgrd_info[i]->node_id = cell->node_info->node_id; repmgrd_info[i]->node_id = cell->node_info->node_id;
@@ -135,6 +144,14 @@ do_daemon_status(void)
if (PQstatus(cell->node_info->conn) != CONNECTION_OK) if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{ {
/* check if node is reachable, but just not letting us in */
if (is_server_available_quiet(cell->node_info->conninfo))
cell->node_info->node_status = NODE_STATUS_REJECTED;
else
cell->node_info->node_status = NODE_STATUS_DOWN;
cell->node_info->recovery_type = RECTYPE_UNKNOWN;
connection_error_found = true; connection_error_found = true;
if (runtime_options.verbose) if (runtime_options.verbose)
@@ -155,13 +172,15 @@ do_daemon_status(void)
} }
repmgrd_info[i]->pg_running = false; repmgrd_info[i]->pg_running = false;
maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running")); //maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running"));
maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a")); maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a"));
maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a")); maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
} }
else else
{ {
maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running")); cell->node_info->node_status = NODE_STATUS_UP;
cell->node_info->recovery_type = get_recovery_type(cell->node_info->conn);
//maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running"));
repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn); repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
@@ -217,22 +236,38 @@ do_daemon_status(void)
maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i second(s) ago"), repmgrd_info[i]->upstream_last_seen); maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i second(s) ago"), repmgrd_info[i]->upstream_last_seen);
} }
} }
PQfinish(cell->node_info->conn);
} }
{
PQExpBufferData details;
initPQExpBuffer(&details);
(void)format_node_status(cell->node_info, &details, &warnings);
strncpy(repmgrd_info[i]->pg_running_text, details.data, MAXLEN);
termPQExpBuffer(&details);
}
PQfinish(cell->node_info->conn);
headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name); headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name);
headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type)); headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
headers_status[STATUS_UPSTREAM_NAME].cur_length = strlen(cell->node_info->upstream_node_name);
initPQExpBuffer(&buf); if (runtime_options.detail == true)
appendPQExpBuffer(&buf, "%i", cell->node_info->priority); {
headers_status[STATUS_PRIORITY].cur_length = strlen(buf.data); PQExpBufferData buf;
termPQExpBuffer(&buf);
headers_status[STATUS_LOCATION].cur_length = strlen(cell->node_info->location);
initPQExpBuffer(&buf);
appendPQExpBuffer(&buf, "%i", cell->node_info->priority);
headers_status[STATUS_PRIORITY].cur_length = strlen(buf.data);
termPQExpBuffer(&buf);
}
headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text); headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text);
headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running); headers_status[STATUS_REPMGRD].cur_length = strlen(repmgrd_info[i]->repmgrd_running);
headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
headers_status[STATUS_UPSTREAM_LAST_SEEN].cur_length = strlen(repmgrd_info[i]->upstream_last_seen_text); headers_status[STATUS_UPSTREAM_LAST_SEEN].cur_length = strlen(repmgrd_info[i]->upstream_last_seen_text);
@@ -269,7 +304,7 @@ do_daemon_status(void)
paused = -1; paused = -1;
} }
printf("%i,%s,%s,%i,%i,%i,%i,%i,%i\n", printf("%i,%s,%s,%i,%i,%i,%i,%i,%i,%s\n",
cell->node_info->node_id, cell->node_info->node_id,
cell->node_info->node_name, cell->node_info->node_name,
get_node_type_string(cell->node_info->type), get_node_type_string(cell->node_info->type),
@@ -280,17 +315,24 @@ do_daemon_status(void)
cell->node_info->priority, cell->node_info->priority,
repmgrd_info[i]->pid == UNKNOWN_PID repmgrd_info[i]->pid == UNKNOWN_PID
? -1 ? -1
: repmgrd_info[i]->upstream_last_seen); : repmgrd_info[i]->upstream_last_seen,
/* the trailing "%s" is the node location; priority is already emitted earlier as "%i" */
cell->node_info->location);
} }
else else
{ {
printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id); printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id);
printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name); printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name);
printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type)); printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type));
printf("| %-*i ", headers_status[STATUS_PRIORITY].max_length, cell->node_info->priority);
printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text); printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text);
printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running); printf("| %-*s ", headers_status[STATUS_UPSTREAM_NAME].max_length, cell->node_info->upstream_node_name);
if (runtime_options.detail == true)
{
printf("| %-*s ", headers_status[STATUS_LOCATION].max_length, cell->node_info->location);
printf("| %-*i ", headers_status[STATUS_PRIORITY].max_length, cell->node_info->priority);
}
printf("| %-*s ", headers_status[STATUS_REPMGRD].max_length, repmgrd_info[i]->repmgrd_running);
printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text); printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text);
if (repmgrd_info[i]->pid == UNKNOWN_PID) if (repmgrd_info[i]->pid == UNKNOWN_PID)
@@ -441,7 +483,7 @@ _do_repmgr_pause(bool pause)
void void
fetch_node_records(PGconn *conn, NodeInfoList *node_list) fetch_node_records(PGconn *conn, NodeInfoList *node_list)
{ {
bool success = get_all_node_records(conn, node_list); bool success = get_all_node_records_with_upstream(conn, node_list);
if (success == false) if (success == false)
{ {
@@ -756,6 +798,7 @@ void do_daemon_help(void)
printf(_(" \"daemon status\" shows the status of repmgrd on each node in the cluster\n")); printf(_(" \"daemon status\" shows the status of repmgrd on each node in the cluster\n"));
puts(""); puts("");
printf(_(" --csv emit output as CSV\n")); printf(_(" --csv emit output as CSV\n"));
printf(_(" --detail show additional detail\n"));
printf(_(" --verbose show text of database connection error messages\n")); printf(_(" --verbose show text of database connection error messages\n"));
puts(""); puts("");

View File

@@ -45,6 +45,7 @@ typedef struct
int wait; int wait;
bool no_wait; bool no_wait;
bool compact; bool compact;
bool detail;
/* logging options */ /* logging options */
char log_level[MAXLEN]; /* overrides setting in repmgr.conf */ char log_level[MAXLEN]; /* overrides setting in repmgr.conf */
@@ -143,7 +144,7 @@ typedef struct
/* configuration metadata */ \ /* configuration metadata */ \
false, false, false, false, false, \ false, false, false, false, false, \
/* general configuration options */ \ /* general configuration options */ \
"", false, false, "", -1, false, false, \ "", false, false, "", -1, false, false, false, \
/* logging options */ \ /* logging options */ \
"", false, false, false, false, \ "", false, false, false, false, \
/* output options */ \ /* output options */ \
@@ -241,8 +242,8 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc
extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record); extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record);
extern void make_repmgrd_path(PQExpBufferData *output_buf); extern void make_repmgrd_path(PQExpBufferData *output_buf);
/* display functions */ /* display functions */
extern bool format_node_status(t_node_info *node_info, PQExpBufferData *details, ItemList *warnings);
extern void print_help_header(void); extern void print_help_header(void);
extern void print_status_header(int cols, ColHeader *headers); extern void print_status_header(int cols, ColHeader *headers);

View File

@@ -271,6 +271,10 @@ main(int argc, char **argv)
runtime_options.compact = true; runtime_options.compact = true;
break; break;
/* --detail */
case OPT_DETAIL:
runtime_options.detail = true;
break;
/*---------------------------- /*----------------------------
* database connection options * database connection options
@@ -1916,7 +1920,6 @@ check_cli_parameters(const int action)
} }
/* --compact */ /* --compact */
if (runtime_options.compact == true) if (runtime_options.compact == true)
{ {
switch (action) switch (action)
@@ -1931,6 +1934,20 @@ check_cli_parameters(const int action)
} }
} }
/*
 * --detail
 *
 * Only "daemon status" acts on --detail; for any other action, warn that
 * the option has no effect. The guard must test the "detail" flag itself,
 * otherwise the warning would be tied to an unrelated option.
 */
if (runtime_options.detail == true)
{
	switch (action)
	{
		case DAEMON_STATUS:
			break;
		default:
			item_list_append_format(&cli_warnings,
									_("--detail is not effective when executing %s"),
									action_name(action));
			break;
	}
}
/* --disable-wal-receiver / --enable-wal-receiver */ /* --disable-wal-receiver / --enable-wal-receiver */
if (runtime_options.disable_wal_receiver == true || runtime_options.enable_wal_receiver == true) if (runtime_options.disable_wal_receiver == true || runtime_options.enable_wal_receiver == true)
{ {
@@ -1955,6 +1972,238 @@ check_cli_parameters(const int action)
} }
/*
 * format_node_status()
 *
 * Append a short status marker for "node_info" to "details". The marker is
 * chosen from the node's registered type, its "active" flag, its node_status
 * and (for reachable primaries/standbys) its recovery_type. Situations which
 * merit an explanation are appended to "warnings".
 *
 * For a reachable node registered as a standby, the node's connection is
 * additionally used to check whether WAL replay is paused.
 *
 * Returns true if a branch which sets the error flag was taken: an inactive
 * primary/standby which is rejecting connections or is unreachable, an
 * inactive witness/BDR node in any state, or a node of unknown type.
 * Branches which only add a warning leave the return value at false.
 *
 * TODO: count nodes marked as "? unreachable" and add a hint about
 * the other cluster commands for better determining whether
 * unreachable.
 */
bool
format_node_status(t_node_info *node_info, PQExpBufferData *details, ItemList *warnings)
{
	const bool	registered_active = (node_info->active == true);
	const bool	reachable = (node_info->node_status == NODE_STATUS_UP);
	const bool	rejected = (node_info->node_status == NODE_STATUS_REJECTED);
	bool		problem = false;

	if (node_info->type == PRIMARY)
	{
		if (reachable && registered_active)
		{
			/* reachable and active: compare actual role with registered role */
			if (node_info->recovery_type == RECTYPE_PRIMARY)
			{
				appendPQExpBufferStr(details, "* running");
			}
			else if (node_info->recovery_type == RECTYPE_STANDBY)
			{
				appendPQExpBufferStr(details, "! running as standby");
				item_list_append_format(warnings,
										"node \"%s\" (ID: %i) is registered as primary but running as standby",
										node_info->node_name, node_info->node_id);
			}
			else if (node_info->recovery_type == RECTYPE_UNKNOWN)
			{
				appendPQExpBufferStr(details, "! unknown");
				item_list_append_format(warnings,
										"node \"%s\" (ID: %i) has unknown replication status",
										node_info->node_name, node_info->node_id);
			}
		}
		else if (reachable)
		{
			/* reachable, but the node record is marked inactive */
			if (node_info->recovery_type == RECTYPE_PRIMARY)
			{
				appendPQExpBufferStr(details, "! running");
				item_list_append_format(warnings,
										"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
										node_info->node_name, node_info->node_id);
			}
			else
			{
				appendPQExpBufferStr(details, "! running as standby");
				item_list_append_format(warnings,
										"node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
										node_info->node_name, node_info->node_id);
			}
		}
		else if (rejected)
		{
			/* server is up but not accepting our connection */
			if (registered_active)
			{
				appendPQExpBufferStr(details, "? running");
			}
			else
			{
				appendPQExpBufferStr(details, "! running");
				problem = true;
			}
		}
		else if (registered_active)
		{
			/* unreachable although the node record is marked active */
			appendPQExpBufferStr(details, "? unreachable");
			item_list_append_format(warnings,
									"node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
									node_info->node_name, node_info->node_id);
		}
		else
		{
			/* unreachable and marked inactive */
			appendPQExpBufferStr(details, "- failed");
			problem = true;
		}
	}
	else if (node_info->type == STANDBY)
	{
		if (reachable)
		{
			if (registered_active)
			{
				if (node_info->recovery_type == RECTYPE_STANDBY)
				{
					appendPQExpBufferStr(details, " running");
				}
				else if (node_info->recovery_type == RECTYPE_PRIMARY)
				{
					appendPQExpBufferStr(details, "! running as primary");
					item_list_append_format(warnings,
											"node \"%s\" (ID: %i) is registered as standby but running as primary",
											node_info->node_name, node_info->node_id);
				}
				else if (node_info->recovery_type == RECTYPE_UNKNOWN)
				{
					appendPQExpBufferStr(details, "! unknown");
					item_list_append_format(warnings,
											"node \"%s\" (ID: %i) has unknown replication status",
											node_info->node_name, node_info->node_id);
				}
			}
			else if (node_info->recovery_type == RECTYPE_STANDBY)
			{
				appendPQExpBufferStr(details, "! running");
				item_list_append_format(warnings,
										"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
										node_info->node_name, node_info->node_id);
			}
			else
			{
				appendPQExpBufferStr(details, "! running as primary");
				item_list_append_format(warnings,
										"node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
										node_info->node_name, node_info->node_id);
			}

			/* checked for every reachable standby, active or not */
			if (is_wal_replay_paused(node_info->conn, true))
			{
				item_list_append_format(warnings,
										_("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"),
										node_info->node_name, node_info->node_id);
			}
		}
		else if (rejected)
		{
			/* server is up but not accepting our connection */
			if (registered_active)
			{
				appendPQExpBufferStr(details, "? running");
			}
			else
			{
				appendPQExpBufferStr(details, "! running");
				problem = true;
			}
		}
		else if (registered_active)
		{
			/* unreachable although the node record is marked active */
			appendPQExpBufferStr(details, "? unreachable");
			item_list_append_format(warnings,
									"node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
									node_info->node_name, node_info->node_id);
		}
		else
		{
			/* unreachable and marked inactive */
			appendPQExpBufferStr(details, "- failed");
			problem = true;
		}
	}
	else if (node_info->type == WITNESS || node_info->type == BDR)
	{
		const char *marker;

		/* no recovery type check here; only reachability and the active flag matter */
		if (reachable)
			marker = registered_active ? "* running" : "! running";
		else if (rejected)
			marker = registered_active ? "? rejected" : "! failed";
		else
			marker = registered_active ? "? unreachable" : "- failed";

		appendPQExpBufferStr(details, marker);

		/* an inactive witness/BDR node is flagged as an error in every state */
		if (!registered_active)
			problem = true;
	}
	else if (node_info->type == UNKNOWN)
	{
		/* this should never happen */
		appendPQExpBufferStr(details, "? unknown node type");
		problem = true;
	}

	return problem;
}
static const char * static const char *
action_name(const int action) action_name(const int action)
{ {
@@ -2044,9 +2293,10 @@ print_error_list(ItemList *error_list, int log_level)
void void
print_status_header(int cols, ColHeader *headers) print_status_header(int cols, ColHeader *headers)
{ {
int i; int i, di;
int max_cols = 0; int max_cols = 0;
/* count how many columns we actually need to display */ /* count how many columns we actually need to display */
for (i = 0; i < cols; i++) for (i = 0; i < cols; i++)
{ {
@@ -2073,7 +2323,8 @@ print_status_header(int cols, ColHeader *headers)
printf("\n"); printf("\n");
printf("-"); printf("-");
for (i = 0; i < max_cols; i++) di = 0;
for (i = 0; i < cols; i++)
{ {
int j; int j;
@@ -2083,10 +2334,11 @@ print_status_header(int cols, ColHeader *headers)
for (j = 0; j < headers[i].max_length; j++) for (j = 0; j < headers[i].max_length; j++)
printf("-"); printf("-");
if (i < (max_cols - 1)) if (di < (max_cols - 1))
printf("-+-"); printf("-+-");
else else
printf("-"); printf("-");
di++;
} }
printf("\n"); printf("\n");

View File

@@ -100,6 +100,7 @@
#define OPT_COMPACT 1045 #define OPT_COMPACT 1045
#define OPT_DISABLE_WAL_RECEIVER 1046 #define OPT_DISABLE_WAL_RECEIVER 1046
#define OPT_ENABLE_WAL_RECEIVER 1047 #define OPT_ENABLE_WAL_RECEIVER 1047
#define OPT_DETAIL 1048
/* deprecated since 3.3 */ /* deprecated since 3.3 */
#define OPT_DATA_DIR 999 #define OPT_DATA_DIR 999
@@ -122,6 +123,7 @@ static struct option long_options[] =
{"wait", optional_argument, NULL, 'w'}, {"wait", optional_argument, NULL, 'w'},
{"no-wait", no_argument, NULL, 'W'}, {"no-wait", no_argument, NULL, 'W'},
{"compact", no_argument, NULL, OPT_COMPACT}, {"compact", no_argument, NULL, OPT_COMPACT},
{"detail", no_argument, NULL, OPT_DETAIL},
/* connection options */ /* connection options */
{"dbname", required_argument, NULL, 'd'}, {"dbname", required_argument, NULL, 'd'},

View File

@@ -115,6 +115,9 @@ item_list_append_format(ItemList *item_list, const char *format,...)
ItemListCell *cell; ItemListCell *cell;
va_list arglist; va_list arglist;
if (item_list == NULL)
return;
cell = (ItemListCell *) pg_malloc0(sizeof(ItemListCell)); cell = (ItemListCell *) pg_malloc0(sizeof(ItemListCell));
if (cell == NULL) if (cell == NULL)