From 9fe2fa2daf457fb014ee2296d15af883665dfd82 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 25 Apr 2019 14:44:41 +0900 Subject: [PATCH] daemon status: make output more like that of "cluster show" In particular make any issues with unexpected server state more obvious. --- HISTORY | 3 + dbutils.c | 6 +- dbutils.h | 4 + doc/appendix-release-notes.sgml | 21 +++ doc/repmgr-daemon-status.sgml | 56 ++++--- repmgr-action-cluster.c | 227 +--------------------------- repmgr-action-daemon.c | 89 ++++++++--- repmgr-client-global.h | 5 +- repmgr-client.c | 260 +++++++++++++++++++++++++++++++- repmgr-client.h | 2 + strutil.c | 3 + 11 files changed, 397 insertions(+), 279 deletions(-) diff --git a/HISTORY b/HISTORY index 725b848a..05c6b0bc 100644 --- a/HISTORY +++ b/HISTORY @@ -1,3 +1,6 @@ +4.4 2019-??-?? + repmgr: improve "daemon status" output (Ian) + 4.3.1 2019-??-?? repmgr: ensure BDR2-specific functionality cannot be used on BDR3 and later (Ian) diff --git a/dbutils.c b/dbutils.c index 30dac885..fc5afd36 100644 --- a/dbutils.c +++ b/dbutils.c @@ -2447,8 +2447,7 @@ get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info) initPQExpBuffer(&query); appendPQExpBuffer(&query, - " SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, " - " n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached " + " SELECT " REPMGR_NODES_COLUMNS_WITH_UPSTREAM " FROM repmgr.nodes n " " LEFT JOIN repmgr.nodes un " " ON un.node_id = n.upstream_node_id" @@ -2747,8 +2746,7 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list) initPQExpBuffer(&query); appendPQExpBufferStr(&query, - " SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, " - " n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached " + " SELECT " REPMGR_NODES_COLUMNS_WITH_UPSTREAM " FROM repmgr.nodes n " " 
LEFT JOIN repmgr.nodes un " " ON un.node_id = n.upstream_node_id" diff --git a/dbutils.h b/dbutils.h index 6776e769..5c7cdbdc 100644 --- a/dbutils.h +++ b/dbutils.h @@ -30,6 +30,10 @@ #include "voting.h" #define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name, NULL AS attached " +#define REPMGR_NODES_COLUMNS_WITH_UPSTREAM "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached " + + + #define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''" #define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name" diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index a72628c9..71af8999 100644 --- a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -1,3 +1,4 @@ + Release notes @@ -18,6 +19,26 @@ Release 4.4 ???, 2019 + + + repmgr client enhancements + + + + + + repmgr daemon status: + make output similar to that of + repmgr cluster show + for consistency and to make it easier to identify nodes not in the expected + state. + + + + + + + diff --git a/doc/repmgr-daemon-status.sgml b/doc/repmgr-daemon-status.sgml index 1d91a374..e1a5a61a 100644 --- a/doc/repmgr-daemon-status.sgml +++ b/doc/repmgr-daemon-status.sgml @@ -52,33 +52,31 @@ repmgrd running normally on all nodes: $ repmgr -f /etc/repmgr.conf daemon status - ID | Name | Role | Priority | Status | repmgrd | PID | Paused? 
| Upstream last seen -----+-------+---------+----------+---------+---------+-------+---------+-------------------- - 1 | node1 | primary | 100 | running | running | 71987 | no | n/a - 2 | node2 | standby | 100 | running | running | 71996 | no | 1 second(s) ago - 3 | node3 | standby | 100 | running | running | 72042 | no | 1 second(s) ago - + ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen +----+-------+---------+-----------+----------+---------+-------+---------+-------------------- + 1 | node1 | primary | * running | | running | 96563 | no | n/a + 2 | node2 | standby | running | node1 | running | 96572 | no | 1 second(s) ago + 3 | node3 | standby | running | node1 | running | 96584 | no | 0 second(s) ago repmgrd paused on all nodes (using ): $ repmgr -f /etc/repmgr.conf daemon status - ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen -----+-------+---------+----------+---------+---------+-------+---------+-------------------- - 1 | node1 | primary | 100 | running | running | 71987 | yes | n/a - 2 | node2 | standby | 100 | running | running | 71996 | yes | 0 second(s) ago - 3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago - + ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen +----+-------+---------+-----------+----------+---------+-------+---------+-------------------- + 1 | node1 | primary | * running | | running | 96563 | yes | n/a + 2 | node2 | standby | running | node1 | running | 96572 | yes | 1 second(s) ago + 3 | node3 | standby | running | node1 | running | 96584 | yes | 0 second(s) ago repmgrd not running on one node: $ repmgr -f /etc/repmgr.conf daemon status - ID | Name | Role | Priority | Status | repmgrd | PID | Paused? 
| Upstream last seen -----+-------+---------+----------+---------+-------------+-------+---------+-------------------- - 1 | node1 | primary | 100 | running | running | 71987 | yes | n/a - 2 | node2 | standby | 100 | running | not running | n/a | n/a | n/a - 3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago + ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen +----+-------+---------+-----------+----------+-------------+-------+---------+-------------------- + 1 | node1 | primary | * running | | running | 96563 | yes | n/a + 2 | node2 | standby | running | node1 | not running | n/a | n/a | n/a + 3 | node3 | standby | running | node1 | running | 96584 | yes | 0 second(s) ago @@ -96,9 +94,9 @@ parsing by scripts, e.g.: $ repmgr -f /etc/repmgr.conf daemon status --csv - 1,node1,primary,1,1,5722,1,100,-1 - 2,node2,standby,1,0,-1,1,100,1 - 3,node3,standby,1,1,5779,1,100,1 + 1,node1,primary,1,1,5722,1,100,-1,default + 2,node2,standby,1,0,-1,1,100,1,default + 3,node3,standby,1,1,5779,1,100,1,default The columns have following meanings: @@ -157,9 +155,25 @@ + + + node location + + + + + + + + + + Display additional information (location, priority) + about the &repmgr; configuration. + + @@ -175,8 +189,6 @@ - - See also diff --git a/repmgr-action-cluster.c b/repmgr-action-cluster.c index 971c4ea7..b21ecb54 100644 --- a/repmgr-action-cluster.c +++ b/repmgr-action-cluster.c @@ -184,231 +184,10 @@ do_cluster_show(void) initPQExpBuffer(&details); - /* - * TODO: count nodes marked as "? unreachable" and add a hint about - * the other cluster commands for better determining whether - * unreachable. 
- */ - switch (cell->node_info->type) - { - case PRIMARY: - { - /* node is reachable */ - if (cell->node_info->node_status == NODE_STATUS_UP) - { - if (cell->node_info->active == true) - { - switch (cell->node_info->recovery_type) - { - case RECTYPE_PRIMARY: - appendPQExpBufferStr(&details, "* running"); - break; - case RECTYPE_STANDBY: - appendPQExpBufferStr(&details, "! running as standby"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is registered as primary but running as standby", - cell->node_info->node_name, cell->node_info->node_id); - break; - case RECTYPE_UNKNOWN: - appendPQExpBufferStr(&details, "! unknown"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) has unknown replication status", - cell->node_info->node_name, cell->node_info->node_id); - break; - } - } - else - { - if (cell->node_info->recovery_type == RECTYPE_PRIMARY) - { - appendPQExpBufferStr(&details, "! running"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is running but the repmgr node record is inactive", - cell->node_info->node_name, cell->node_info->node_id); - } - else - { - appendPQExpBufferStr(&details, "! running as standby"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is registered as an inactive primary but running as standby", - cell->node_info->node_name, cell->node_info->node_id); - } - } - } - /* node is up but cannot connect */ - else if (cell->node_info->node_status == NODE_STATUS_REJECTED) - { - if (cell->node_info->active == true) - { - appendPQExpBufferStr(&details, "? running"); - } - else - { - appendPQExpBufferStr(&details, "! running"); - error_found = true; - } - } - /* node is unreachable */ - else - { - /* node is unreachable but marked active */ - if (cell->node_info->active == true) - { - appendPQExpBufferStr(&details, "? 
unreachable"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is registered as an active primary but is unreachable", - cell->node_info->node_name, cell->node_info->node_id); - } - /* node is unreachable and marked as inactive */ - else - { - appendPQExpBufferStr(&details, "- failed"); - error_found = true; - } - } - } - break; - case STANDBY: - { - /* node is reachable */ - if (cell->node_info->node_status == NODE_STATUS_UP) - { - if (cell->node_info->active == true) - { - switch (cell->node_info->recovery_type) - { - case RECTYPE_STANDBY: - appendPQExpBufferStr(&details, " running"); - break; - case RECTYPE_PRIMARY: - appendPQExpBufferStr(&details, "! running as primary"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is registered as standby but running as primary", - cell->node_info->node_name, cell->node_info->node_id); - break; - case RECTYPE_UNKNOWN: - appendPQExpBufferStr(&details, "! unknown"); - item_list_append_format( - &warnings, - "node \"%s\" (ID: %i) has unknown replication status", - cell->node_info->node_name, cell->node_info->node_id); - break; - } - } - else - { - if (cell->node_info->recovery_type == RECTYPE_STANDBY) - { - appendPQExpBufferStr(&details, "! running"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is running but the repmgr node record is inactive", - cell->node_info->node_name, cell->node_info->node_id); - } - else - { - appendPQExpBufferStr(&details, "! 
running as primary"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive", - cell->node_info->node_name, cell->node_info->node_id); - } - } - - /* warn about issue with paused WAL replay */ - if (is_wal_replay_paused(cell->node_info->conn, true)) - { - item_list_append_format(&warnings, - _("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"), - cell->node_info->node_name, cell->node_info->node_id); - } - } - /* node is up but cannot connect */ - else if (cell->node_info->node_status == NODE_STATUS_REJECTED) - { - if (cell->node_info->active == true) - { - appendPQExpBufferStr(&details, "? running"); - } - else - { - appendPQExpBufferStr(&details, "! running"); - error_found = true; - } - } - /* node is unreachable */ - else - { - /* node is unreachable but marked active */ - if (cell->node_info->active == true) - { - appendPQExpBufferStr(&details, "? unreachable"); - item_list_append_format(&warnings, - "node \"%s\" (ID: %i) is registered as an active standby but is unreachable", - cell->node_info->node_name, cell->node_info->node_id); - } - else - { - appendPQExpBufferStr(&details, "- failed"); - error_found = true; - } - } - } - - break; - case WITNESS: - case BDR: - { - /* node is reachable */ - if (cell->node_info->node_status == NODE_STATUS_UP) - { - if (cell->node_info->active == true) - { - appendPQExpBufferStr(&details, "* running"); - } - else - { - appendPQExpBufferStr(&details, "! running"); - error_found = true; - } - } - /* node is up but cannot connect */ - else if (cell->node_info->node_status == NODE_STATUS_REJECTED) - { - if (cell->node_info->active == true) - { - appendPQExpBufferStr(&details, "? rejected"); - } - else - { - appendPQExpBufferStr(&details, "! 
failed"); - error_found = true; - } - - } - /* node is unreachable */ - else - { - if (cell->node_info->active == true) - { - appendPQExpBufferStr(&details, "? unreachable"); - } - else - { - appendPQExpBufferStr(&details, "- failed"); - error_found = true; - } - } - } - break; - case UNKNOWN: - { - /* this should never happen */ - appendPQExpBufferStr(&details, "? unknown node type"); - error_found = true; - } - break; - } - + if (format_node_status(cell->node_info, &details, &warnings) == true) /* returns true when the node is in an error state */ + error_found = true; strncpy(cell->node_info->details, details.data, MAXLEN); + termPQExpBuffer(&details); PQfinish(cell->node_info->conn); diff --git a/repmgr-action-daemon.c b/repmgr-action-daemon.c index 97e7c4a6..6a533f06 100644 --- a/repmgr-action-daemon.c +++ b/repmgr-action-daemon.c @@ -43,15 +43,17 @@ typedef enum STATUS_ID = 0, STATUS_NAME, STATUS_ROLE, - STATUS_PRIORITY, STATUS_PG, - STATUS_RUNNING, + STATUS_UPSTREAM_NAME, + STATUS_LOCATION, + STATUS_PRIORITY, + STATUS_REPMGRD, STATUS_PID, STATUS_PAUSED, STATUS_UPSTREAM_LAST_SEEN } StatusHeader; -#define STATUS_HEADER_COUNT 9 +#define STATUS_HEADER_COUNT 11 struct ColHeader headers_status[STATUS_HEADER_COUNT]; @@ -91,14 +93,17 @@ do_daemon_status(void) strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN); strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN); strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN); + strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN); + strncpy(headers_status[STATUS_UPSTREAM_NAME].title, _("Upstream"), MAXLEN); + /* following only displayed with the --detail option */ + strncpy(headers_status[STATUS_LOCATION].title, _("Location"), MAXLEN); if (runtime_options.compact == true) strncpy(headers_status[STATUS_PRIORITY].title, _("Prio."), MAXLEN); else strncpy(headers_status[STATUS_PRIORITY].title, _("Priority"), MAXLEN); - strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN); - strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), 
MAXLEN); + strncpy(headers_status[STATUS_REPMGRD].title, _("repmgrd"), MAXLEN); strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN); strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN); @@ -107,19 +112,23 @@ do_daemon_status(void) else strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstream last seen"), MAXLEN); - for (i = 0; i < STATUS_HEADER_COUNT; i++) { headers_status[i].max_length = strlen(headers_status[i].title); headers_status[i].display = true; } + if (runtime_options.detail == false) + { + headers_status[STATUS_LOCATION].display = false; + headers_status[STATUS_PRIORITY].display = false; + } + i = 0; for (cell = nodes.head; cell; cell = cell->next) { int j; - PQExpBufferData buf; repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo)); repmgrd_info[i]->node_id = cell->node_info->node_id; @@ -135,6 +144,14 @@ do_daemon_status(void) if (PQstatus(cell->node_info->conn) != CONNECTION_OK) { + /* check if node is reachable, but just not letting us in */ + if (is_server_available_quiet(cell->node_info->conninfo)) + cell->node_info->node_status = NODE_STATUS_REJECTED; + else + cell->node_info->node_status = NODE_STATUS_DOWN; + + cell->node_info->recovery_type = RECTYPE_UNKNOWN; + connection_error_found = true; if (runtime_options.verbose) @@ -155,13 +172,15 @@ do_daemon_status(void) } repmgrd_info[i]->pg_running = false; - maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running")); + //maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running")); maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a")); maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a")); } else { - maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running")); + cell->node_info->node_status = NODE_STATUS_UP; + cell->node_info->recovery_type = get_recovery_type(cell->node_info->conn); + //maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running")); repmgrd_info[i]->pid = 
repmgrd_get_pid(cell->node_info->conn); @@ -217,22 +236,38 @@ do_daemon_status(void) maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i second(s) ago"), repmgrd_info[i]->upstream_last_seen); } } - - PQfinish(cell->node_info->conn); } + { + PQExpBufferData details; + initPQExpBuffer(&details); + + (void)format_node_status(cell->node_info, &details, &warnings); + strncpy(repmgrd_info[i]->pg_running_text, details.data, MAXLEN); + termPQExpBuffer(&details); + } + + PQfinish(cell->node_info->conn); headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name); headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type)); + headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text); + headers_status[STATUS_UPSTREAM_NAME].cur_length = strlen(cell->node_info->upstream_node_name); - initPQExpBuffer(&buf); - appendPQExpBuffer(&buf, "%i", cell->node_info->priority); - headers_status[STATUS_PRIORITY].cur_length = strlen(buf.data); - termPQExpBuffer(&buf); + if (runtime_options.detail == true) + { + PQExpBufferData buf; + + headers_status[STATUS_LOCATION].cur_length = strlen(cell->node_info->location); + + initPQExpBuffer(&buf); + appendPQExpBuffer(&buf, "%i", cell->node_info->priority); + headers_status[STATUS_PRIORITY].cur_length = strlen(buf.data); + termPQExpBuffer(&buf); + } headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text); - headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running); - headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text); + headers_status[STATUS_REPMGRD].cur_length = strlen(repmgrd_info[i]->repmgrd_running); headers_status[STATUS_UPSTREAM_LAST_SEEN].cur_length = strlen(repmgrd_info[i]->upstream_last_seen_text); @@ -269,7 +304,7 @@ do_daemon_status(void) paused = -1; } - printf("%i,%s,%s,%i,%i,%i,%i,%i,%i\n", + printf("%i,%s,%s,%i,%i,%i,%i,%i,%i,%s\n", cell->node_info->node_id, 
cell->node_info->node_name, get_node_type_string(cell->node_info->type), @@ -280,17 +315,25 @@ do_daemon_status(void) cell->node_info->priority, repmgrd_info[i]->pid == UNKNOWN_PID ? -1 - : repmgrd_info[i]->upstream_last_seen); + : repmgrd_info[i]->upstream_last_seen, + cell->node_info->location); } else { printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id); printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name); printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type)); - printf("| %-*i ", headers_status[STATUS_PRIORITY].max_length, cell->node_info->priority); printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text); - printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running); + printf("| %-*s ", headers_status[STATUS_UPSTREAM_NAME].max_length, cell->node_info->upstream_node_name); + + if (runtime_options.detail == true) + { + printf("| %-*s ", headers_status[STATUS_LOCATION].max_length, cell->node_info->location); + printf("| %-*i ", headers_status[STATUS_PRIORITY].max_length, cell->node_info->priority); + } + + printf("| %-*s ", headers_status[STATUS_REPMGRD].max_length, repmgrd_info[i]->repmgrd_running); printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text); if (repmgrd_info[i]->pid == UNKNOWN_PID) @@ -441,7 +484,7 @@ _do_repmgr_pause(bool pause) void fetch_node_records(PGconn *conn, NodeInfoList *node_list) { - bool success = get_all_node_records(conn, node_list); + bool success = get_all_node_records_with_upstream(conn, node_list); if (success == false) { @@ -756,6 +799,7 @@ void do_daemon_help(void) printf(_(" \"daemon status\" shows the status of repmgrd on each node in the cluster\n")); puts(""); printf(_(" --csv emit output as CSV\n")); + printf(_(" --detail show additional detail\n")); printf(_(" --verbose show text of database connection error 
messages\n")); puts(""); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index fe56cef9..9998056b 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -45,6 +45,7 @@ typedef struct int wait; bool no_wait; bool compact; + bool detail; /* logging options */ char log_level[MAXLEN]; /* overrides setting in repmgr.conf */ @@ -143,7 +144,7 @@ typedef struct /* configuration metadata */ \ false, false, false, false, false, \ /* general configuration options */ \ - "", false, false, "", -1, false, false, \ + "", false, false, "", -1, false, false, false, \ /* logging options */ \ "", false, false, false, false, \ /* output options */ \ @@ -241,8 +242,8 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record); extern void make_repmgrd_path(PQExpBufferData *output_buf); - /* display functions */ +extern bool format_node_status(t_node_info *node_info, PQExpBufferData *details, ItemList *warnings); extern void print_help_header(void); extern void print_status_header(int cols, ColHeader *headers); diff --git a/repmgr-client.c b/repmgr-client.c index fca9c0a9..cf865205 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -271,6 +271,10 @@ main(int argc, char **argv) runtime_options.compact = true; break; + /* --detail */ + case OPT_DETAIL: + runtime_options.detail = true; + break; /*---------------------------- * database connection options @@ -1916,7 +1920,6 @@ check_cli_parameters(const int action) } /* --compact */ - if (runtime_options.compact == true) { switch (action) @@ -1931,6 +1934,20 @@ check_cli_parameters(const int action) } } + /* --detail: warn unless the action is "daemon status" */ + if (runtime_options.detail == true) + { + switch (action) + { + case DAEMON_STATUS: + break; + default: + item_list_append_format(&cli_warnings, + _("--detail is not effective when executing %s"), + action_name(action)); + } + } + /* --disable-wal-receiver / 
--enable-wal-receiver */ if (runtime_options.disable_wal_receiver == true || runtime_options.enable_wal_receiver == true) { @@ -1955,6 +1972,238 @@ check_cli_parameters(const int action) } +bool +format_node_status(t_node_info *node_info, PQExpBufferData *details, ItemList *warnings) +{ + bool error_found = false; + + /* + * TODO: count nodes marked as "? unreachable" and add a hint about + * the other cluster commands for better determining whether + * unreachable. + */ + switch (node_info->type) + { + case PRIMARY: + { + /* node is reachable */ + if (node_info->node_status == NODE_STATUS_UP) + { + if (node_info->active == true) + { + switch (node_info->recovery_type) + { + case RECTYPE_PRIMARY: + appendPQExpBufferStr(details, "* running"); + break; + case RECTYPE_STANDBY: + appendPQExpBufferStr(details, "! running as standby"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is registered as primary but running as standby", + node_info->node_name, node_info->node_id); + break; + case RECTYPE_UNKNOWN: + appendPQExpBufferStr(details, "! unknown"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) has unknown replication status", + node_info->node_name, node_info->node_id); + break; + } + } + else + { + if (node_info->recovery_type == RECTYPE_PRIMARY) + { + appendPQExpBufferStr(details, "! running"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is running but the repmgr node record is inactive", + node_info->node_name, node_info->node_id); + } + else + { + appendPQExpBufferStr(details, "! running as standby"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is registered as an inactive primary but running as standby", + node_info->node_name, node_info->node_id); + } + } + } + /* node is up but cannot connect */ + else if (node_info->node_status == NODE_STATUS_REJECTED) + { + if (node_info->active == true) + { + appendPQExpBufferStr(details, "? running"); + } + else + { + appendPQExpBufferStr(details, "! 
running"); + error_found = true; + } + } + /* node is unreachable */ + else + { + /* node is unreachable but marked active */ + if (node_info->active == true) + { + appendPQExpBufferStr(details, "? unreachable"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is registered as an active primary but is unreachable", + node_info->node_name, node_info->node_id); + } + /* node is unreachable and marked as inactive */ + else + { + appendPQExpBufferStr(details, "- failed"); + error_found = true; + } + } + } + break; + case STANDBY: + { + /* node is reachable */ + if (node_info->node_status == NODE_STATUS_UP) + { + if (node_info->active == true) + { + switch (node_info->recovery_type) + { + case RECTYPE_STANDBY: + appendPQExpBufferStr(details, " running"); + break; + case RECTYPE_PRIMARY: + appendPQExpBufferStr(details, "! running as primary"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is registered as standby but running as primary", + node_info->node_name, node_info->node_id); + break; + case RECTYPE_UNKNOWN: + appendPQExpBufferStr(details, "! unknown"); + item_list_append_format( + warnings, + "node \"%s\" (ID: %i) has unknown replication status", + node_info->node_name, node_info->node_id); + break; + } + } + else + { + if (node_info->recovery_type == RECTYPE_STANDBY) + { + appendPQExpBufferStr(details, "! running"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is running but the repmgr node record is inactive", + node_info->node_name, node_info->node_id); + } + else + { + appendPQExpBufferStr(details, "! 
running as primary"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive", + node_info->node_name, node_info->node_id); + } + } + + /* warn about issue with paused WAL replay */ + if (is_wal_replay_paused(node_info->conn, true)) + { + item_list_append_format(warnings, + _("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"), + node_info->node_name, node_info->node_id); + } + } + /* node is up but cannot connect */ + else if (node_info->node_status == NODE_STATUS_REJECTED) + { + if (node_info->active == true) + { + appendPQExpBufferStr(details, "? running"); + } + else + { + appendPQExpBufferStr(details, "! running"); + error_found = true; + } + } + /* node is unreachable */ + else + { + /* node is unreachable but marked active */ + if (node_info->active == true) + { + appendPQExpBufferStr(details, "? unreachable"); + item_list_append_format(warnings, + "node \"%s\" (ID: %i) is registered as an active standby but is unreachable", + node_info->node_name, node_info->node_id); + } + else + { + appendPQExpBufferStr(details, "- failed"); + error_found = true; + } + } + } + + break; + case WITNESS: + case BDR: + { + /* node is reachable */ + if (node_info->node_status == NODE_STATUS_UP) + { + if (node_info->active == true) + { + appendPQExpBufferStr(details, "* running"); + } + else + { + appendPQExpBufferStr(details, "! running"); + error_found = true; + } + } + /* node is up but cannot connect */ + else if (node_info->node_status == NODE_STATUS_REJECTED) + { + if (node_info->active == true) + { + appendPQExpBufferStr(details, "? rejected"); + } + else + { + appendPQExpBufferStr(details, "! failed"); + error_found = true; + } + } + /* node is unreachable */ + else + { + if (node_info->active == true) + { + appendPQExpBufferStr(details, "? 
unreachable"); + } + else + { + appendPQExpBufferStr(details, "- failed"); + error_found = true; + } + } + } + break; + case UNKNOWN: + { + /* this should never happen */ + appendPQExpBufferStr(details, "? unknown node type"); + error_found = true; + } + break; + } + + return error_found; +} + + static const char * action_name(const int action) { @@ -2044,9 +2293,10 @@ print_error_list(ItemList *error_list, int log_level) void print_status_header(int cols, ColHeader *headers) { - int i; + int i, di; int max_cols = 0; + /* count how many columns we actually need to display */ for (i = 0; i < cols; i++) { @@ -2073,7 +2323,8 @@ print_status_header(int cols, ColHeader *headers) printf("\n"); printf("-"); - for (i = 0; i < max_cols; i++) + di = 0; + for (i = 0; i < cols; i++) { int j; @@ -2083,10 +2334,11 @@ print_status_header(int cols, ColHeader *headers) for (j = 0; j < headers[i].max_length; j++) printf("-"); - if (i < (max_cols - 1)) + if (di < (max_cols - 1)) printf("-+-"); else printf("-"); + di++; } printf("\n"); diff --git a/repmgr-client.h b/repmgr-client.h index ed4bbf68..de2d7a97 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -100,6 +100,7 @@ #define OPT_COMPACT 1045 #define OPT_DISABLE_WAL_RECEIVER 1046 #define OPT_ENABLE_WAL_RECEIVER 1047 +#define OPT_DETAIL 1048 /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 @@ -122,6 +123,7 @@ static struct option long_options[] = {"wait", optional_argument, NULL, 'w'}, {"no-wait", no_argument, NULL, 'W'}, {"compact", no_argument, NULL, OPT_COMPACT}, + {"detail", no_argument, NULL, OPT_DETAIL}, /* connection options */ {"dbname", required_argument, NULL, 'd'}, diff --git a/strutil.c b/strutil.c index 74edeb55..df339ee7 100644 --- a/strutil.c +++ b/strutil.c @@ -115,6 +115,9 @@ item_list_append_format(ItemList *item_list, const char *format,...) ItemListCell *cell; va_list arglist; + if (item_list == NULL) + return; + cell = (ItemListCell *) pg_malloc0(sizeof(ItemListCell)); if (cell == NULL)