From 07097575b1a606360046abc035214312ba54c382 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Sat, 23 Feb 2019 12:16:34 +0900 Subject: [PATCH] daemon status: add column "upstream last seen" This displays the interval (in seconds) since the repmgrd instance on each node last confirmed its upstream node is available. --- HISTORY | 1 + dbutils.c | 5 ++- dbutils.h | 2 ++ doc/appendix-release-notes.sgml | 8 +++++ doc/repmgr-daemon-status.sgml | 55 +++++++++++++++++++-------------- repmgr-action-daemon.c | 51 ++++++++++++++++++++++++++---- 6 files changed, 91 insertions(+), 31 deletions(-) diff --git a/HISTORY b/HISTORY index d7ee6288..f384e151 100644 --- a/HISTORY +++ b/HISTORY @@ -21,6 +21,7 @@ repmgr: fix long node ID display in "cluster show" (Ian) repmgr: check for primary server before executing "witness register"; GitHub #538 (Ian) + repmgr: show "upstream last seen" interval in "daemon status" output (Ian) repmgr: "node check" will only consider physical replication slots (Ian) repmgrd: check binary and extension major versions match; GitHub #515 (Ian) repmgrd: on a cascaded standby, don't fail over if "failover=manual"; diff --git a/dbutils.c b/dbutils.c index 92071f51..a84ac464 100644 --- a/dbutils.c +++ b/dbutils.c @@ -5086,7 +5086,10 @@ get_primary_last_seen(PGconn *conn) initPQExpBuffer(&query); appendPQExpBufferStr(&query, - "SELECT repmgr.get_primary_last_seen()"); + "SELECT CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE " + " THEN -1 " + " ELSE repmgr.get_primary_last_seen() " + " END AS primary_last_seen "); res = PQexec(conn, query.data); diff --git a/dbutils.h b/dbutils.h index ecfcb7cf..b9934102 100644 --- a/dbutils.h +++ b/dbutils.h @@ -354,6 +354,8 @@ typedef struct RepmgrdInfo { char repmgrd_running[MAXLEN]; bool paused; bool wal_paused_pending_wal; + int upstream_last_seen; + char upstream_last_seen_text[MAXLEN]; } RepmgrdInfo; diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index 9412c62b..8aeda485 100644 --- 
a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -79,6 +79,14 @@ REPMGRD_OPTS="--daemonize=false" + + + repmgr daemon status + displays the interval (in seconds) since the repmgrd instance + last verified its upstream node was available. + + + Add option to repmgr cluster show (GitHub #521). diff --git a/doc/repmgr-daemon-status.sgml b/doc/repmgr-daemon-status.sgml index de0d0dc8..febd8c47 100644 --- a/doc/repmgr-daemon-status.sgml +++ b/doc/repmgr-daemon-status.sgml @@ -49,33 +49,34 @@ repmgrd running normally on all nodes: $ repmgr -f /etc/repmgr.conf daemon status - ID | Name | Role | Priority | Status | repmgrd | PID | Paused? -----+-------+---------+----------+---------+---------+------+--------- - 1 | node1 | primary | 100 | running | running | 5722 | no - 2 | node2 | standby | 100 | running | running | 5731 | no - 3 | node3 | standby | 100 | running | running | 5779 | no + ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen +----+-------+---------+----------+---------+---------+-------+---------+-------------------- + 1 | node1 | primary | 100 | running | running | 71987 | no | n/a + 2 | node2 | standby | 100 | running | running | 71996 | no | 1 second(s) ago + 3 | node3 | standby | 100 | running | running | 72042 | no | 1 second(s) ago + repmgrd paused on all nodes (using ): $ repmgr -f /etc/repmgr.conf daemon status - ID | Name | Role | Priority | Status | repmgrd | PID | Paused? -----+-------+---------+----------+---------+---------+------+--------- - 1 | node1 | primary | 100 | running | running | 5722 | yes - 2 | node2 | standby | 100 | running | running | 5731 | yes - 3 | node3 | standby | 100 | running | running | 5779 | yes + ID | Name | Role | Priority | Status | repmgrd | PID | Paused? 
| Upstream last seen +----+-------+---------+----------+---------+---------+-------+---------+-------------------- + 1 | node1 | primary | 100 | running | running | 71987 | yes | n/a + 2 | node2 | standby | 100 | running | running | 71996 | yes | 0 second(s) ago + 3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago + repmgrd not running on one node: $ repmgr -f /etc/repmgr.conf daemon status - ID | Name | Role | Priority | Status | repmgrd | PID | Paused? -----+-------+---------+----------+---------+-------------+------+--------- - 1 | node1 | primary | 100 | running | running | 5722 | yes - 2 | node2 | standby | 100 | running | not running | n/a | n/a - 3 | node3 | standby | 100 | running | running | 5779 | yes + ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen +----+-------+---------+----------+---------+-------------+-------+---------+-------------------- + 1 | node1 | primary | 100 | running | running | 71987 | yes | n/a + 2 | node2 | standby | 100 | running | not running | n/a | n/a | n/a + 3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago - @@ -92,9 +93,9 @@ parsing by scripts, e.g.: $ repmgr -f /etc/repmgr.conf daemon status --csv - 1,node1,primary,1,1,5722,1,100 - 2,node2,standby,1,0,-1,1,100 - 3,node3,standby,1,1,5779,1,100 + 1,node1,primary,1,1,5722,1,100,-1 + 2,node2,standby,1,0,-1,1,100,-1 + 3,node3,standby,1,1,5779,1,100,1 The columns have following meanings: @@ -141,11 +142,17 @@ - - - repmgrd priority - - + + + repmgrd node priority + + + + + + interval in seconds since the node's upstream was last seen + + diff --git a/repmgr-action-daemon.c b/repmgr-action-daemon.c index ecbf3c49..fddfa59c 100644 --- a/repmgr-action-daemon.c +++ b/repmgr-action-daemon.c @@ -47,10 +47,11 @@ typedef enum STATUS_PG, STATUS_RUNNING, STATUS_PID, - STATUS_PAUSED + STATUS_PAUSED, + STATUS_UPSTREAM_LAST_SEEN } StatusHeader; -#define STATUS_HEADER_COUNT 8 +#define STATUS_HEADER_COUNT 9 
struct ColHeader headers_status[STATUS_HEADER_COUNT]; @@ -101,6 +102,12 @@ do_daemon_status(void) strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN); strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN); + if (runtime_options.compact == true) + strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstr. last"), MAXLEN); + else + strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstream last seen"), MAXLEN); + + for (i = 0; i < STATUS_HEADER_COUNT; i++) { headers_status[i].max_length = strlen(headers_status[i].title); @@ -122,6 +129,7 @@ do_daemon_status(void) repmgrd_info[i]->running = false; repmgrd_info[i]->pg_running = true; repmgrd_info[i]->wal_paused_pending_wal = false; + repmgrd_info[i]->upstream_last_seen = -1; cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo); @@ -193,6 +201,24 @@ do_daemon_status(void) } } + repmgrd_info[i]->upstream_last_seen = get_primary_last_seen(cell->node_info->conn); + + if (repmgrd_info[i]->upstream_last_seen < 0) + { + maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, "%s", _("n/a")); + } + else + { + if (runtime_options.compact == true) + { + maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i sec(s) ago"), repmgrd_info[i]->upstream_last_seen); + } + else + { + maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i second(s) ago"), repmgrd_info[i]->upstream_last_seen); + } + } + PQfinish(cell->node_info->conn); } @@ -209,6 +235,8 @@ do_daemon_status(void) headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running); headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text); + headers_status[STATUS_UPSTREAM_LAST_SEEN].cur_length = strlen(repmgrd_info[i]->upstream_last_seen_text); + for (j = 0; j < STATUS_HEADER_COUNT; j++) { if (headers_status[j].cur_length > headers_status[j].max_length) @@ -232,7 +260,7 @@ do_daemon_status(void) { if (runtime_options.output_mode == OM_CSV) { - 
printf("%i,%s,%s,%i,%i,%i,%i,%i\n", + printf("%i,%s,%s,%i,%i,%i,%i,%i,%i\n", cell->node_info->node_id, cell->node_info->node_name, get_node_type_string(cell->node_info->type), @@ -240,7 +268,10 @@ do_daemon_status(void) repmgrd_info[i]->running ? 1 : 0, repmgrd_info[i]->pid, repmgrd_info[i]->paused ? 1 : 0, - cell->node_info->priority); + cell->node_info->priority, + repmgrd_info[i]->pid == UNKNOWN_PID + ? -1 + : repmgrd_info[i]->upstream_last_seen); } else { @@ -254,9 +285,17 @@ do_daemon_status(void) printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text); if (repmgrd_info[i]->pid == UNKNOWN_PID) - printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, "n/a"); + { + printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, _("n/a")); + printf("| %-*s ", headers_status[STATUS_UPSTREAM_LAST_SEEN].max_length, _("n/a")); + + } else - printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? "yes" : "no"); + { + printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? _("yes") : _("no")); + + printf("| %-*s ", headers_status[STATUS_UPSTREAM_LAST_SEEN].max_length, repmgrd_info[i]->upstream_last_seen_text); + } printf("\n"); }