diff --git a/HISTORY b/HISTORY index fe6d96a6..9d088ed9 100644 --- a/HISTORY +++ b/HISTORY @@ -4,6 +4,7 @@ repmgr: add "--repmgrd-force-unpause" option to "standby switchover" (Ian) repmgr: improve "--dry-run" behaviour for "standby promote" and "standby switchover" (Ian) + repmgr: display node timeline ID in "cluster show" output (Ian) repmgr: in "cluster show" and "daemon status", show upstream node name as reported by each individual node (Ian) repmgr: in "cluster show" and "daemon status", check if a node is attached diff --git a/dbutils.c b/dbutils.c index a29d5d50..790bee28 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1600,7 +1600,7 @@ system_identifier(PGconn *conn) if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_db_error(conn, NULL, _("get_system_identifier(): unable to query pg_control_system()")); + log_db_error(conn, NULL, _("system_identifier(): unable to query pg_control_system()")); } else { @@ -3407,6 +3407,10 @@ clear_node_info_list(NodeInfoList *nodes) while (cell != NULL) { next_cell = cell->next; + + if (cell->node_info->replication_info != NULL) + pfree(cell->node_info->replication_info); + pfree(cell->node_info); pfree(cell); cell = next_cell; @@ -5073,6 +5077,7 @@ init_replication_info(ReplInfo *replication_info) { memset(replication_info->current_timestamp, 0, sizeof(replication_info->current_timestamp)); replication_info->in_recovery = false; + replication_info->timeline_id = UNKNOWN_TIMELINE_ID; replication_info->last_wal_receive_lsn = InvalidXLogRecPtr; replication_info->last_wal_replay_lsn = InvalidXLogRecPtr; memset(replication_info->last_xact_replay_timestamp, 0, sizeof(replication_info->last_xact_replay_timestamp)); @@ -5259,6 +5264,45 @@ get_replication_lag_seconds(PGconn *conn) } + +/* + * Return the timeline ID reported by pg_control_checkpoint() on the + * server "conn" is connected to. + * + * Returns UNKNOWN_TIMELINE_ID if the server is older than PostgreSQL 9.6 + * or if the query fails (in which case an error is logged). + */ +TimeLineID +get_node_timeline(PGconn *conn) +{ + TimeLineID timeline_id = UNKNOWN_TIMELINE_ID; + PGresult *res = NULL; + + /* + * pg_control_checkpoint() was introduced in PostgreSQL 9.6 + */ + if (PQserverVersion(conn) < 90600) + { + return 
UNKNOWN_TIMELINE_ID; + } + + res = PQexec(conn, "SELECT timeline_id FROM pg_catalog.pg_control_checkpoint()"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_db_error(conn, NULL, _("get_node_timeline(): unable to query pg_control_checkpoint()")); + } + else + { + timeline_id = atoi(PQgetvalue(res, 0, 0)); + } + + PQclear(res); + + return timeline_id; +} + + void get_node_replication_stats(PGconn *conn, t_node_info *node_info) { diff --git a/dbutils.h b/dbutils.h index 36adfd5b..f193e1b5 100644 --- a/dbutils.h +++ b/dbutils.h @@ -164,8 +164,28 @@ typedef struct s_extension_versions { UNKNOWN_SERVER_VERSION_NUM \ } + +typedef struct +{ + char current_timestamp[MAXLEN]; + bool in_recovery; + TimeLineID timeline_id; + XLogRecPtr last_wal_receive_lsn; + XLogRecPtr last_wal_replay_lsn; + char last_xact_replay_timestamp[MAXLEN]; + int replication_lag_time; + bool receiving_streamed_wal; + bool wal_replay_paused; + int upstream_last_seen; + int upstream_node_id; +} ReplInfo; + /* - * Struct to store node information + * Struct to store node information. + * + * The first section represents the contents of the "repmgr.nodes" + * table; subsequent sections contain information collated in + * various contexts. */ typedef struct s_node_info { @@ -199,6 +219,8 @@ typedef struct s_node_info int total_replication_slots; int active_replication_slots; int inactive_replication_slots; + /* replication info */ + ReplInfo *replication_info; } t_node_info; @@ -225,7 +247,8 @@ typedef struct s_node_info /* for ad-hoc use e.g. 
when working with a list of nodes */ \ "", true, true, \ /* various statistics */ \ - -1, -1, -1, -1, -1, -1 \ + -1, -1, -1, -1, -1, -1, \ + NULL \ } @@ -338,19 +361,7 @@ typedef struct BdrNodeInfoList 0 \ } -typedef struct -{ - char current_timestamp[MAXLEN]; - bool in_recovery; - XLogRecPtr last_wal_receive_lsn; - XLogRecPtr last_wal_replay_lsn; - char last_xact_replay_timestamp[MAXLEN]; - int replication_lag_time; - bool receiving_streamed_wal; - bool wal_replay_paused; - int upstream_last_seen; - int upstream_node_id; -} ReplInfo; + typedef struct { @@ -602,6 +613,7 @@ XLogRecPtr get_last_wal_receive_location(PGconn *conn); void init_replication_info(ReplInfo *replication_info); bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info); int get_replication_lag_seconds(PGconn *conn); +TimeLineID get_node_timeline(PGconn *conn); void get_node_replication_stats(PGconn *conn, t_node_info *node_info); NodeAttached is_downstream_node_attached(PGconn *conn, char *node_name); void set_upstream_last_seen(PGconn *conn, int upstream_node_id); diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index 3f30b0f0..05187a07 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -97,6 +97,13 @@ + repmgr cluster show: + display each node's timeline ID (PostgreSQL 9.6 and later only). + + + + + repmgr cluster show and repmgr daemon status: show the upstream node name as reported by each individual node - this helps visualise diff --git a/doc/repmgr-cluster-show.xml b/doc/repmgr-cluster-show.xml index 196d9b87..b06d326e 100644 --- a/doc/repmgr-cluster-show.xml +++ b/doc/repmgr-cluster-show.xml @@ -22,7 +22,9 @@ directly and can be run on any node in the cluster; this is also useful when analyzing connectivity from a particular node. - + + For PostgreSQL 9.6 and later, the output will also contain the node's current timeline ID. 
+ Node availability is tested by connecting from the node where repmgr cluster show is executed, and does not necessarily imply the node @@ -52,11 +54,11 @@ $ repmgr -f /etc/repmgr.conf cluster show - ID | Name | Role | Status | Upstream | Location | Priority | Connection string + ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string - ----+-------+---------+-----------+----------+----------+----------+----------------------------------------- + ----+-------+---------+-----------+----------+----------+----------+----------+----------------------------------------- - 1 | node1 | primary | * running | | default | 100 | host=db_node1 dbname=repmgr user=repmgr - 2 | node2 | standby | running | node1 | default | 100 | host=db_node2 dbname=repmgr user=repmgr - 3 | node3 | standby | running | node1 | default | 100 | host=db_node3 dbname=repmgr user=repmgr + 1 | node1 | primary | * running | | default | 100 | 1 | host=db_node1 dbname=repmgr user=repmgr + 2 | node2 | standby | running | node1 | default | 100 | 1 | host=db_node2 dbname=repmgr user=repmgr + 3 | node3 | standby | running | node1 | default | 100 | 1 | host=db_node3 dbname=repmgr user=repmgr diff --git a/repmgr-action-cluster.c b/repmgr-action-cluster.c index 99201e80..53ff8e56 100644 --- a/repmgr-action-cluster.c +++ b/repmgr-action-cluster.c @@ -24,7 +24,7 @@ #include "repmgr-client-global.h" #include "repmgr-action-cluster.h" -#define SHOW_HEADER_COUNT 8 +#define SHOW_HEADER_COUNT 9 typedef enum { @@ -35,6 +35,7 @@ typedef enum SHOW_UPSTREAM_NAME, SHOW_LOCATION, SHOW_PRIORITY, + SHOW_TIMELINE_ID, SHOW_CONNINFO } ShowHeader; @@ -113,9 +114,15 @@ do_cluster_show(void) strncpy(headers_show[SHOW_LOCATION].title, _("Location"), MAXLEN); if (runtime_options.compact == true) + { strncpy(headers_show[SHOW_PRIORITY].title, _("Prio."), MAXLEN); + strncpy(headers_show[SHOW_TIMELINE_ID].title, _("TLI"), MAXLEN); + } else + { strncpy(headers_show[SHOW_PRIORITY].title, _("Priority"), MAXLEN); + strncpy(headers_show[SHOW_TIMELINE_ID].title, _("Timeline"), MAXLEN); + } strncpy(headers_show[SHOW_CONNINFO].title, 
_("Connection string"), MAXLEN); @@ -128,6 +135,16 @@ do_cluster_show(void) { headers_show[i].display = true; + /* Don't display timeline on pre-9.6 clusters */ + if (i == SHOW_TIMELINE_ID) + { + if (PQserverVersion(conn) < 90600) + { + headers_show[i].display = false; + } + } + + /* if --compact provided, don't display conninfo */ if (runtime_options.compact == true) { if (i == SHOW_CONNINFO) @@ -136,6 +153,7 @@ do_cluster_show(void) } } + if (headers_show[i].display == true) { headers_show[i].max_length = strlen(headers_show[i].title); @@ -154,6 +172,15 @@ do_cluster_show(void) PQExpBufferData upstream; PQExpBufferData buf; + cell->node_info->replication_info = palloc0(sizeof(ReplInfo)); + if (cell->node_info->replication_info == NULL) + { + log_error(_("unable to allocate memory")); + exit(ERR_INTERNAL); + } + + init_replication_info(cell->node_info->replication_info); + cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo); if (PQstatus(cell->node_info->conn) != CONNECTION_OK) @@ -176,6 +203,11 @@ do_cluster_show(void) cell->node_info->node_name, cell->node_info->node_id); } } + else + { + /* NOP on pre-9.6 servers */ + cell->node_info->replication_info->timeline_id = get_node_timeline(cell->node_info->conn); + } initPQExpBuffer(&node_status); initPQExpBuffer(&upstream); @@ -212,7 +244,18 @@ do_cluster_show(void) headers_show[SHOW_LOCATION].cur_length = strlen(cell->node_info->location); - + if (cell->node_info->replication_info->timeline_id == UNKNOWN_TIMELINE_ID) + { + /* display "?" 
*/ + headers_show[SHOW_TIMELINE_ID].cur_length = 1; + } + else + { + initPQExpBuffer(&buf); + appendPQExpBuffer(&buf, "%i", (int) cell->node_info->replication_info->timeline_id); + headers_show[SHOW_TIMELINE_ID].cur_length = strlen(buf.data); + termPQExpBuffer(&buf); + } headers_show[SHOW_CONNINFO].cur_length = strlen(cell->node_info->conninfo); @@ -277,6 +320,14 @@ do_cluster_show(void) printf("| %-*s ", headers_show[SHOW_LOCATION].max_length, cell->node_info->location); printf("| %-*i ", headers_show[SHOW_PRIORITY].max_length, cell->node_info->priority); + if (headers_show[SHOW_TIMELINE_ID].display == true) + { + if (cell->node_info->replication_info->timeline_id == UNKNOWN_TIMELINE_ID) + printf("| %-*c ", headers_show[SHOW_TIMELINE_ID].max_length, '?'); + else + printf("| %-*i ", headers_show[SHOW_TIMELINE_ID].max_length, (int)cell->node_info->replication_info->timeline_id); + } + if (headers_show[SHOW_CONNINFO].display == true) { printf("| %-*s", headers_show[SHOW_CONNINFO].max_length, cell->node_info->conninfo);