mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
cluster show: display timeline ID
This helps provide a better picture of the state of the cluster, i.e. making it more obvious whether there's been a timeline divergence. This also provides infrastructure for further improvements in cluster status display and diagnosis. Note this is only available in PostgreSQL 9.6 and later as it relies on the SQL functions for interrogating pg_control, which can be executed remotely. As PostgreSQL 9.5 will shortly be the only community-supported version without these functions, it's not worth the effort of trying to duplicate their functionality.
This commit is contained in:
1
HISTORY
1
HISTORY
@@ -4,6 +4,7 @@
|
|||||||
repmgr: add "--repmgrd-force-unpause" option to "standby switchover" (Ian)
|
repmgr: add "--repmgrd-force-unpause" option to "standby switchover" (Ian)
|
||||||
repmgr: improve "--dry-run" behaviour for "standby promote" and
|
repmgr: improve "--dry-run" behaviour for "standby promote" and
|
||||||
"standby switchover" (Ian)
|
"standby switchover" (Ian)
|
||||||
|
repmgr: display node timeline ID in "cluster show" output (Ian)
|
||||||
repmgr: in "cluster show" and "daemon status", show upstream node name
|
repmgr: in "cluster show" and "daemon status", show upstream node name
|
||||||
as reported by each individual node (Ian)
|
as reported by each individual node (Ian)
|
||||||
repmgr: in "cluster show" and "daemon status", check if a node is attached
|
repmgr: in "cluster show" and "daemon status", check if a node is attached
|
||||||
|
|||||||
39
dbutils.c
39
dbutils.c
@@ -1600,7 +1600,7 @@ system_identifier(PGconn *conn)
|
|||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_db_error(conn, NULL, _("get_system_identifier(): unable to query pg_control_system()"));
|
log_db_error(conn, NULL, _("system_identifier(): unable to query pg_control_system()"));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -3407,6 +3407,10 @@ clear_node_info_list(NodeInfoList *nodes)
|
|||||||
while (cell != NULL)
|
while (cell != NULL)
|
||||||
{
|
{
|
||||||
next_cell = cell->next;
|
next_cell = cell->next;
|
||||||
|
|
||||||
|
if (cell->node_info->replication_info != NULL)
|
||||||
|
pfree(cell->node_info->replication_info);
|
||||||
|
|
||||||
pfree(cell->node_info);
|
pfree(cell->node_info);
|
||||||
pfree(cell);
|
pfree(cell);
|
||||||
cell = next_cell;
|
cell = next_cell;
|
||||||
@@ -5073,6 +5077,7 @@ init_replication_info(ReplInfo *replication_info)
|
|||||||
{
|
{
|
||||||
memset(replication_info->current_timestamp, 0, sizeof(replication_info->current_timestamp));
|
memset(replication_info->current_timestamp, 0, sizeof(replication_info->current_timestamp));
|
||||||
replication_info->in_recovery = false;
|
replication_info->in_recovery = false;
|
||||||
|
replication_info->timeline_id = UNKNOWN_TIMELINE_ID;
|
||||||
replication_info->last_wal_receive_lsn = InvalidXLogRecPtr;
|
replication_info->last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||||
replication_info->last_wal_replay_lsn = InvalidXLogRecPtr;
|
replication_info->last_wal_replay_lsn = InvalidXLogRecPtr;
|
||||||
memset(replication_info->last_xact_replay_timestamp, 0, sizeof(replication_info->last_xact_replay_timestamp));
|
memset(replication_info->last_xact_replay_timestamp, 0, sizeof(replication_info->last_xact_replay_timestamp));
|
||||||
@@ -5259,6 +5264,38 @@ get_replication_lag_seconds(PGconn *conn)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
TimeLineID
|
||||||
|
get_node_timeline(PGconn *conn)
|
||||||
|
{
|
||||||
|
TimeLineID timeline_id = UNKNOWN_TIMELINE_ID;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PG_control_checkpoint() was introduced in PostgreSQL 9.6
|
||||||
|
*/
|
||||||
|
if (PQserverVersion(conn) < 90600)
|
||||||
|
{
|
||||||
|
return UNKNOWN_TIMELINE_ID;
|
||||||
|
}
|
||||||
|
|
||||||
|
res = PQexec(conn, "SELECT timeline_id FROM pg_catalog.pg_control_checkpoint()");
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_db_error(conn, NULL, _("get_node_timeline(): unable to query pg_control_system()"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
timeline_id = atoi(PQgetvalue(res, 0, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return timeline_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
get_node_replication_stats(PGconn *conn, t_node_info *node_info)
|
get_node_replication_stats(PGconn *conn, t_node_info *node_info)
|
||||||
{
|
{
|
||||||
|
|||||||
42
dbutils.h
42
dbutils.h
@@ -164,8 +164,28 @@ typedef struct s_extension_versions {
|
|||||||
UNKNOWN_SERVER_VERSION_NUM \
|
UNKNOWN_SERVER_VERSION_NUM \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Replication state collected for a single node.
 *
 * Initialise with init_replication_info() before use. Declared ahead of
 * t_node_info, which holds a pointer to one of these.
 */
typedef struct
|
||||||
|
{
|
||||||
|
char current_timestamp[MAXLEN];	/* text buffer; zeroed by init_replication_info() */
|
||||||
|
bool in_recovery;	/* initialised to false */
|
||||||
|
TimeLineID timeline_id;	/* UNKNOWN_TIMELINE_ID until set, e.g. from get_node_timeline() */
|
||||||
|
XLogRecPtr last_wal_receive_lsn;	/* initialised to InvalidXLogRecPtr */
|
||||||
|
XLogRecPtr last_wal_replay_lsn;	/* initialised to InvalidXLogRecPtr */
|
||||||
|
char last_xact_replay_timestamp[MAXLEN];	/* text buffer; zeroed by init_replication_info() */
|
||||||
|
int replication_lag_time;	/* NOTE(review): presumably seconds, cf. get_replication_lag_seconds() - confirm */
|
||||||
|
bool receiving_streamed_wal;
|
||||||
|
bool wal_replay_paused;
|
||||||
|
int upstream_last_seen;
|
||||||
|
||||||
|
int upstream_node_id;
|
||||||
|
} ReplInfo;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Struct to store node information
|
* Struct to store node information.
|
||||||
|
*
|
||||||
|
* The first section represents the contents of the "repmgr.nodes"
|
||||||
|
* table; subsequent sections contain information collated in
|
||||||
|
* various contexts.
|
||||||
*/
|
*/
|
||||||
typedef struct s_node_info
|
typedef struct s_node_info
|
||||||
{
|
{
|
||||||
@@ -199,6 +219,8 @@ typedef struct s_node_info
|
|||||||
int total_replication_slots;
|
int total_replication_slots;
|
||||||
int active_replication_slots;
|
int active_replication_slots;
|
||||||
int inactive_replication_slots;
|
int inactive_replication_slots;
|
||||||
|
/* replication info */
|
||||||
|
ReplInfo *replication_info;
|
||||||
} t_node_info;
|
} t_node_info;
|
||||||
|
|
||||||
|
|
||||||
@@ -225,7 +247,8 @@ typedef struct s_node_info
|
|||||||
/* for ad-hoc use e.g. when working with a list of nodes */ \
|
/* for ad-hoc use e.g. when working with a list of nodes */ \
|
||||||
"", true, true, \
|
"", true, true, \
|
||||||
/* various statistics */ \
|
/* various statistics */ \
|
||||||
-1, -1, -1, -1, -1, -1 \
|
-1, -1, -1, -1, -1, -1, \
|
||||||
|
NULL \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -338,19 +361,7 @@ typedef struct BdrNodeInfoList
|
|||||||
0 \
|
0 \
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
char current_timestamp[MAXLEN];
|
|
||||||
bool in_recovery;
|
|
||||||
XLogRecPtr last_wal_receive_lsn;
|
|
||||||
XLogRecPtr last_wal_replay_lsn;
|
|
||||||
char last_xact_replay_timestamp[MAXLEN];
|
|
||||||
int replication_lag_time;
|
|
||||||
bool receiving_streamed_wal;
|
|
||||||
bool wal_replay_paused;
|
|
||||||
int upstream_last_seen;
|
|
||||||
int upstream_node_id;
|
|
||||||
} ReplInfo;
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
@@ -602,6 +613,7 @@ XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
|||||||
void init_replication_info(ReplInfo *replication_info);
|
void init_replication_info(ReplInfo *replication_info);
|
||||||
bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info);
|
bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info);
|
||||||
int get_replication_lag_seconds(PGconn *conn);
|
int get_replication_lag_seconds(PGconn *conn);
|
||||||
|
TimeLineID get_node_timeline(PGconn *conn);
|
||||||
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
||||||
NodeAttached is_downstream_node_attached(PGconn *conn, char *node_name);
|
NodeAttached is_downstream_node_attached(PGconn *conn, char *node_name);
|
||||||
void set_upstream_last_seen(PGconn *conn, int upstream_node_id);
|
void set_upstream_last_seen(PGconn *conn, int upstream_node_id);
|
||||||
|
|||||||
@@ -97,6 +97,13 @@
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
|
<link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>:
|
||||||
|
display each node's timeline ID (PostgreSQL 9.6 and later only).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
<link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>
|
<link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>
|
||||||
and <link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>:
|
and <link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>:
|
||||||
show the upstream node name as reported by each individual node - this helps visualise
|
show the upstream node name as reported by each individual node - this helps visualise
|
||||||
|
|||||||
@@ -22,7 +22,9 @@
|
|||||||
directly and can be run on any node in the cluster; this is also useful when analyzing
|
directly and can be run on any node in the cluster; this is also useful when analyzing
|
||||||
connectivity from a particular node.
|
connectivity from a particular node.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
For PostgreSQL 9.6 and later, the output will also contain the node's current timeline ID.
|
||||||
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Node availability is tested by connecting from the node where
|
Node availability is tested by connecting from the node where
|
||||||
<command>repmgr cluster show</command> is executed, and does not necessarily imply the node
|
<command>repmgr cluster show</command> is executed, and does not necessarily imply the node
|
||||||
@@ -52,11 +54,11 @@
|
|||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
$ repmgr -f /etc/repmgr.conf cluster show
|
||||||
|
|
||||||
ID | Name | Role | Status | Upstream | Location | Priority | Connection string
|
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
|
||||||
----+-------+---------+-----------+----------+----------+----------+-----------------------------------------
|
----+-------+---------+-----------+----------+----------+----------+----------+-----------------------------------------
|
||||||
1 | node1 | primary | * running | | default | 100 | host=db_node1 dbname=repmgr user=repmgr
|
1 | node1 | primary | * running | | default | 100 | 1 | host=db_node1 dbname=repmgr user=repmgr
|
||||||
2 | node2 | standby | running | node1 | default | 100 | host=db_node2 dbname=repmgr user=repmgr
|
2 | node2 | standby | running | node1 | default | 100 | 1 | host=db_node2 dbname=repmgr user=repmgr
|
||||||
3 | node3 | standby | running | node1 | default | 100 | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
3 | node3 | standby | running | node1 | default | 100 | 1 | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
<refsect1>
|
<refsect1>
|
||||||
|
|||||||
@@ -24,7 +24,7 @@
|
|||||||
#include "repmgr-client-global.h"
|
#include "repmgr-client-global.h"
|
||||||
#include "repmgr-action-cluster.h"
|
#include "repmgr-action-cluster.h"
|
||||||
|
|
||||||
#define SHOW_HEADER_COUNT 8
|
#define SHOW_HEADER_COUNT 9
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
@@ -35,6 +35,7 @@ typedef enum
|
|||||||
SHOW_UPSTREAM_NAME,
|
SHOW_UPSTREAM_NAME,
|
||||||
SHOW_LOCATION,
|
SHOW_LOCATION,
|
||||||
SHOW_PRIORITY,
|
SHOW_PRIORITY,
|
||||||
|
SHOW_TIMELINE_ID,
|
||||||
SHOW_CONNINFO
|
SHOW_CONNINFO
|
||||||
} ShowHeader;
|
} ShowHeader;
|
||||||
|
|
||||||
@@ -113,9 +114,15 @@ do_cluster_show(void)
|
|||||||
strncpy(headers_show[SHOW_LOCATION].title, _("Location"), MAXLEN);
|
strncpy(headers_show[SHOW_LOCATION].title, _("Location"), MAXLEN);
|
||||||
|
|
||||||
if (runtime_options.compact == true)
|
if (runtime_options.compact == true)
|
||||||
|
{
|
||||||
strncpy(headers_show[SHOW_PRIORITY].title, _("Prio."), MAXLEN);
|
strncpy(headers_show[SHOW_PRIORITY].title, _("Prio."), MAXLEN);
|
||||||
|
strncpy(headers_show[SHOW_TIMELINE_ID].title, _("TLI"), MAXLEN);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
strncpy(headers_show[SHOW_PRIORITY].title, _("Priority"), MAXLEN);
|
strncpy(headers_show[SHOW_PRIORITY].title, _("Priority"), MAXLEN);
|
||||||
|
strncpy(headers_show[SHOW_TIMELINE_ID].title, _("Timeline"), MAXLEN);
|
||||||
|
}
|
||||||
|
|
||||||
strncpy(headers_show[SHOW_CONNINFO].title, _("Connection string"), MAXLEN);
|
strncpy(headers_show[SHOW_CONNINFO].title, _("Connection string"), MAXLEN);
|
||||||
|
|
||||||
@@ -128,6 +135,16 @@ do_cluster_show(void)
|
|||||||
{
|
{
|
||||||
headers_show[i].display = true;
|
headers_show[i].display = true;
|
||||||
|
|
||||||
|
/* Don't display timeline on pre-9.6 clusters */
|
||||||
|
if (i == SHOW_TIMELINE_ID)
|
||||||
|
{
|
||||||
|
if (PQserverVersion(conn) < 90600)
|
||||||
|
{
|
||||||
|
headers_show[i].display = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if --compact provided, don't display conninfo */
|
||||||
if (runtime_options.compact == true)
|
if (runtime_options.compact == true)
|
||||||
{
|
{
|
||||||
if (i == SHOW_CONNINFO)
|
if (i == SHOW_CONNINFO)
|
||||||
@@ -136,6 +153,7 @@ do_cluster_show(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (headers_show[i].display == true)
|
if (headers_show[i].display == true)
|
||||||
{
|
{
|
||||||
headers_show[i].max_length = strlen(headers_show[i].title);
|
headers_show[i].max_length = strlen(headers_show[i].title);
|
||||||
@@ -154,6 +172,15 @@ do_cluster_show(void)
|
|||||||
PQExpBufferData upstream;
|
PQExpBufferData upstream;
|
||||||
PQExpBufferData buf;
|
PQExpBufferData buf;
|
||||||
|
|
||||||
|
cell->node_info->replication_info = palloc0(sizeof(ReplInfo));
|
||||||
|
if (cell->node_info->replication_info == NULL)
|
||||||
|
{
|
||||||
|
log_error(_("unable to allocate memory"));
|
||||||
|
exit(ERR_INTERNAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
init_replication_info(cell->node_info->replication_info);
|
||||||
|
|
||||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
@@ -176,6 +203,11 @@ do_cluster_show(void)
|
|||||||
cell->node_info->node_name, cell->node_info->node_id);
|
cell->node_info->node_name, cell->node_info->node_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* NOP on pre-9.6 servers */
|
||||||
|
cell->node_info->replication_info->timeline_id = get_node_timeline(cell->node_info->conn);
|
||||||
|
}
|
||||||
|
|
||||||
initPQExpBuffer(&node_status);
|
initPQExpBuffer(&node_status);
|
||||||
initPQExpBuffer(&upstream);
|
initPQExpBuffer(&upstream);
|
||||||
@@ -212,7 +244,18 @@ do_cluster_show(void)
|
|||||||
|
|
||||||
headers_show[SHOW_LOCATION].cur_length = strlen(cell->node_info->location);
|
headers_show[SHOW_LOCATION].cur_length = strlen(cell->node_info->location);
|
||||||
|
|
||||||
|
if (cell->node_info->replication_info->timeline_id == UNKNOWN_TIMELINE_ID)
|
||||||
|
{
|
||||||
|
/* display "?" */
|
||||||
|
headers_show[SHOW_PRIORITY].cur_length = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
initPQExpBuffer(&buf);
|
||||||
|
appendPQExpBuffer(&buf, "%i", cell->node_info->replication_info->timeline_id);
|
||||||
|
headers_show[SHOW_PRIORITY].cur_length = strlen(buf.data);
|
||||||
|
termPQExpBuffer(&buf);
|
||||||
|
}
|
||||||
|
|
||||||
headers_show[SHOW_CONNINFO].cur_length = strlen(cell->node_info->conninfo);
|
headers_show[SHOW_CONNINFO].cur_length = strlen(cell->node_info->conninfo);
|
||||||
|
|
||||||
@@ -277,6 +320,14 @@ do_cluster_show(void)
|
|||||||
printf("| %-*s ", headers_show[SHOW_LOCATION].max_length, cell->node_info->location);
|
printf("| %-*s ", headers_show[SHOW_LOCATION].max_length, cell->node_info->location);
|
||||||
printf("| %-*i ", headers_show[SHOW_PRIORITY].max_length, cell->node_info->priority);
|
printf("| %-*i ", headers_show[SHOW_PRIORITY].max_length, cell->node_info->priority);
|
||||||
|
|
||||||
|
if (headers_show[SHOW_TIMELINE_ID].display == true)
|
||||||
|
{
|
||||||
|
if (cell->node_info->replication_info->timeline_id == UNKNOWN_TIMELINE_ID)
|
||||||
|
printf("| %-*c ", headers_show[SHOW_TIMELINE_ID].max_length, '?');
|
||||||
|
else
|
||||||
|
printf("| %-*i ", headers_show[SHOW_TIMELINE_ID].max_length, (int)cell->node_info->replication_info->timeline_id);
|
||||||
|
}
|
||||||
|
|
||||||
if (headers_show[SHOW_CONNINFO].display == true)
|
if (headers_show[SHOW_CONNINFO].display == true)
|
||||||
{
|
{
|
||||||
printf("| %-*s", headers_show[SHOW_CONNINFO].max_length, cell->node_info->conninfo);
|
printf("| %-*s", headers_show[SHOW_CONNINFO].max_length, cell->node_info->conninfo);
|
||||||
|
|||||||
Reference in New Issue
Block a user