mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Improvements to repmgr cluster show
Add documentation; show recovery status in --csv mode.
This commit is contained in:
62
README.md
62
README.md
@@ -23,10 +23,12 @@ Simply:
|
||||
Ensure `pg_config` for the target PostgreSQL version is in `$PATH`.
|
||||
|
||||
|
||||
Commands
|
||||
--------
|
||||
Reference
|
||||
---------
|
||||
|
||||
Currently available:
|
||||
### repmgr commands
|
||||
|
||||
The following commands are available:
|
||||
|
||||
repmgr primary register
|
||||
repmgr primary unregister
|
||||
@@ -38,10 +40,64 @@ Currently available:
|
||||
repmgr standby follow
|
||||
|
||||
repmgr bdr register
|
||||
repmgr bdr unregister
|
||||
|
||||
repmgr cluster show
|
||||
repmgr cluster event [--all] [--node-id] [--node-name] [--event] [--event-matching]
|
||||
|
||||
|
||||
* `primary register`
|
||||
|
||||
Registers a primary in a streaming replication cluster, and configures
|
||||
it for use with repmgr. This command needs to be executed before any
|
||||
standby nodes are registered.
|
||||
|
||||
`master register` can be used as an alias for `primary register`.
|
||||
|
||||
* `cluster show`
|
||||
|
||||
Displays information about each active node in the replication cluster. This
|
||||
command polls each registered server and shows its role (`primary` / `standby` /
|
||||
`bdr`) and status. It polls each server directly and can be run on any node
|
||||
in the cluster; this is also useful when analyzing connectivity from a particular
|
||||
node.
|
||||
|
||||
This command requires either a valid `repmgr.conf` file or a database connection
|
||||
string to one of the registered nodes; no additional arguments are needed.
|
||||
|
||||
Example:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
|
||||
ID | Name | Role | Status | Upstream | Connection string
|
||||
----+-------+---------+-----------+----------+-----------------------------------------
|
||||
1 | node1 | primary | * running | | host=db_node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | host=db_node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | host=db_node3 dbname=repmgr user=repmgr
|
||||
|
||||
To show database connection errors when polling nodes, run the command in
|
||||
`--verbose` mode.
|
||||
|
||||
The `cluster show` command accepts an optional parameter `--csv`, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||
1,-1,-1
|
||||
2,0,0
|
||||
3,0,1
|
||||
|
||||
The columns have the following meanings:
|
||||
|
||||
- node ID
|
||||
- availability (0 = available, -1 = unavailable)
|
||||
- recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||
|
||||
Note that the availability is tested by connecting from the node where
|
||||
`repmgr cluster show` is executed; a value of `-1` does not necessarily imply the node
|
||||
is down.
|
||||
|
||||
|
||||
Backwards compatibility
|
||||
-----------------------
|
||||
|
||||
|
||||
@@ -1303,6 +1303,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
|
||||
|
||||
/* Set remaining struct fields with default values */
|
||||
node_info->node_status = NODE_STATUS_UNKNOWN;
|
||||
node_info->recovery_type = RECTYPE_UNKNOWN;
|
||||
node_info->last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||
node_info->monitoring_state = MS_NORMAL;
|
||||
node_info->conn = NULL;
|
||||
|
||||
@@ -32,7 +32,7 @@ typedef enum {
|
||||
} ExtensionStatus;
|
||||
|
||||
typedef enum {
|
||||
RECTYPE_UNKNOWN = 0,
|
||||
RECTYPE_UNKNOWN = -1,
|
||||
RECTYPE_PRIMARY,
|
||||
RECTYPE_STANDBY
|
||||
} RecoveryType;
|
||||
@@ -79,6 +79,7 @@ typedef struct s_node_info
|
||||
/* used during failover to track node status */
|
||||
XLogRecPtr last_wal_receive_lsn;
|
||||
NodeStatus node_status;
|
||||
RecoveryType recovery_type;
|
||||
MonitoringState monitoring_state;
|
||||
PGconn *conn;
|
||||
/* for ad-hoc use e.g. when working with a list of nodes */
|
||||
@@ -100,6 +101,7 @@ typedef struct s_node_info
|
||||
"", \
|
||||
InvalidXLogRecPtr, \
|
||||
NODE_STATUS_UNKNOWN, \
|
||||
RECTYPE_UNKNOWN, \
|
||||
MS_NORMAL, \
|
||||
NULL, \
|
||||
"" \
|
||||
|
||||
@@ -1,4 +1,12 @@
|
||||
|
||||
Standardisation on `primary`
|
||||
----------------------------
|
||||
|
||||
To standardise terminology, `primary` is used to denote the read/write
|
||||
node in a streaming replication cluster. `master` is still accepted
|
||||
as a synonym (e.g. `repmgr master register`).
|
||||
|
||||
|
||||
New command line options
|
||||
------------------------
|
||||
|
||||
@@ -29,6 +37,18 @@ Changed command line options
|
||||
configuration file option `monitoring_history`. This enables the
|
||||
setting to be changed without having to modify system service files.
|
||||
|
||||
Changes to repmgr commands
|
||||
--------------------------
|
||||
|
||||
|
||||
### `repmgr cluster show`
|
||||
|
||||
This now displays the role of each node (e.g. `primary`, `standby`)
|
||||
and its status in separate columns.
|
||||
|
||||
The `--csv` option now emits a third column indicating the recovery
|
||||
status of the node.
|
||||
|
||||
|
||||
Removed configuration file options
|
||||
----------------------------------
|
||||
|
||||
@@ -88,7 +88,6 @@ do_cluster_show(void)
|
||||
|
||||
for (cell = nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
RecoveryType rec_type = RECTYPE_UNKNOWN;
|
||||
PQExpBufferData details;
|
||||
|
||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||
@@ -97,14 +96,12 @@ do_cluster_show(void)
|
||||
{
|
||||
cell->node_info->node_status = NODE_STATUS_UP;
|
||||
|
||||
if (cell->node_info->type != BDR)
|
||||
{
|
||||
rec_type = get_recovery_type(cell->node_info->conn);
|
||||
}
|
||||
cell->node_info->recovery_type = get_recovery_type(cell->node_info->conn);
|
||||
}
|
||||
else
|
||||
{
|
||||
cell->node_info->node_status = NODE_STATUS_DOWN;
|
||||
cell->node_info->recovery_type = RECTYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
initPQExpBuffer(&details);
|
||||
@@ -122,7 +119,7 @@ do_cluster_show(void)
|
||||
{
|
||||
if (cell->node_info->active == true)
|
||||
{
|
||||
switch (rec_type)
|
||||
switch (cell->node_info->recovery_type)
|
||||
{
|
||||
case RECTYPE_PRIMARY:
|
||||
appendPQExpBuffer(&details, "* running");
|
||||
@@ -137,7 +134,7 @@ do_cluster_show(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rec_type == RECTYPE_PRIMARY)
|
||||
if (cell->node_info->recovery_type == RECTYPE_PRIMARY)
|
||||
appendPQExpBuffer(&details, "! running");
|
||||
else
|
||||
appendPQExpBuffer(&details, "! running as standby");
|
||||
@@ -162,7 +159,7 @@ do_cluster_show(void)
|
||||
{
|
||||
if (cell->node_info->active == true)
|
||||
{
|
||||
switch (rec_type)
|
||||
switch (cell->node_info->recovery_type)
|
||||
{
|
||||
case RECTYPE_STANDBY:
|
||||
appendPQExpBuffer(&details, " running");
|
||||
@@ -177,7 +174,7 @@ do_cluster_show(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rec_type == RECTYPE_STANDBY)
|
||||
if (cell->node_info->recovery_type == RECTYPE_STANDBY)
|
||||
appendPQExpBuffer(&details, "! running");
|
||||
else
|
||||
appendPQExpBuffer(&details, "! running as primary");
|
||||
@@ -216,6 +213,8 @@ do_cluster_show(void)
|
||||
break;
|
||||
case UNKNOWN:
|
||||
{
|
||||
/* this should never happen */
|
||||
appendPQExpBuffer(&details, "? unknown node type");
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -277,8 +276,29 @@ do_cluster_show(void)
|
||||
if (runtime_options.csv)
|
||||
{
|
||||
int connection_status = (PQstatus(conn) == CONNECTION_OK) ? 0 : -1;
|
||||
int recovery_type = RECTYPE_UNKNOWN;
|
||||
|
||||
printf("%i,%d\n", cell->node_info->node_id, connection_status);
|
||||
/*
|
||||
* here we explicitly convert the RecoveryType to integer values to
|
||||
* avoid implicit dependency on the values in the enum
|
||||
*/
|
||||
switch (cell->node_info->recovery_type)
|
||||
{
|
||||
case RECTYPE_UNKNOWN:
|
||||
recovery_type = -1;
|
||||
break;
|
||||
case RECTYPE_PRIMARY:
|
||||
recovery_type = 0;
|
||||
break;
|
||||
case RECTYPE_STANDBY:
|
||||
recovery_type = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%i,%i,%i\n",
|
||||
cell->node_info->node_id,
|
||||
connection_status,
|
||||
recovery_type);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -1048,6 +1048,10 @@ check_cli_parameters(const int action)
|
||||
}
|
||||
|
||||
}
|
||||
case CLUSTER_SHOW:
|
||||
if (runtime_options.connection_param_provided)
|
||||
config_file_required = false;
|
||||
break;
|
||||
case CLUSTER_EVENT:
|
||||
/* no required parameters */
|
||||
break;
|
||||
@@ -1066,6 +1070,7 @@ check_cli_parameters(const int action)
|
||||
{
|
||||
case STANDBY_CLONE:
|
||||
case STANDBY_FOLLOW:
|
||||
case CLUSTER_SHOW:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(&cli_warnings,
|
||||
|
||||
Reference in New Issue
Block a user