mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 17:06:29 +00:00
Improvements to repmgr cluster show
Add documentation; show recovery status in --csv mode.
This commit is contained in:
62
README.md
62
README.md
@@ -23,10 +23,12 @@ Simply:
|
|||||||
Ensure `pg_config` for the target PostgreSQL version is in `$PATH`.
|
Ensure `pg_config` for the target PostgreSQL version is in `$PATH`.
|
||||||
|
|
||||||
|
|
||||||
Commands
|
Reference
|
||||||
--------
|
---------
|
||||||
|
|
||||||
Currently available:
|
### repmgr commands
|
||||||
|
|
||||||
|
The following commands are available:
|
||||||
|
|
||||||
repmgr primary register
|
repmgr primary register
|
||||||
repmgr primary unregister
|
repmgr primary unregister
|
||||||
@@ -38,10 +40,64 @@ Currently available:
|
|||||||
repmgr standby follow
|
repmgr standby follow
|
||||||
|
|
||||||
repmgr bdr register
|
repmgr bdr register
|
||||||
|
repmgr bdr unregister
|
||||||
|
|
||||||
|
repmgr cluster show
|
||||||
repmgr cluster event [--all] [--node-id] [--node-name] [--event] [--event-matching]
|
repmgr cluster event [--all] [--node-id] [--node-name] [--event] [--event-matching]
|
||||||
|
|
||||||
|
|
||||||
|
* `primary register`
|
||||||
|
|
||||||
|
Registers a primary in a streaming replication cluster, and configures
|
||||||
|
it for use with repmgr. This command needs to be executed before any
|
||||||
|
standby nodes are registered.
|
||||||
|
|
||||||
|
`master register` can be used as an alias for `primary register`.
|
||||||
|
|
||||||
|
* `cluster show`
|
||||||
|
|
||||||
|
Displays information about each active node in the replication cluster. This
|
||||||
|
command polls each registered server and shows its role (`primary` / `standby` /
|
||||||
|
`bdr`) and status. It polls each server directly and can be run on any node
|
||||||
|
in the cluster; this is also useful when analyzing connectivity from a particular
|
||||||
|
node.
|
||||||
|
|
||||||
|
This command requires either a valid `repmgr.conf` file or a database connection
|
||||||
|
string to one of the registered nodes; no additional arguments are needed.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
$ repmgr -f /etc/repmgr.conf cluster show
|
||||||
|
|
||||||
|
ID | Name | Role | Status | Upstream | Connection string
|
||||||
|
----+-------+---------+-----------+----------+-----------------------------------------
|
||||||
|
1 | node1 | primary | * running | | host=db_node1 dbname=repmgr user=repmgr
|
||||||
|
2 | node2 | standby | running | node1 | host=db_node2 dbname=repmgr user=repmgr
|
||||||
|
3 | node3 | standby | running | node1 | host=db_node3 dbname=repmgr user=repmgr
|
||||||
|
|
||||||
|
To show database connection errors when polling nodes, run the command in
|
||||||
|
`--verbose` mode.
|
||||||
|
|
||||||
|
The `cluster show` command accepts an optional parameter `--csv`, which
|
||||||
|
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||||
|
parsing by scripts:
|
||||||
|
|
||||||
|
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||||
|
1,-1,-1
|
||||||
|
2,0,0
|
||||||
|
3,0,1
|
||||||
|
|
||||||
|
The columns have the following meanings:
|
||||||
|
|
||||||
|
- node ID
|
||||||
|
- availability (0 = available, -1 = unavailable)
|
||||||
|
- recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||||
|
|
||||||
|
Note that the availability is tested by connecting from the node where
|
||||||
|
`repmgr cluster show` is executed; an "unavailable" result does not necessarily mean the node
|
||||||
|
is down.
|
||||||
|
|
||||||
|
|
||||||
Backwards compatibility
|
Backwards compatibility
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
|
|||||||
@@ -1303,6 +1303,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
|
|||||||
|
|
||||||
/* Set remaining struct fields with default values */
|
/* Set remaining struct fields with default values */
|
||||||
node_info->node_status = NODE_STATUS_UNKNOWN;
|
node_info->node_status = NODE_STATUS_UNKNOWN;
|
||||||
|
node_info->recovery_type = RECTYPE_UNKNOWN;
|
||||||
node_info->last_wal_receive_lsn = InvalidXLogRecPtr;
|
node_info->last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||||
node_info->monitoring_state = MS_NORMAL;
|
node_info->monitoring_state = MS_NORMAL;
|
||||||
node_info->conn = NULL;
|
node_info->conn = NULL;
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ typedef enum {
|
|||||||
} ExtensionStatus;
|
} ExtensionStatus;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
RECTYPE_UNKNOWN = 0,
|
RECTYPE_UNKNOWN = -1,
|
||||||
RECTYPE_PRIMARY,
|
RECTYPE_PRIMARY,
|
||||||
RECTYPE_STANDBY
|
RECTYPE_STANDBY
|
||||||
} RecoveryType;
|
} RecoveryType;
|
||||||
@@ -79,6 +79,7 @@ typedef struct s_node_info
|
|||||||
/* used during failover to track node status */
|
/* used during failover to track node status */
|
||||||
XLogRecPtr last_wal_receive_lsn;
|
XLogRecPtr last_wal_receive_lsn;
|
||||||
NodeStatus node_status;
|
NodeStatus node_status;
|
||||||
|
RecoveryType recovery_type;
|
||||||
MonitoringState monitoring_state;
|
MonitoringState monitoring_state;
|
||||||
PGconn *conn;
|
PGconn *conn;
|
||||||
/* for ad-hoc use e.g. when working with a list of nodes */
|
/* for ad-hoc use e.g. when working with a list of nodes */
|
||||||
@@ -100,6 +101,7 @@ typedef struct s_node_info
|
|||||||
"", \
|
"", \
|
||||||
InvalidXLogRecPtr, \
|
InvalidXLogRecPtr, \
|
||||||
NODE_STATUS_UNKNOWN, \
|
NODE_STATUS_UNKNOWN, \
|
||||||
|
RECTYPE_UNKNOWN, \
|
||||||
MS_NORMAL, \
|
MS_NORMAL, \
|
||||||
NULL, \
|
NULL, \
|
||||||
"" \
|
"" \
|
||||||
|
|||||||
@@ -1,4 +1,12 @@
|
|||||||
|
|
||||||
|
Standardisation on `primary`
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
To standardise terminology, `primary` is used to denote the read/write
|
||||||
|
node in a streaming replication cluster. `master` is still accepted
|
||||||
|
as a synonym (e.g. `repmgr master register`).
|
||||||
|
|
||||||
|
|
||||||
New command line options
|
New command line options
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
@@ -29,6 +37,18 @@ Changed command line options
|
|||||||
configuration file option `monitoring_history`. This enables the
|
configuration file option `monitoring_history`. This enables the
|
||||||
setting to be changed without having to modify system service files.
|
setting to be changed without having to modify system service files.
|
||||||
|
|
||||||
|
Changes to repmgr commands
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
|
||||||
|
### `repmgr cluster show`
|
||||||
|
|
||||||
|
This now displays the role of each node (e.g. `primary`, `standby`)
|
||||||
|
and its status in separate columns.
|
||||||
|
|
||||||
|
The `--csv` option now emits a third column indicating the recovery
|
||||||
|
status of the node.
|
||||||
|
|
||||||
|
|
||||||
Removed configuration file options
|
Removed configuration file options
|
||||||
----------------------------------
|
----------------------------------
|
||||||
|
|||||||
@@ -88,7 +88,6 @@ do_cluster_show(void)
|
|||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
RecoveryType rec_type = RECTYPE_UNKNOWN;
|
|
||||||
PQExpBufferData details;
|
PQExpBufferData details;
|
||||||
|
|
||||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||||
@@ -97,14 +96,12 @@ do_cluster_show(void)
|
|||||||
{
|
{
|
||||||
cell->node_info->node_status = NODE_STATUS_UP;
|
cell->node_info->node_status = NODE_STATUS_UP;
|
||||||
|
|
||||||
if (cell->node_info->type != BDR)
|
cell->node_info->recovery_type = get_recovery_type(cell->node_info->conn);
|
||||||
{
|
|
||||||
rec_type = get_recovery_type(cell->node_info->conn);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cell->node_info->node_status = NODE_STATUS_DOWN;
|
cell->node_info->node_status = NODE_STATUS_DOWN;
|
||||||
|
cell->node_info->recovery_type = RECTYPE_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
initPQExpBuffer(&details);
|
initPQExpBuffer(&details);
|
||||||
@@ -122,7 +119,7 @@ do_cluster_show(void)
|
|||||||
{
|
{
|
||||||
if (cell->node_info->active == true)
|
if (cell->node_info->active == true)
|
||||||
{
|
{
|
||||||
switch (rec_type)
|
switch (cell->node_info->recovery_type)
|
||||||
{
|
{
|
||||||
case RECTYPE_PRIMARY:
|
case RECTYPE_PRIMARY:
|
||||||
appendPQExpBuffer(&details, "* running");
|
appendPQExpBuffer(&details, "* running");
|
||||||
@@ -137,7 +134,7 @@ do_cluster_show(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (rec_type == RECTYPE_PRIMARY)
|
if (cell->node_info->recovery_type == RECTYPE_PRIMARY)
|
||||||
appendPQExpBuffer(&details, "! running");
|
appendPQExpBuffer(&details, "! running");
|
||||||
else
|
else
|
||||||
appendPQExpBuffer(&details, "! running as standby");
|
appendPQExpBuffer(&details, "! running as standby");
|
||||||
@@ -162,7 +159,7 @@ do_cluster_show(void)
|
|||||||
{
|
{
|
||||||
if (cell->node_info->active == true)
|
if (cell->node_info->active == true)
|
||||||
{
|
{
|
||||||
switch (rec_type)
|
switch (cell->node_info->recovery_type)
|
||||||
{
|
{
|
||||||
case RECTYPE_STANDBY:
|
case RECTYPE_STANDBY:
|
||||||
appendPQExpBuffer(&details, " running");
|
appendPQExpBuffer(&details, " running");
|
||||||
@@ -177,7 +174,7 @@ do_cluster_show(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (rec_type == RECTYPE_STANDBY)
|
if (cell->node_info->recovery_type == RECTYPE_STANDBY)
|
||||||
appendPQExpBuffer(&details, "! running");
|
appendPQExpBuffer(&details, "! running");
|
||||||
else
|
else
|
||||||
appendPQExpBuffer(&details, "! running as primary");
|
appendPQExpBuffer(&details, "! running as primary");
|
||||||
@@ -216,6 +213,8 @@ do_cluster_show(void)
|
|||||||
break;
|
break;
|
||||||
case UNKNOWN:
|
case UNKNOWN:
|
||||||
{
|
{
|
||||||
|
/* this should never happen */
|
||||||
|
appendPQExpBuffer(&details, "? unknown node type");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -277,8 +276,29 @@ do_cluster_show(void)
|
|||||||
if (runtime_options.csv)
|
if (runtime_options.csv)
|
||||||
{
|
{
|
||||||
int connection_status = (PQstatus(conn) == CONNECTION_OK) ? 0 : -1;
|
int connection_status = (PQstatus(conn) == CONNECTION_OK) ? 0 : -1;
|
||||||
|
int recovery_type = RECTYPE_UNKNOWN;
|
||||||
|
|
||||||
printf("%i,%d\n", cell->node_info->node_id, connection_status);
|
/*
|
||||||
|
* here we explicitly convert the RecoveryType to integer values to
|
||||||
|
* avoid implicit dependency on the values in the enum
|
||||||
|
*/
|
||||||
|
switch (cell->node_info->recovery_type)
|
||||||
|
{
|
||||||
|
case RECTYPE_UNKNOWN:
|
||||||
|
recovery_type = -1;
|
||||||
|
break;
|
||||||
|
case RECTYPE_PRIMARY:
|
||||||
|
recovery_type = 0;
|
||||||
|
break;
|
||||||
|
case RECTYPE_STANDBY:
|
||||||
|
recovery_type = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("%i,%i,%i\n",
|
||||||
|
cell->node_info->node_id,
|
||||||
|
connection_status,
|
||||||
|
recovery_type);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1048,6 +1048,10 @@ check_cli_parameters(const int action)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
case CLUSTER_SHOW:
|
||||||
|
if (runtime_options.connection_param_provided)
|
||||||
|
config_file_required = false;
|
||||||
|
break;
|
||||||
case CLUSTER_EVENT:
|
case CLUSTER_EVENT:
|
||||||
/* no required parameters */
|
/* no required parameters */
|
||||||
break;
|
break;
|
||||||
@@ -1066,6 +1070,7 @@ check_cli_parameters(const int action)
|
|||||||
{
|
{
|
||||||
case STANDBY_CLONE:
|
case STANDBY_CLONE:
|
||||||
case STANDBY_FOLLOW:
|
case STANDBY_FOLLOW:
|
||||||
|
case CLUSTER_SHOW:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
item_list_append_format(&cli_warnings,
|
item_list_append_format(&cli_warnings,
|
||||||
|
|||||||
Reference in New Issue
Block a user