mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Documentation update and miscellaneous code cleanup
This commit is contained in:
1
FAQ.md
1
FAQ.md
@@ -137,6 +137,7 @@ General
|
||||
of events which includes servers removed from the replication cluster
|
||||
which no longer have an entry in the `repl_nodes` table.
|
||||
|
||||
|
||||
`repmgrd`
|
||||
---------
|
||||
|
||||
|
||||
1
HISTORY
1
HISTORY
@@ -1,5 +1,6 @@
|
||||
3.2 2016-
|
||||
repmgr: add support for cloning from a Barman backup (Gianni)
|
||||
repmgr: add commands `cluster matrix` and `cluster diagnose` (Gianni)
|
||||
repmgr: suppress connection error display in `repmgr cluster show`
|
||||
unless `--verbose` supplied (Ian)
|
||||
repmgr: add commands `witness register` and `witness unregister` (Ian)
|
||||
|
||||
66
README.md
66
README.md
@@ -152,8 +152,9 @@ is not required, but is necessary in the following cases:
|
||||
|
||||
* if you need `repmgr` to copy configuration files from outside the PostgreSQL
|
||||
data directory
|
||||
* to perform switchover operations
|
||||
* when using `rsync` to clone a standby
|
||||
* to perform switchover operations
|
||||
* when executing `repmgr cluster matrix` and `repmgr cluster diagnose`
|
||||
|
||||
In these cases `rsync` is required on all servers too.
|
||||
|
||||
@@ -631,8 +632,8 @@ executable:
|
||||
|
||||
Then we check that `repmgr.conf` includes the following lines:
|
||||
|
||||
barman_server=barmansrv
|
||||
restore_command=/usr/local/bin/barman-wal-restore.py barmansrv test %f %p
|
||||
barman_server=barmansrv
|
||||
restore_command=/usr/local/bin/barman-wal-restore.py barmansrv test %f %p
|
||||
|
||||
To use a non-default Barman configuration file on the Barman server,
|
||||
specify this in `repmgr.conf` with `barman_config`:
|
||||
@@ -1609,11 +1610,12 @@ which contains connection details for the local database.
|
||||
- `cluster matrix` runs a `cluster show` on each node and arranges
|
||||
the results in a matrix, recording success or failure;
|
||||
|
||||
- `cluster diagnose` runs a `cluster matrix` on each node and
|
||||
- `cluster diagnose` runs a `cluster matrix` on each node and
|
||||
combines the results in a single matrix.
|
||||
|
||||
These commands require a valid `repmgr.conf` file on each node, and
|
||||
the optional `ssh_hostname` parameter must be set.
|
||||
These commands require a valid `repmgr.conf` file on each node.
|
||||
Additionally, password-less `ssh` connections are required between
|
||||
all nodes.
|
||||
|
||||
Example 1 (all nodes up):
|
||||
|
||||
@@ -1629,7 +1631,7 @@ which contains connection details for the local database.
|
||||
possible connection.
|
||||
|
||||
|
||||
Example 2 (node1 and node2 up, node3 down):
|
||||
Example 2 (`node1` and `node2` up, `node3` down):
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
@@ -1639,28 +1641,26 @@ which contains connection details for the local database.
|
||||
node2 | 2 | * | * | x
|
||||
node3 | 3 | ? | ? | ?
|
||||
|
||||
Each row corresponds to one server, and indicates the result of
|
||||
testing an outbound connection from that server.
|
||||
Each row corresponds to one server, and indicates the result of
|
||||
testing an outbound connection from that server.
|
||||
|
||||
Since node3 is down, all the entries in its row are filled with
|
||||
"?", meaning that there we cannot test outbound connections.
|
||||
Since `node3` is down, all the entries in its row are filled with
|
||||
"?", meaning that we cannot test outbound connections.
|
||||
|
||||
The other two nodes are up; the corresponding rows have "x" in the
|
||||
column corresponding to node3, meaning that inbound connections to
|
||||
that node have failed, and "*" in the columns corresponding to
|
||||
node1 and node2, meaning that inbound connections to these nodes
|
||||
have succeeded.
|
||||
The other two nodes are up; the corresponding rows have "x" in the
|
||||
column corresponding to `node3`, meaning that inbound connections to
|
||||
that node have failed, and "*" in the columns corresponding to
|
||||
`node1` and `node2`, meaning that inbound connections to these nodes
|
||||
have succeeded.
|
||||
|
||||
In this case, `cluster diagnose` gives the same result as `cluster
|
||||
In this case, `cluster diagnose` gives the same result as `cluster
|
||||
matrix`, because from any functioning node we can observe the same
|
||||
state: node1 and node2 are up, node3 is down.
|
||||
|
||||
state: `node1` and `node2` are up, `node3` is down.
|
||||
|
||||
Example 3 (all nodes up, firewall dropping packets originating
|
||||
from node1 and directed to port 5432 on node3)
|
||||
from `node1` and directed to port 5432 on `node3`)
|
||||
|
||||
Running `cluster matrix` from node1 gives the following output,
|
||||
after a long wait (two timeouts, by default one minute each):
|
||||
Running `cluster matrix` from `node1` gives the following output:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
@@ -1670,11 +1670,17 @@ which contains connection details for the local database.
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | ? | ? | ?
|
||||
|
||||
The matrix tells us that we cannot connect from node1 to node3,
|
||||
and that (therefore) we don't know the state of any outbound
|
||||
connection from node3.
|
||||
(Note this may take some time depending on the `connect_timeout`
|
||||
setting in the registered node `conninfo` strings; default is 1
|
||||
minute which means without modification the above command would
|
||||
take around 2 minutes to run; see comment elsewhere about setting
|
||||
`connect_timeout`)
|
||||
|
||||
In this case, the `cluster diagnose` command is more informative:
|
||||
The matrix tells us that we cannot connect from `node1` to `node3`,
|
||||
and that (therefore) we don't know the state of any outbound
|
||||
connection from `node3`.
|
||||
|
||||
In this case, the `cluster diagnose` command is more informative:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster diagnose
|
||||
|
||||
@@ -1684,9 +1690,9 @@ which contains connection details for the local database.
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | * | * | *
|
||||
|
||||
What happened is that `cluster diagnose` merged its own `cluster
|
||||
matrix` with the `cluster matrix` output from node2; the latter is
|
||||
able to connect to node3 and therefore determine the state of
|
||||
What happened is that `cluster diagnose` merged its own `cluster
|
||||
matrix` with the `cluster matrix` output from `node2`; the latter is
|
||||
able to connect to `node3` and therefore determine the state of
|
||||
outbound connections from that node.
|
||||
|
||||
|
||||
@@ -1724,7 +1730,7 @@ exit:
|
||||
* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only)
|
||||
* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only)
|
||||
* ERR_BARMAN (19) Unrecoverable error while accessing the barman server (repmgr only)
|
||||
* ERR_REGISTRATION_SYNC (20) After registering a standby, local node record was no
|
||||
* ERR_REGISTRATION_SYNC (20) After registering a standby, local node record was not
|
||||
synchronised (repmgr only, with --wait option)
|
||||
|
||||
Support and Assistance
|
||||
|
||||
91
repmgr.c
91
repmgr.c
@@ -113,8 +113,8 @@ static void get_barman_property(char *dst, char *name, char *local_repmgr_direct
|
||||
|
||||
static char *string_skip_prefix(const char *prefix, char *string);
|
||||
static char *string_remove_trailing_newlines(char *string);
|
||||
static int build_cluster_matrix(t_node_status_matrix *matrix, int *name_length);
|
||||
static int build_cluster_diagnose(t_node_status_cube ***cube, int *name_length);
|
||||
static int build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length);
|
||||
static int build_cluster_diagnose(t_node_status_cube ***cube_dest, int *name_length);
|
||||
|
||||
static char *make_pg_path(char *file);
|
||||
static char *make_barman_ssh_command(void);
|
||||
@@ -163,7 +163,7 @@ static void parse_pg_basebackup_options(const char *pg_basebackup_options, t_bas
|
||||
static void config_file_list_init(t_configfile_list *list, int max_size);
|
||||
static void config_file_list_add(t_configfile_list *list, const char *file, const char *filename, bool in_data_dir);
|
||||
|
||||
static void matrix_set_node_status(t_node_status_matrix *matrix, int node_id, int connection_node_id, int connection_status);
|
||||
static void matrix_set_node_status(t_node_matrix_rec **matrix_rec_list, int n, int node_id, int connection_node_id, int connection_status);
|
||||
static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id, int matrix_node_id, int connection_node_id, int connection_status);
|
||||
|
||||
/* Global variables */
|
||||
@@ -1114,19 +1114,19 @@ do_cluster_show(void)
|
||||
|
||||
|
||||
static void
|
||||
matrix_set_node_status(t_node_status_matrix *matrix, int node_id, int connection_node_id, int connection_status)
|
||||
matrix_set_node_status(t_node_matrix_rec **matrix_rec_list, int n, int node_id, int connection_node_id, int connection_status)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < matrix->length; i++)
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (matrix->matrix_rec_list[i]->node_id == node_id)
|
||||
if (matrix_rec_list[i]->node_id == node_id)
|
||||
{
|
||||
for (j = 0; j < matrix->length; j++)
|
||||
for (j = 0; j < n; j++)
|
||||
{
|
||||
if (matrix->matrix_rec_list[i]->node_status_list[j]->node_id == connection_node_id)
|
||||
if (matrix_rec_list[i]->node_status_list[j]->node_id == connection_node_id)
|
||||
{
|
||||
matrix->matrix_rec_list[i]->node_status_list[j]->node_status = connection_status;
|
||||
matrix_rec_list[i]->node_status_list[j]->node_status = connection_status;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1136,7 +1136,7 @@ matrix_set_node_status(t_node_status_matrix *matrix, int node_id, int connection
|
||||
}
|
||||
|
||||
static int
|
||||
build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
|
||||
{
|
||||
PGconn *conn;
|
||||
PGresult *res;
|
||||
@@ -1148,6 +1148,8 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
PQExpBufferData command;
|
||||
PQExpBufferData command_output;
|
||||
|
||||
t_node_matrix_rec **matrix_rec_list;
|
||||
|
||||
/* We need to connect to get the list of nodes */
|
||||
log_info(_("connecting to database\n"));
|
||||
|
||||
@@ -1185,7 +1187,7 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
PQfinish(conn);
|
||||
|
||||
/*
|
||||
* Allocate an empty matrix
|
||||
* Allocate an empty matrix record list
|
||||
*
|
||||
* -2 == NULL ?
|
||||
* -1 == Error x
|
||||
@@ -1193,9 +1195,7 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
*/
|
||||
n = PQntuples(res);
|
||||
|
||||
matrix->length = n;
|
||||
|
||||
matrix->matrix_rec_list = (t_node_status_matrix_rec **) pg_malloc0(sizeof(t_node_status_matrix_rec) * n);
|
||||
matrix_rec_list = (t_node_matrix_rec **) pg_malloc0(sizeof(t_node_matrix_rec) * n);
|
||||
|
||||
|
||||
/* Initialise matrix structure for each node */
|
||||
@@ -1203,25 +1203,25 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
{
|
||||
int name_length_cur;
|
||||
|
||||
matrix->matrix_rec_list[i] = (t_node_status_matrix_rec *) pg_malloc0(sizeof(t_node_status_matrix_rec));
|
||||
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
||||
|
||||
matrix->matrix_rec_list[i]->node_id = atoi(PQgetvalue(res, i, 4));
|
||||
strncpy(matrix->matrix_rec_list[i]->node_name, PQgetvalue(res, i, 2), MAXLEN);
|
||||
matrix_rec_list[i]->node_id = atoi(PQgetvalue(res, i, 4));
|
||||
strncpy(matrix_rec_list[i]->node_name, PQgetvalue(res, i, 2), MAXLEN);
|
||||
|
||||
/*
|
||||
* Find the maximum length of a node name
|
||||
*/
|
||||
name_length_cur = strlen(matrix->matrix_rec_list[i]->node_name);
|
||||
name_length_cur = strlen(matrix_rec_list[i]->node_name);
|
||||
if (name_length_cur > *name_length)
|
||||
*name_length = name_length_cur;
|
||||
|
||||
matrix->matrix_rec_list[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * n);
|
||||
matrix_rec_list[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * n);
|
||||
|
||||
for (j = 0; j < n; j++)
|
||||
{
|
||||
matrix->matrix_rec_list[i]->node_status_list[j] = (t_node_status_rec *) pg_malloc0(sizeof(t_node_status_rec));
|
||||
matrix->matrix_rec_list[i]->node_status_list[j]->node_id = atoi(PQgetvalue(res, j, 4));
|
||||
matrix->matrix_rec_list[i]->node_status_list[j]->node_status = -2; /* default unknown */
|
||||
matrix_rec_list[i]->node_status_list[j] = (t_node_status_rec *) pg_malloc0(sizeof(t_node_status_rec));
|
||||
matrix_rec_list[i]->node_status_list[j]->node_id = atoi(PQgetvalue(res, j, 4));
|
||||
matrix_rec_list[i]->node_status_list[j]->node_status = -2; /* default unknown */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1249,7 +1249,8 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
(PQstatus(conn) == CONNECTION_OK) ? 0 : -1;
|
||||
|
||||
|
||||
matrix_set_node_status(matrix,
|
||||
matrix_set_node_status(matrix_rec_list,
|
||||
n,
|
||||
local_node_id,
|
||||
connection_node_id,
|
||||
connection_status);
|
||||
@@ -1277,10 +1278,14 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
|
||||
|
||||
if (strlen(pg_bindir))
|
||||
// XXX escape path!
|
||||
{
|
||||
appendPQExpBuffer(&command,
|
||||
"--pg_bindir=%s ",
|
||||
"--pg_bindir=");
|
||||
appendShellString(&command,
|
||||
pg_bindir);
|
||||
appendPQExpBuffer(&command,
|
||||
" ");
|
||||
}
|
||||
|
||||
appendPQExpBuffer(&command,
|
||||
" cluster show --csv\"");
|
||||
@@ -1308,7 +1313,8 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
exit(ERR_INTERNAL);
|
||||
}
|
||||
|
||||
matrix_set_node_status(matrix,
|
||||
matrix_set_node_status(matrix_rec_list,
|
||||
n,
|
||||
connection_node_id,
|
||||
x,
|
||||
(y == -1) ? -1 : 0 );
|
||||
@@ -1324,6 +1330,8 @@ build_cluster_matrix(t_node_status_matrix *matrix, int *name_length)
|
||||
|
||||
PQclear(res);
|
||||
|
||||
*matrix_rec_dest = matrix_rec_list;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
@@ -1337,19 +1345,18 @@ do_cluster_matrix()
|
||||
const char *node_header = "Name";
|
||||
int name_length = strlen(node_header);
|
||||
|
||||
t_node_status_matrix *matrix = (t_node_status_matrix *) pg_malloc(sizeof(t_node_status_matrix));
|
||||
t_node_matrix_rec **matrix_rec_list;
|
||||
|
||||
n = build_cluster_matrix(matrix, &name_length);
|
||||
n = build_cluster_matrix(&matrix_rec_list, &name_length);
|
||||
|
||||
if (runtime_options.csv_mode)
|
||||
{
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
for (j = 0; j < n; j++)
|
||||
printf("%d,%d,%d\n",
|
||||
matrix->matrix_rec_list[i]->node_id,
|
||||
matrix->matrix_rec_list[i]->node_status_list[j]->node_id,
|
||||
matrix->matrix_rec_list[i]->node_status_list[j]->node_status);
|
||||
matrix_rec_list[i]->node_id,
|
||||
matrix_rec_list[i]->node_status_list[j]->node_id,
|
||||
matrix_rec_list[i]->node_status_list[j]->node_status);
|
||||
|
||||
}
|
||||
else
|
||||
@@ -1358,7 +1365,7 @@ do_cluster_matrix()
|
||||
|
||||
printf("%*s | Id ", name_length, node_header);
|
||||
for (i = 0; i < n; i++)
|
||||
printf("| %2d ", matrix->matrix_rec_list[i]->node_id);
|
||||
printf("| %2d ", matrix_rec_list[i]->node_id);
|
||||
printf("\n");
|
||||
|
||||
for (i = 0; i < name_length; i++)
|
||||
@@ -1371,11 +1378,11 @@ do_cluster_matrix()
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
printf("%*s | %2d ", name_length,
|
||||
matrix->matrix_rec_list[i]->node_name,
|
||||
matrix->matrix_rec_list[i]->node_id);
|
||||
matrix_rec_list[i]->node_name,
|
||||
matrix_rec_list[i]->node_id);
|
||||
for (j = 0; j < n; j++)
|
||||
{
|
||||
switch (matrix->matrix_rec_list[i]->node_status_list[j]->node_status)
|
||||
switch (matrix_rec_list[i]->node_status_list[j]->node_status)
|
||||
{
|
||||
case -2:
|
||||
c = '?';
|
||||
@@ -1490,11 +1497,11 @@ build_cluster_diagnose(t_node_status_cube ***dest_cube, int *name_length)
|
||||
if (name_length_cur > *name_length)
|
||||
*name_length = name_length_cur;
|
||||
|
||||
cube[h]->matrix_list_rec = (t_node_status_matrix_rec **) pg_malloc(sizeof(t_node_status_matrix_rec) * n);
|
||||
cube[h]->matrix_list_rec = (t_node_matrix_rec **) pg_malloc(sizeof(t_node_matrix_rec) * n);
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
cube[h]->matrix_list_rec[i] = (t_node_status_matrix_rec *) pg_malloc0(sizeof(t_node_status_matrix_rec));
|
||||
cube[h]->matrix_list_rec[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
||||
cube[h]->matrix_list_rec[i]->node_id = atoi(PQgetvalue(res, i, 4));
|
||||
|
||||
/* we don't need the name here */
|
||||
@@ -1536,10 +1543,14 @@ build_cluster_diagnose(t_node_status_cube ***dest_cube, int *name_length)
|
||||
remote_node_id);
|
||||
|
||||
if (strlen(pg_bindir))
|
||||
// XXX escape path!
|
||||
{
|
||||
appendPQExpBuffer(&command,
|
||||
"--pg_bindir=%s ",
|
||||
"--pg_bindir=");
|
||||
appendShellString(&command,
|
||||
pg_bindir);
|
||||
appendPQExpBuffer(&command,
|
||||
" ");
|
||||
}
|
||||
|
||||
appendPQExpBuffer(&command,
|
||||
"cluster matrix --csv 2>/dev/null");
|
||||
|
||||
15
repmgr.h
15
repmgr.h
@@ -174,31 +174,20 @@ typedef struct
|
||||
int node_status;
|
||||
} t_node_status_rec;
|
||||
|
||||
// for each node, list of statuses for other nodes
|
||||
// output of "cluster show"
|
||||
typedef struct
|
||||
{
|
||||
int node_id;
|
||||
char node_name[MAXLEN];
|
||||
t_node_status_rec **node_status_list;
|
||||
} t_node_status_matrix_rec;
|
||||
} t_node_matrix_rec;
|
||||
|
||||
|
||||
|
||||
// points to the output of "cluster matrix" on each node
|
||||
typedef struct
|
||||
{
|
||||
int node_id;
|
||||
char node_name[MAXLEN];
|
||||
t_node_status_matrix_rec **matrix_list_rec;
|
||||
t_node_matrix_rec **matrix_list_rec;
|
||||
} t_node_status_cube;
|
||||
|
||||
|
||||
// not really needed
|
||||
typedef struct
|
||||
{
|
||||
int length;
|
||||
t_node_status_matrix_rec **matrix_rec_list;
|
||||
} t_node_status_matrix;
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user