mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-24 07:36:30 +00:00
Compare commits
30 Commits
v3.1.4
...
REL3_1_STABLE
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3802b917e0 | ||
|
|
4f7a2a0614 | ||
|
|
06c7fe04b0 | ||
|
|
1fe01e9168 | ||
|
|
ed1136f443 | ||
|
|
a7ed60a533 | ||
|
|
fc5a18410d | ||
|
|
fd52c8ec3c | ||
|
|
47f1c6fa84 | ||
|
|
fba89ef37c | ||
|
|
4cc6cbe32f | ||
|
|
c715077c29 | ||
|
|
c178d8ed27 | ||
|
|
d4d06f43f7 | ||
|
|
0d346a9f54 | ||
|
|
abb16e4366 | ||
|
|
59b1924d5b | ||
|
|
c88ea62643 | ||
|
|
5b91a5e2e5 | ||
|
|
c2a1a35282 | ||
|
|
2b8b74ae75 | ||
|
|
08ef4d4be6 | ||
|
|
1a0049f086 | ||
|
|
af6f0fc2cf | ||
|
|
893d67473d | ||
|
|
a922cd5558 | ||
|
|
7bbc664230 | ||
|
|
a6998fe0f9 | ||
|
|
dadfdcc51f | ||
|
|
b8823d5c1f |
16
HISTORY
16
HISTORY
@@ -1,4 +1,18 @@
|
|||||||
3.1.4 2016-07-
|
3.1.5 2016-08-15
|
||||||
|
repmgrd: in a failover situation, prevent endless looping when
|
||||||
|
attempting to establish the status of a node with
|
||||||
|
`failover=manual` (Ian)
|
||||||
|
repmgrd: improve handling of failover events on standbys with
|
||||||
|
`failover=manual`, and create a new event notification
|
||||||
|
for this, `standby_disconnect_manual` (Ian)
|
||||||
|
repmgr: add further event notifications (Gianni)
|
||||||
|
repmgr: when executing `standby switchover`, don't collect remote
|
||||||
|
command output unless required (Gianni, Ian)
|
||||||
|
repmgrd: improve standby monitoring query (Ian, based on suggestion
|
||||||
|
from Álvaro)
|
||||||
|
repmgr: various command line handling improvements (Ian)
|
||||||
|
|
||||||
|
3.1.4 2016-07-12
|
||||||
repmgr: new configuration option for setting "restore_command"
|
repmgr: new configuration option for setting "restore_command"
|
||||||
in the recovery.conf file generated by repmgr (Martín)
|
in the recovery.conf file generated by repmgr (Martín)
|
||||||
repmgr: add --csv option to "repmgr cluster show" (Gianni)
|
repmgr: add --csv option to "repmgr cluster show" (Gianni)
|
||||||
|
|||||||
96
README.md
96
README.md
@@ -155,9 +155,15 @@ system.
|
|||||||
|
|
||||||
- RedHat/CentOS: RPM packages for `repmgr` are available via Yum through
|
- RedHat/CentOS: RPM packages for `repmgr` are available via Yum through
|
||||||
the PostgreSQL Global Development Group RPM repository ( http://yum.postgresql.org/ ).
|
the PostgreSQL Global Development Group RPM repository ( http://yum.postgresql.org/ ).
|
||||||
You need to follow the instructions for your distribution (RedHat, CentOS,
|
Follow the instructions for your distribution (RedHat, CentOS,
|
||||||
Fedora, etc.) and architecture as detailed at yum.postgresql.org.
|
Fedora, etc.) and architecture as detailed at yum.postgresql.org.
|
||||||
|
|
||||||
|
2ndQuadrant also provides its own RPM packages which are made available
|
||||||
|
at the same time as each `repmgr` release, as it can take some days for
|
||||||
|
them to become available via the main PGDG repository. See here for details:
|
||||||
|
|
||||||
|
http://repmgr.org/yum-repository.html
|
||||||
|
|
||||||
- Debian/Ubuntu: the most recent `repmgr` packages are available from the
|
- Debian/Ubuntu: the most recent `repmgr` packages are available from the
|
||||||
PostgreSQL Community APT repository ( http://apt.postgresql.org/ ).
|
PostgreSQL Community APT repository ( http://apt.postgresql.org/ ).
|
||||||
Instructions can be found in the APT section of the PostgreSQL Wiki
|
Instructions can be found in the APT section of the PostgreSQL Wiki
|
||||||
@@ -411,14 +417,32 @@ Clone the standby with:
|
|||||||
[2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server
|
[2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server
|
||||||
[2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start
|
[2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start
|
||||||
|
|
||||||
This will clone the PostgreSQL data directory files from the master at repmgr_node1
|
This will clone the PostgreSQL data directory files from the master at `repmgr_node1`
|
||||||
using PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the
|
using PostgreSQL's `pg_basebackup` utility. A `recovery.conf` file containing the
|
||||||
correct parameters to start streaming from this master server will be created
|
correct parameters to start streaming from this master server will be created
|
||||||
automatically, and unless otherwise the `postgresql.conf` and `pg_hba.conf`
|
automatically, and unless otherwise specified, the `postgresql.conf` and `pg_hba.conf`
|
||||||
files will be copied from the master.
|
files will be copied from the master.
|
||||||
|
|
||||||
Make any adjustments to the PostgreSQL configuration files now, then start the
|
Be aware that when initially cloning a standby, you will need to ensure
|
||||||
standby server.
|
that all required WAL files remain available while the cloning is taking
|
||||||
|
place. To ensure this happens when using the default `pg_basebackup` method,
|
||||||
|
`repmgr` will set `pg_basebackup`'s `--xlog-method` parameter to `stream`,
|
||||||
|
which will ensure all WAL files generated during the cloning process are
|
||||||
|
streamed in parallel with the main backup. Note that this requires two
|
||||||
|
replication connections to be available.
|
||||||
|
|
||||||
|
To override this behaviour, in `repmgr.conf` set `pg_basebackup`'s
|
||||||
|
`--xlog-method` parameter to `fetch`:
|
||||||
|
|
||||||
|
pg_basebackup_options='--xlog-method=fetch'
|
||||||
|
|
||||||
|
and ensure that `wal_keep_segments` is set to an appropriately high value.
|
||||||
|
See the `pg_basebackup` documentation for details:
|
||||||
|
|
||||||
|
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||||
|
|
||||||
|
Make any adjustments to the standby's PostgreSQL configuration files now,
|
||||||
|
then start the server.
|
||||||
|
|
||||||
* * *
|
* * *
|
||||||
|
|
||||||
@@ -622,6 +646,10 @@ To enable `repmgr` to use replication slots, set the boolean parameter
|
|||||||
Note that `repmgr` will fail with an error if this option is specified when
|
Note that `repmgr` will fail with an error if this option is specified when
|
||||||
working with PostgreSQL 9.3.
|
working with PostgreSQL 9.3.
|
||||||
|
|
||||||
|
Replication slots must be enabled in `postgresql.conf` by setting the parameter
|
||||||
|
`max_replication_slots` to at least the number of expected standbys (changes
|
||||||
|
to this parameter require a server restart).
|
||||||
|
|
||||||
When cloning a standby, `repmgr` will automatically generate an appropriate
|
When cloning a standby, `repmgr` will automatically generate an appropriate
|
||||||
slot name, which is stored in the `repl_nodes` table, and create the slot
|
slot name, which is stored in the `repl_nodes` table, and create the slot
|
||||||
on the master:
|
on the master:
|
||||||
@@ -644,18 +672,6 @@ Note that a slot name will be created by default for the master but not
|
|||||||
actually used unless the master is converted to a standby using e.g.
|
actually used unless the master is converted to a standby using e.g.
|
||||||
`repmgr standby switchover`.
|
`repmgr standby switchover`.
|
||||||
|
|
||||||
Be aware that when initially cloning a standby, you will need to ensure
|
|
||||||
that all required WAL files remain available while the cloning is taking
|
|
||||||
place. If using the default `pg_basebackup` method, we recommend setting
|
|
||||||
`pg_basebackup`'s `--xlog-method` parameter to `stream` like this:
|
|
||||||
|
|
||||||
pg_basebackup_options='--xlog-method=stream'
|
|
||||||
|
|
||||||
See the `pg_basebackup` documentation for details:
|
|
||||||
https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
|
||||||
|
|
||||||
Otherwise it's necessary to set `wal_keep_segments` to an appropriately high
|
|
||||||
value.
|
|
||||||
|
|
||||||
Further information on replication slots in the PostgreSQL documentation:
|
Further information on replication slots in the PostgreSQL documentation:
|
||||||
https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
||||||
@@ -953,7 +969,7 @@ actions happening, but we strongly recommend executing `repmgr` directly.
|
|||||||
|
|
||||||
`repmgrd` can be started simply with e.g.:
|
`repmgrd` can be started simply with e.g.:
|
||||||
|
|
||||||
repmgrd -f /etc/repmgr.conf --verbose > $HOME/repmgr/repmgr.log 2>&1
|
repmgrd -f /etc/repmgr.conf --verbose >> $HOME/repmgr/repmgr.log 2>&1
|
||||||
|
|
||||||
For permanent operation, we recommend using the options `-d/--daemonize` to
|
For permanent operation, we recommend using the options `-d/--daemonize` to
|
||||||
detach the `repmgrd` process, and `-p/--pid-file` to write the process PID
|
detach the `repmgrd` process, and `-p/--pid-file` to write the process PID
|
||||||
@@ -975,7 +991,7 @@ table looks like this:
|
|||||||
|
|
||||||
|
|
||||||
Start `repmgrd` on each standby and verify that it's running by examining
|
Start `repmgrd` on each standby and verify that it's running by examining
|
||||||
the log output, which at default log level will look like this:
|
the log output, which at log level INFO will look like this:
|
||||||
|
|
||||||
[2016-01-05 13:15:40] [INFO] checking cluster configuration with schema 'repmgr_test'
|
[2016-01-05 13:15:40] [INFO] checking cluster configuration with schema 'repmgr_test'
|
||||||
[2016-01-05 13:15:40] [INFO] checking node 2 in cluster 'test'
|
[2016-01-05 13:15:40] [INFO] checking node 2 in cluster 'test'
|
||||||
@@ -1075,7 +1091,7 @@ the length of time it takes to determine that the connection is not possible.
|
|||||||
In particular explicitly setting a parameter for `connect_timeout` should
|
In particular explicitly setting a parameter for `connect_timeout` should
|
||||||
be considered; the effective minimum value of `2` (seconds) will ensure
|
be considered; the effective minimum value of `2` (seconds) will ensure
|
||||||
that a connection failure at network level is reported as soon as possible,
|
that a connection failure at network level is reported as soon as possible,
|
||||||
otherwise dependeing on the system settings (e.g. `tcp_syn_retries` in Linux)
|
otherwise depending on the system settings (e.g. `tcp_syn_retries` in Linux)
|
||||||
a delay of a minute or more is possible.
|
a delay of a minute or more is possible.
|
||||||
|
|
||||||
For further details on `conninfo` network connection parameters, see:
|
For further details on `conninfo` network connection parameters, see:
|
||||||
@@ -1116,9 +1132,16 @@ table , it's advisable to regularly purge historical data with
|
|||||||
`repmgr cluster cleanup`; use the `-k/--keep-history` to specify how
|
`repmgr cluster cleanup`; use the `-k/--keep-history` to specify how
|
||||||
many days' worth of data should be retained.
|
many days' worth of data should be retained.
|
||||||
|
|
||||||
|
It's possible to use `repmgrd` to provide monitoring only for some or all
|
||||||
|
nodes by setting `failover = manual` in the node's `repmgr.conf`. In the
|
||||||
|
event of the node's upstream failing, no failover action will be taken
|
||||||
|
and the node will require manual intervention to be reattached to replication.
|
||||||
|
If this occurs, event notification `standby_disconnect_manual` will be
|
||||||
|
created.
|
||||||
|
|
||||||
Note that when a standby node is not streaming directly from its upstream
|
Note that when a standby node is not streaming directly from its upstream
|
||||||
node, i.e. recovering WAL from an archive, `apply_lag` will always
|
node, e.g. recovering WAL from an archive, `apply_lag` will always appear as
|
||||||
appear as `0 bytes`.
|
`0 bytes`.
|
||||||
|
|
||||||
|
|
||||||
Using a witness server with repmgrd
|
Using a witness server with repmgrd
|
||||||
@@ -1215,6 +1238,7 @@ The following event types are available:
|
|||||||
* `standby_promote`
|
* `standby_promote`
|
||||||
* `standby_follow`
|
* `standby_follow`
|
||||||
* `standby_switchover`
|
* `standby_switchover`
|
||||||
|
* `standby_disconnect_manual`
|
||||||
* `witness_create`
|
* `witness_create`
|
||||||
* `witness_create`
|
* `witness_create`
|
||||||
* `repmgrd_start`
|
* `repmgrd_start`
|
||||||
@@ -1376,17 +1400,32 @@ which contains connection details for the local database.
|
|||||||
when analyzing connectivity from a particular node.
|
when analyzing connectivity from a particular node.
|
||||||
|
|
||||||
This command requires a valid `repmgr.conf` file to be provided; no
|
This command requires a valid `repmgr.conf` file to be provided; no
|
||||||
additional arguments are required.
|
additional arguments are needed.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
$ repmgr -f /etc/repmgr.conf cluster show
|
||||||
|
|
||||||
Role | Name | Upstream | Connection String
|
Role | Name | Upstream | Connection String
|
||||||
----------+-------|----------|--------------------------------------------
|
----------+-------|----------|----------------------------------------
|
||||||
* master | node1 | | host=repmgr_node1 dbname=repmgr user=repmgr
|
* master | node1 | | host=db_node1 dbname=repmgr user=repmgr
|
||||||
standby | node2 | node1 | host=repmgr_node1 dbname=repmgr user=repmgr
|
standby | node2 | node1 | host=db_node2 dbname=repmgr user=repmgr
|
||||||
standby | node3 | node2 | host=repmgr_node1 dbname=repmgr user=repmgr
|
standby | node3 | node2 | host=db_node3 dbname=repmgr user=repmgr
|
||||||
|
|
||||||
|
To show database connection errors when polling nodes, run the command in
|
||||||
|
`--verbose` mode.
|
||||||
|
|
||||||
|
The `cluster show` command now accepts the optional parameter `--csv`, which
|
||||||
|
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||||
|
parsing by scripts:
|
||||||
|
|
||||||
|
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||||
|
1,-1
|
||||||
|
2,0
|
||||||
|
3,1
|
||||||
|
|
||||||
|
The first column is the node's ID, and the second column represents the
|
||||||
|
node's status (0 = master, 1 = standby, -1 = failed).
|
||||||
|
|
||||||
* `cluster cleanup`
|
* `cluster cleanup`
|
||||||
|
|
||||||
@@ -1466,5 +1505,6 @@ Thanks from the repmgr core team.
|
|||||||
Further reading
|
Further reading
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
* http://blog.2ndquadrant.com/improvements-in-repmgr-3-1-4/
|
||||||
* http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
* http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
||||||
* http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
|
* http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
|
||||||
|
|||||||
9
config.c
9
config.c
@@ -219,6 +219,9 @@ parse_config(t_configuration_options *options)
|
|||||||
memset(options->node_name, 0, sizeof(options->node_name));
|
memset(options->node_name, 0, sizeof(options->node_name));
|
||||||
memset(options->promote_command, 0, sizeof(options->promote_command));
|
memset(options->promote_command, 0, sizeof(options->promote_command));
|
||||||
memset(options->follow_command, 0, sizeof(options->follow_command));
|
memset(options->follow_command, 0, sizeof(options->follow_command));
|
||||||
|
memset(options->stop_command, 0, sizeof(options->stop_command));
|
||||||
|
memset(options->start_command, 0, sizeof(options->start_command));
|
||||||
|
memset(options->restart_command, 0, sizeof(options->restart_command));
|
||||||
memset(options->rsync_options, 0, sizeof(options->rsync_options));
|
memset(options->rsync_options, 0, sizeof(options->rsync_options));
|
||||||
memset(options->ssh_options, 0, sizeof(options->ssh_options));
|
memset(options->ssh_options, 0, sizeof(options->ssh_options));
|
||||||
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||||
@@ -341,6 +344,12 @@ parse_config(t_configuration_options *options)
|
|||||||
strncpy(options->promote_command, value, MAXLEN);
|
strncpy(options->promote_command, value, MAXLEN);
|
||||||
else if (strcmp(name, "follow_command") == 0)
|
else if (strcmp(name, "follow_command") == 0)
|
||||||
strncpy(options->follow_command, value, MAXLEN);
|
strncpy(options->follow_command, value, MAXLEN);
|
||||||
|
else if (strcmp(name, "stop_command") == 0)
|
||||||
|
strncpy(options->stop_command, value, MAXLEN);
|
||||||
|
else if (strcmp(name, "start_command") == 0)
|
||||||
|
strncpy(options->start_command, value, MAXLEN);
|
||||||
|
else if (strcmp(name, "restart_command") == 0)
|
||||||
|
strncpy(options->restart_command, value, MAXLEN);
|
||||||
else if (strcmp(name, "master_response_timeout") == 0)
|
else if (strcmp(name, "master_response_timeout") == 0)
|
||||||
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
||||||
/*
|
/*
|
||||||
|
|||||||
5
config.h
5
config.h
@@ -62,6 +62,9 @@ typedef struct
|
|||||||
char node_name[MAXLEN];
|
char node_name[MAXLEN];
|
||||||
char promote_command[MAXLEN];
|
char promote_command[MAXLEN];
|
||||||
char follow_command[MAXLEN];
|
char follow_command[MAXLEN];
|
||||||
|
char stop_command[MAXLEN];
|
||||||
|
char start_command[MAXLEN];
|
||||||
|
char restart_command[MAXLEN];
|
||||||
char loglevel[MAXLEN];
|
char loglevel[MAXLEN];
|
||||||
char logfacility[MAXLEN];
|
char logfacility[MAXLEN];
|
||||||
char rsync_options[QUERY_STR_LEN];
|
char rsync_options[QUERY_STR_LEN];
|
||||||
@@ -87,7 +90,7 @@ typedef struct
|
|||||||
* The following will initialize the structure with a minimal set of options;
|
* The following will initialize the structure with a minimal set of options;
|
||||||
* actual defaults are set in parse_config() before parsing the configuration file
|
* actual defaults are set in parse_config() before parsing the configuration file
|
||||||
*/
|
*/
|
||||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }
|
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||||
|
|
||||||
typedef struct ErrorListCell
|
typedef struct ErrorListCell
|
||||||
{
|
{
|
||||||
|
|||||||
51
dbutils.c
51
dbutils.c
@@ -34,7 +34,7 @@ char repmgr_schema_quoted[MAXLEN] = "";
|
|||||||
static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);
|
static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);
|
||||||
|
|
||||||
PGconn *
|
PGconn *
|
||||||
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice)
|
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice, const bool verbose_only)
|
||||||
{
|
{
|
||||||
/* Make a connection to the database */
|
/* Make a connection to the database */
|
||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
@@ -50,15 +50,23 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
|||||||
/* Check to see that the backend connection was successfully made */
|
/* Check to see that the backend connection was successfully made */
|
||||||
if ((PQstatus(conn) != CONNECTION_OK))
|
if ((PQstatus(conn) != CONNECTION_OK))
|
||||||
{
|
{
|
||||||
if (log_notice)
|
bool emit_log = true;
|
||||||
|
|
||||||
|
if (verbose_only == true && verbose_logging == false)
|
||||||
|
emit_log = false;
|
||||||
|
|
||||||
|
if (emit_log)
|
||||||
{
|
{
|
||||||
log_notice(_("connection to database failed: %s\n"),
|
if (log_notice)
|
||||||
PQerrorMessage(conn));
|
{
|
||||||
}
|
log_notice(_("connection to database failed: %s\n"),
|
||||||
else
|
PQerrorMessage(conn));
|
||||||
{
|
}
|
||||||
log_err(_("connection to database failed: %s\n"),
|
else
|
||||||
PQerrorMessage(conn));
|
{
|
||||||
|
log_err(_("connection to database failed: %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (exit_on_error)
|
if (exit_on_error)
|
||||||
@@ -71,16 +79,35 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
|||||||
return conn;
|
return conn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Establish a database connection, optionally exit on error
|
||||||
|
*/
|
||||||
PGconn *
|
PGconn *
|
||||||
establish_db_connection(const char *conninfo, const bool exit_on_error)
|
establish_db_connection(const char *conninfo, const bool exit_on_error)
|
||||||
{
|
{
|
||||||
return _establish_db_connection(conninfo, exit_on_error, false);
|
return _establish_db_connection(conninfo, exit_on_error, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Attempt to establish a database connection, never exit on error, only
|
||||||
|
* output error messages if --verbose option used
|
||||||
|
*/
|
||||||
PGconn *
|
PGconn *
|
||||||
test_db_connection(const char *conninfo, const bool exit_on_error)
|
establish_db_connection_quiet(const char *conninfo)
|
||||||
{
|
{
|
||||||
return _establish_db_connection(conninfo, exit_on_error, true);
|
return _establish_db_connection(conninfo, false, false, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Attempt to establish a database connection, never exit on error,
|
||||||
|
* output connection error messages as NOTICE (useful when connection
|
||||||
|
* failure is expected)
|
||||||
|
*/
|
||||||
|
PGconn *
|
||||||
|
test_db_connection(const char *conninfo)
|
||||||
|
{
|
||||||
|
return _establish_db_connection(conninfo, false, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -81,11 +81,12 @@ typedef struct s_replication_slot
|
|||||||
|
|
||||||
PGconn *_establish_db_connection(const char *conninfo,
|
PGconn *_establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error,
|
const bool exit_on_error,
|
||||||
const bool log_notice);
|
const bool log_notice,
|
||||||
|
const bool verbose_only);
|
||||||
PGconn *establish_db_connection(const char *conninfo,
|
PGconn *establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
PGconn *test_db_connection(const char *conninfo,
|
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||||
const bool exit_on_error);
|
PGconn *test_db_connection(const char *conninfo);
|
||||||
PGconn *establish_db_connection_by_params(const char *keywords[],
|
PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||||
const char *values[],
|
const char *values[],
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
|
|||||||
2
log.c
2
log.c
@@ -142,7 +142,7 @@ log_verbose(int level, const char *fmt, ...)
|
|||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
logger_init(t_configuration_options * opts, const char *ident)
|
logger_init(t_configuration_options *opts, const char *ident)
|
||||||
{
|
{
|
||||||
char *level = opts->loglevel;
|
char *level = opts->loglevel;
|
||||||
char *facility = opts->logfacility;
|
char *facility = opts->logfacility;
|
||||||
|
|||||||
4
log.h
4
log.h
@@ -130,5 +130,7 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
|||||||
|
|
||||||
extern int log_type;
|
extern int log_type;
|
||||||
extern int log_level;
|
extern int log_level;
|
||||||
|
extern int verbose_logging;
|
||||||
|
extern int terse_logging;
|
||||||
|
|
||||||
#endif
|
#endif /* _REPMGR_LOG_H_ */
|
||||||
|
|||||||
264
repmgr.c
264
repmgr.c
@@ -59,7 +59,9 @@
|
|||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
#include "version.h"
|
#include "version.h"
|
||||||
|
|
||||||
#define RECOVERY_FILE "recovery.conf"
|
#ifndef RECOVERY_COMMAND_FILE
|
||||||
|
#define RECOVERY_COMMAND_FILE "recovery.conf"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef TABLESPACE_MAP
|
#ifndef TABLESPACE_MAP
|
||||||
#define TABLESPACE_MAP "tablespace_map"
|
#define TABLESPACE_MAP "tablespace_map"
|
||||||
@@ -139,6 +141,7 @@ t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER;
|
|||||||
t_configuration_options options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
t_configuration_options options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||||
|
|
||||||
bool wal_keep_segments_used = false;
|
bool wal_keep_segments_used = false;
|
||||||
|
bool conninfo_provided = false;
|
||||||
bool connection_param_provided = false;
|
bool connection_param_provided = false;
|
||||||
bool host_param_provided = false;
|
bool host_param_provided = false;
|
||||||
bool pg_rewind_supplied = false;
|
bool pg_rewind_supplied = false;
|
||||||
@@ -170,8 +173,6 @@ main(int argc, char **argv)
|
|||||||
{"data-dir", required_argument, NULL, 'D'},
|
{"data-dir", required_argument, NULL, 'D'},
|
||||||
/* alias for -D/--data-dir, following pg_ctl usage */
|
/* alias for -D/--data-dir, following pg_ctl usage */
|
||||||
{"pgdata", required_argument, NULL, 'D'},
|
{"pgdata", required_argument, NULL, 'D'},
|
||||||
/* -l/--local-port is deprecated */
|
|
||||||
{"local-port", required_argument, NULL, 'l'},
|
|
||||||
{"config-file", required_argument, NULL, 'f'},
|
{"config-file", required_argument, NULL, 'f'},
|
||||||
{"remote-user", required_argument, NULL, 'R'},
|
{"remote-user", required_argument, NULL, 'R'},
|
||||||
{"wal-keep-segments", required_argument, NULL, 'w'},
|
{"wal-keep-segments", required_argument, NULL, 'w'},
|
||||||
@@ -186,17 +187,18 @@ main(int argc, char **argv)
|
|||||||
{"terse", required_argument, NULL, 't'},
|
{"terse", required_argument, NULL, 't'},
|
||||||
{"mode", required_argument, NULL, 'm'},
|
{"mode", required_argument, NULL, 'm'},
|
||||||
{"remote-config-file", required_argument, NULL, 'C'},
|
{"remote-config-file", required_argument, NULL, 'C'},
|
||||||
/* deprecated from 3.2; replaced with -P/--pwprompt */
|
{"help", no_argument, NULL, OPT_HELP},
|
||||||
{"initdb-no-pwprompt", no_argument, NULL, 1},
|
{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
|
||||||
{"check-upstream-config", no_argument, NULL, 2},
|
{"recovery-min-apply-delay", required_argument, NULL, OPT_RECOVERY_MIN_APPLY_DELAY},
|
||||||
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
{"ignore-external-config-files", no_argument, NULL, OPT_IGNORE_EXTERNAL_CONFIG_FILES},
|
||||||
{"ignore-external-config-files", no_argument, NULL, 4},
|
{"config-archive-dir", required_argument, NULL, OPT_CONFIG_ARCHIVE_DIR},
|
||||||
{"config-archive-dir", required_argument, NULL, 5},
|
{"pg_rewind", optional_argument, NULL, OPT_PG_REWIND},
|
||||||
{"pg_rewind", optional_argument, NULL, 6},
|
{"pwprompt", optional_argument, NULL, OPT_PWPROMPT},
|
||||||
{"pwprompt", optional_argument, NULL, 7},
|
{"csv", no_argument, NULL, OPT_CSV},
|
||||||
{"csv", no_argument, NULL, 8},
|
|
||||||
{"help", no_argument, NULL, '?'},
|
|
||||||
{"version", no_argument, NULL, 'V'},
|
{"version", no_argument, NULL, 'V'},
|
||||||
|
/* Following options deprecated */
|
||||||
|
{"local-port", required_argument, NULL, 'l'},
|
||||||
|
{"initdb-no-pwprompt", no_argument, NULL, OPT_INITDB_NO_PWPROMPT},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -284,6 +286,20 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
PQconninfoFree(defs);
|
PQconninfoFree(defs);
|
||||||
|
|
||||||
|
/* set default user for -R/--remote-user */
|
||||||
|
|
||||||
|
{
|
||||||
|
struct passwd *pw = NULL;
|
||||||
|
|
||||||
|
pw = getpwuid(geteuid());
|
||||||
|
if (pw == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
strncpy(runtime_options.username, pw->pw_name, MAXLEN);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Though libpq will default to the username as dbname, PQconndefaults()
|
* Though libpq will default to the username as dbname, PQconndefaults()
|
||||||
@@ -297,7 +313,7 @@ main(int argc, char **argv)
|
|||||||
/* Prevent getopt_long() from printing an error message */
|
/* Prevent getopt_long() from printing an error message */
|
||||||
opterr = 0;
|
opterr = 0;
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "?Vd:h:p:U:S:D:l:f:R:w:k:FWIvb:rcL:tm:C:", long_options,
|
while ((c = getopt_long(argc, argv, "?Vd:h:p:U:S:D:f:R:w:k:FWIvb:rcL:tm:C:l:", long_options,
|
||||||
&optindex)) != -1)
|
&optindex)) != -1)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -309,6 +325,17 @@ main(int argc, char **argv)
|
|||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
case '?':
|
case '?':
|
||||||
|
/* Actual help option given */
|
||||||
|
if (strcmp(argv[optind - 1], "-?") == 0)
|
||||||
|
{
|
||||||
|
do_help();
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
/* unknown option reported by getopt */
|
||||||
|
else
|
||||||
|
goto unknown_option;
|
||||||
|
break;
|
||||||
|
case OPT_HELP:
|
||||||
do_help();
|
do_help();
|
||||||
exit(SUCCESS);
|
exit(SUCCESS);
|
||||||
case 'V':
|
case 'V':
|
||||||
@@ -344,13 +371,6 @@ main(int argc, char **argv)
|
|||||||
case 'D':
|
case 'D':
|
||||||
strncpy(runtime_options.dest_dir, optarg, MAXPGPATH);
|
strncpy(runtime_options.dest_dir, optarg, MAXPGPATH);
|
||||||
break;
|
break;
|
||||||
case 'l':
|
|
||||||
/* -l/--local-port is deprecated */
|
|
||||||
repmgr_atoi(optarg, "-l/--local-port", &cli_errors, false);
|
|
||||||
strncpy(runtime_options.localport,
|
|
||||||
optarg,
|
|
||||||
MAXLEN);
|
|
||||||
break;
|
|
||||||
case 'f':
|
case 'f':
|
||||||
strncpy(runtime_options.config_file, optarg, MAXLEN);
|
strncpy(runtime_options.config_file, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
@@ -430,18 +450,15 @@ main(int argc, char **argv)
|
|||||||
case 'C':
|
case 'C':
|
||||||
strncpy(runtime_options.remote_config_file, optarg, MAXLEN);
|
strncpy(runtime_options.remote_config_file, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case OPT_CHECK_UPSTREAM_CONFIG:
|
||||||
runtime_options.initdb_no_pwprompt = true;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
check_upstream_config = true;
|
check_upstream_config = true;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case OPT_RECOVERY_MIN_APPLY_DELAY:
|
||||||
targ = strtol(optarg, &ptr, 10);
|
targ = strtol(optarg, &ptr, 10);
|
||||||
|
|
||||||
if (targ < 1)
|
if (targ < 1)
|
||||||
{
|
{
|
||||||
error_list_append(&cli_errors, _("Invalid value provided for '-r/--recovery-min-apply-delay'"));
|
error_list_append(&cli_errors, _("Invalid value provided for '--recovery-min-apply-delay'"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ptr && *ptr)
|
if (ptr && *ptr)
|
||||||
@@ -450,34 +467,49 @@ main(int argc, char **argv)
|
|||||||
strcmp(ptr, "min") != 0 && strcmp(ptr, "h") != 0 &&
|
strcmp(ptr, "min") != 0 && strcmp(ptr, "h") != 0 &&
|
||||||
strcmp(ptr, "d") != 0)
|
strcmp(ptr, "d") != 0)
|
||||||
{
|
{
|
||||||
error_list_append(&cli_errors, _("Value provided for '-r/--recovery-min-apply-delay' must be one of ms/s/min/h/d"));
|
error_list_append(&cli_errors, _("Value provided for '--recovery-min-apply-delay' must be one of ms/s/min/h/d"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
strncpy(runtime_options.recovery_min_apply_delay, optarg, MAXLEN);
|
strncpy(runtime_options.recovery_min_apply_delay, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case OPT_IGNORE_EXTERNAL_CONFIG_FILES:
|
||||||
runtime_options.ignore_external_config_files = true;
|
runtime_options.ignore_external_config_files = true;
|
||||||
break;
|
break;
|
||||||
case 5:
|
case OPT_CONFIG_ARCHIVE_DIR:
|
||||||
strncpy(runtime_options.config_archive_dir, optarg, MAXLEN);
|
strncpy(runtime_options.config_archive_dir, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
case 6:
|
case OPT_PG_REWIND:
|
||||||
if (optarg != NULL)
|
if (optarg != NULL)
|
||||||
{
|
{
|
||||||
strncpy(runtime_options.pg_rewind, optarg, MAXPGPATH);
|
strncpy(runtime_options.pg_rewind, optarg, MAXPGPATH);
|
||||||
}
|
}
|
||||||
pg_rewind_supplied = true;
|
pg_rewind_supplied = true;
|
||||||
break;
|
break;
|
||||||
case 7:
|
case OPT_PWPROMPT:
|
||||||
runtime_options.witness_pwprompt = true;
|
runtime_options.witness_pwprompt = true;
|
||||||
break;
|
break;
|
||||||
case 8:
|
case OPT_CSV:
|
||||||
runtime_options.csv_mode = true;
|
runtime_options.csv_mode = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* deprecated options - output a warning */
|
||||||
|
case 'l':
|
||||||
|
/* -l/--local-port is deprecated */
|
||||||
|
repmgr_atoi(optarg, "-l/--local-port", &cli_errors, false);
|
||||||
|
strncpy(runtime_options.localport,
|
||||||
|
optarg,
|
||||||
|
MAXLEN);
|
||||||
|
error_list_append(&cli_warnings, _("-l/--local-port is deprecated; repmgr will extract the witness port from the conninfo string in repmgr.conf"));
|
||||||
|
break;
|
||||||
|
case OPT_INITDB_NO_PWPROMPT:
|
||||||
|
/* --initdb-no-pwprompt is deprecated */
|
||||||
|
error_list_append(&cli_warnings, _("--initdb-no-pwprompt is deprecated and has no effect; use -P/--pwprompt instead"));
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
unknown_option:
|
||||||
{
|
{
|
||||||
PQExpBufferData unknown_option;
|
PQExpBufferData unknown_option;
|
||||||
initPQExpBuffer(&unknown_option);
|
initPQExpBuffer(&unknown_option);
|
||||||
@@ -500,6 +532,8 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
char *errmsg = NULL;
|
char *errmsg = NULL;
|
||||||
|
|
||||||
|
conninfo_provided = true;
|
||||||
|
|
||||||
opts = PQconninfoParse(runtime_options.dbname, &errmsg);
|
opts = PQconninfoParse(runtime_options.dbname, &errmsg);
|
||||||
|
|
||||||
if (opts == NULL)
|
if (opts == NULL)
|
||||||
@@ -655,18 +689,24 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For some actions we still can receive a last argument */
|
/* STANDBY CLONE historically accepts the upstream hostname as an additional argument */
|
||||||
if (action == STANDBY_CLONE)
|
if (action == STANDBY_CLONE)
|
||||||
{
|
{
|
||||||
if (optind < argc)
|
if (optind < argc)
|
||||||
{
|
{
|
||||||
if (runtime_options.host[0])
|
if (runtime_options.host[0])
|
||||||
{
|
{
|
||||||
error_list_append(&cli_errors, _("Conflicting parameters: you can't use -h while providing a node separately."));
|
PQExpBufferData additional_host_arg;
|
||||||
|
initPQExpBuffer(&additional_host_arg);
|
||||||
|
appendPQExpBuffer(&additional_host_arg,
|
||||||
|
_("Conflicting parameters: you can't use %s while providing a node separately."),
|
||||||
|
conninfo_provided == true ? "host=" : "-h/--host");
|
||||||
|
error_list_append(&cli_errors, additional_host_arg.data);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
strncpy(runtime_options.host, argv[optind++], MAXLEN);
|
strncpy(runtime_options.host, argv[optind++], MAXLEN);
|
||||||
|
param_set("host", runtime_options.host);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -735,15 +775,6 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* keywords[2] = "user";
|
|
||||||
values[2] = (runtime_options.username[0]) ? runtime_options.username : NULL;
|
|
||||||
keywords[3] = "dbname";
|
|
||||||
values[3] = runtime_options.dbname;
|
|
||||||
keywords[4] = "application_name";
|
|
||||||
values[4] = (char *) progname();
|
|
||||||
keywords[5] = NULL;
|
|
||||||
values[5] = NULL;*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize the logger. If verbose command line parameter was input,
|
* Initialize the logger. If verbose command line parameter was input,
|
||||||
* make sure that the log level is at least INFO. This is mainly useful
|
* make sure that the log level is at least INFO. This is mainly useful
|
||||||
@@ -878,7 +909,7 @@ do_cluster_show(void)
|
|||||||
upstream_length,
|
upstream_length,
|
||||||
conninfo_length = 0;
|
conninfo_length = 0;
|
||||||
|
|
||||||
/* We need to connect to check configuration */
|
/* Connect to local database to obtain cluster connection data */
|
||||||
log_info(_("connecting to database\n"));
|
log_info(_("connecting to database\n"));
|
||||||
conn = establish_db_connection(options.conninfo, true);
|
conn = establish_db_connection(options.conninfo, true);
|
||||||
|
|
||||||
@@ -953,7 +984,8 @@ do_cluster_show(void)
|
|||||||
|
|
||||||
for (i = 0; i < PQntuples(res); i++)
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
{
|
{
|
||||||
conn = establish_db_connection(PQgetvalue(res, i, 0), false);
|
conn = establish_db_connection_quiet(PQgetvalue(res, i, 0));
|
||||||
|
|
||||||
if (PQstatus(conn) != CONNECTION_OK)
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
strcpy(node_role, " FAILED");
|
strcpy(node_role, " FAILED");
|
||||||
else if (strcmp(PQgetvalue(res, i, 1), "witness") == 0)
|
else if (strcmp(PQgetvalue(res, i, 1), "witness") == 0)
|
||||||
@@ -2767,9 +2799,15 @@ do_standby_follow(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
|
|
||||||
/* Finally, restart the service */
|
/* Finally, restart the service */
|
||||||
maxlen_snprintf(script, "%s %s -w -D %s -m fast restart",
|
if (*options.restart_command)
|
||||||
make_pg_path("pg_ctl"), options.pg_ctl_options, data_dir);
|
{
|
||||||
|
maxlen_snprintf(script, "%s", options.restart_command);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
maxlen_snprintf(script, "%s %s -w -D %s -m fast restart",
|
||||||
|
make_pg_path("pg_ctl"), options.pg_ctl_options, data_dir);
|
||||||
|
}
|
||||||
log_notice(_("restarting server using '%s'\n"),
|
log_notice(_("restarting server using '%s'\n"),
|
||||||
script);
|
script);
|
||||||
|
|
||||||
@@ -2797,7 +2835,13 @@ do_standby_follow(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX add event record - possible move from repmgrd? */
|
create_event_record(master_conn,
|
||||||
|
&options,
|
||||||
|
options.node,
|
||||||
|
"standby_follow",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
@@ -3241,12 +3285,18 @@ do_standby_switchover(void)
|
|||||||
* -> use -F/--force?
|
* -> use -F/--force?
|
||||||
*/
|
*/
|
||||||
|
|
||||||
maxlen_snprintf(command,
|
if (*options.stop_command)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(command, "%s", options.stop_command);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
maxlen_snprintf(command,
|
||||||
"%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
|
"%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
|
||||||
make_pg_path("pg_ctl"),
|
make_pg_path("pg_ctl"),
|
||||||
remote_data_directory,
|
remote_data_directory,
|
||||||
runtime_options.pg_ctl_mode);
|
runtime_options.pg_ctl_mode);
|
||||||
|
}
|
||||||
initPQExpBuffer(&command_output);
|
initPQExpBuffer(&command_output);
|
||||||
|
|
||||||
// XXX handle failure
|
// XXX handle failure
|
||||||
@@ -3453,15 +3503,11 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
log_debug("Executing:\n%s\n", command);
|
log_debug("Executing:\n%s\n", command);
|
||||||
|
|
||||||
initPQExpBuffer(&command_output);
|
|
||||||
|
|
||||||
(void)remote_command(
|
(void)remote_command(
|
||||||
remote_host,
|
remote_host,
|
||||||
runtime_options.remote_user,
|
runtime_options.remote_user,
|
||||||
command,
|
command,
|
||||||
&command_output);
|
NULL);
|
||||||
|
|
||||||
termPQExpBuffer(&command_output);
|
|
||||||
|
|
||||||
/* verify that new standby is connected and replicating */
|
/* verify that new standby is connected and replicating */
|
||||||
|
|
||||||
@@ -3470,8 +3516,7 @@ do_standby_switchover(void)
|
|||||||
for(i = 0; i < options.reconnect_attempts; i++)
|
for(i = 0; i < options.reconnect_attempts; i++)
|
||||||
{
|
{
|
||||||
/* Check whether primary is available */
|
/* Check whether primary is available */
|
||||||
|
remote_conn = test_db_connection(remote_conninfo);
|
||||||
remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */
|
|
||||||
|
|
||||||
if (PQstatus(remote_conn) == CONNECTION_OK)
|
if (PQstatus(remote_conn) == CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -3506,9 +3551,20 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
if (query_result == -1)
|
if (query_result == -1)
|
||||||
{
|
{
|
||||||
log_err(_("unable to retrieve replication status for node %i\n"), remote_node_id);
|
PQExpBufferData event_details;
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("unable to retrieve replication status for node %i"),
|
||||||
|
remote_node_id);
|
||||||
|
log_err("%s\n", event_details.data);
|
||||||
|
create_event_record(local_conn,
|
||||||
|
&options,
|
||||||
|
options.node,
|
||||||
|
"standby_switchover",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
|
|
||||||
exit(ERR_SWITCHOVER_FAIL);
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3532,8 +3588,19 @@ do_standby_switchover(void)
|
|||||||
* - backup
|
* - backup
|
||||||
* - UNKNOWN
|
* - UNKNOWN
|
||||||
*/
|
*/
|
||||||
log_err(_("node %i has unexpected replication state \"%s\"\n"),
|
PQExpBufferData event_details;
|
||||||
remote_node_id, remote_node_replication_state);
|
initPQExpBuffer(&event_details);
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("node %i has unexpected replication state \"%s\""),
|
||||||
|
remote_node_id, remote_node_replication_state);
|
||||||
|
log_err("%s\n", event_details.data);
|
||||||
|
create_event_record(local_conn,
|
||||||
|
&options,
|
||||||
|
options.node,
|
||||||
|
"standby_switchover",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
exit(ERR_SWITCHOVER_FAIL);
|
exit(ERR_SWITCHOVER_FAIL);
|
||||||
}
|
}
|
||||||
@@ -3590,6 +3657,13 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
/* TODO: verify this node's record was updated correctly */
|
/* TODO: verify this node's record was updated correctly */
|
||||||
|
|
||||||
|
create_event_record(local_conn,
|
||||||
|
&options,
|
||||||
|
options.node,
|
||||||
|
"standby_switchover",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
|
|
||||||
log_notice(_("switchover was successful\n"));
|
log_notice(_("switchover was successful\n"));
|
||||||
@@ -3800,6 +3874,8 @@ do_witness_create(void)
|
|||||||
char master_hba_file[MAXLEN];
|
char master_hba_file[MAXLEN];
|
||||||
bool success;
|
bool success;
|
||||||
bool record_created;
|
bool record_created;
|
||||||
|
|
||||||
|
char witness_port[MAXLEN];
|
||||||
char repmgr_user[MAXLEN];
|
char repmgr_user[MAXLEN];
|
||||||
char repmgr_db[MAXLEN];
|
char repmgr_db[MAXLEN];
|
||||||
|
|
||||||
@@ -3946,6 +4022,9 @@ do_witness_create(void)
|
|||||||
xsnprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname());
|
xsnprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname());
|
||||||
fputs(buf, pg_conf);
|
fputs(buf, pg_conf);
|
||||||
|
|
||||||
|
/* value provided with '-l/--local-port' */
|
||||||
|
strncpy(witness_port, runtime_options.localport, MAXLEN);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Attempt to extract a port number from the provided conninfo string.
|
* Attempt to extract a port number from the provided conninfo string.
|
||||||
* This will override any value provided with '-l/--local-port', as it's
|
* This will override any value provided with '-l/--local-port', as it's
|
||||||
@@ -3953,7 +4032,7 @@ do_witness_create(void)
|
|||||||
* be deprecated.
|
* be deprecated.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
get_conninfo_value(options.conninfo, "port", runtime_options.localport);
|
get_conninfo_value(options.conninfo, "port", witness_port);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If not specified by the user, the default port for the witness server
|
* If not specified by the user, the default port for the witness server
|
||||||
@@ -3961,10 +4040,10 @@ do_witness_create(void)
|
|||||||
* a separate instance on a normal node server, rather than on its own
|
* a separate instance on a normal node server, rather than on its own
|
||||||
* dedicated server.
|
* dedicated server.
|
||||||
*/
|
*/
|
||||||
if (!runtime_options.localport[0])
|
if (!witness_port[0])
|
||||||
strncpy(runtime_options.localport, WITNESS_DEFAULT_PORT, MAXLEN);
|
strncpy(witness_port, WITNESS_DEFAULT_PORT, MAXLEN);
|
||||||
|
|
||||||
xsnprintf(buf, sizeof(buf), "port = %s\n", runtime_options.localport);
|
xsnprintf(buf, sizeof(buf), "port = %s\n", witness_port);
|
||||||
fputs(buf, pg_conf);
|
fputs(buf, pg_conf);
|
||||||
|
|
||||||
xsnprintf(buf, sizeof(buf), "shared_preload_libraries = 'repmgr_funcs'\n");
|
xsnprintf(buf, sizeof(buf), "shared_preload_libraries = 'repmgr_funcs'\n");
|
||||||
@@ -3977,9 +4056,16 @@ do_witness_create(void)
|
|||||||
|
|
||||||
|
|
||||||
/* start new instance */
|
/* start new instance */
|
||||||
maxlen_snprintf(script, "%s %s -w -D %s start",
|
if (*options.start_command)
|
||||||
make_pg_path("pg_ctl"),
|
{
|
||||||
options.pg_ctl_options, runtime_options.dest_dir);
|
maxlen_snprintf(script, "%s", options.start_command);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
maxlen_snprintf(script, "%s %s -w -D %s start",
|
||||||
|
make_pg_path("pg_ctl"),
|
||||||
|
options.pg_ctl_options, runtime_options.dest_dir);
|
||||||
|
}
|
||||||
log_info(_("starting witness server: %s\n"), script);
|
log_info(_("starting witness server: %s\n"), script);
|
||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
@@ -4006,7 +4092,7 @@ do_witness_create(void)
|
|||||||
* language function in C */
|
* language function in C */
|
||||||
maxlen_snprintf(script, "%s -p %s --superuser --login %s-U %s %s",
|
maxlen_snprintf(script, "%s -p %s --superuser --login %s-U %s %s",
|
||||||
make_pg_path("createuser"),
|
make_pg_path("createuser"),
|
||||||
runtime_options.localport,
|
witness_port,
|
||||||
runtime_options.witness_pwprompt ? "-P " : "",
|
runtime_options.witness_pwprompt ? "-P " : "",
|
||||||
runtime_options.superuser,
|
runtime_options.superuser,
|
||||||
repmgr_user);
|
repmgr_user);
|
||||||
@@ -4030,12 +4116,12 @@ do_witness_create(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* check if we need to create a database */
|
/* check if we need to create a database */
|
||||||
if (runtime_options.dbname[0] && strcmp(runtime_options.dbname,"postgres") != 0 && runtime_options.localport[0])
|
if (runtime_options.dbname[0] && strcmp(runtime_options.dbname,"postgres") != 0 && witness_port[0])
|
||||||
{
|
{
|
||||||
/* create required db */
|
/* create required db */
|
||||||
maxlen_snprintf(script, "%s -p %s -U %s --owner=%s %s",
|
maxlen_snprintf(script, "%s -p %s -U %s --owner=%s %s",
|
||||||
make_pg_path("createdb"),
|
make_pg_path("createdb"),
|
||||||
runtime_options.localport,
|
witness_port,
|
||||||
runtime_options.superuser,
|
runtime_options.superuser,
|
||||||
repmgr_user,
|
repmgr_user,
|
||||||
repmgr_db);
|
repmgr_db);
|
||||||
@@ -4268,6 +4354,7 @@ do_help(void)
|
|||||||
printf(_(" %s [OPTIONS] master register\n"), progname());
|
printf(_(" %s [OPTIONS] master register\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"),
|
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"),
|
||||||
progname());
|
progname());
|
||||||
|
printf(_(" %s [OPTIONS] witness {create|unregister}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname());
|
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname());
|
||||||
printf(_("\n"));
|
printf(_("\n"));
|
||||||
printf(_("General options:\n"));
|
printf(_("General options:\n"));
|
||||||
@@ -4293,7 +4380,7 @@ do_help(void)
|
|||||||
printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
|
printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
|
||||||
" copied to\n"));
|
" copied to\n"));
|
||||||
printf(_(" -f, --config-file=PATH path to the configuration file\n"));
|
printf(_(" -f, --config-file=PATH path to the configuration file\n"));
|
||||||
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
printf(_(" -R, --remote-user=USERNAME database server username for rsync (default: \"%s\")\n"), runtime_options.username);
|
||||||
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
||||||
printf(_(" --check-upstream-config verify upstream server configuration\n"));
|
printf(_(" --check-upstream-config verify upstream server configuration\n"));
|
||||||
printf(_("\n"));
|
printf(_("\n"));
|
||||||
@@ -4347,7 +4434,7 @@ create_recovery_file(const char *data_dir, PGconn *primary_conn)
|
|||||||
char recovery_file_path[MAXLEN];
|
char recovery_file_path[MAXLEN];
|
||||||
char line[MAXLEN];
|
char line[MAXLEN];
|
||||||
|
|
||||||
maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
|
maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_COMMAND_FILE);
|
||||||
|
|
||||||
recovery_file = fopen(recovery_file_path, "w");
|
recovery_file = fopen(recovery_file_path, "w");
|
||||||
if (recovery_file == NULL)
|
if (recovery_file == NULL)
|
||||||
@@ -5698,15 +5785,34 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: better error handling */
|
if (outputbuf != NULL)
|
||||||
while (fgets(output, MAXLEN, fp) != NULL)
|
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(outputbuf, "%s", output);
|
/* TODO: better error handling */
|
||||||
|
while (fgets(output, MAXLEN, fp) != NULL)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(outputbuf, "%s", output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* When executed remotely, repmgr commands which execute pg_ctl (particularly
|
||||||
|
* `repmgr standby follow`) will see the pg_ctl command appear to fail with a
|
||||||
|
* non-zero return code when the output from the executed pg_ctl command
|
||||||
|
* has nowhere to go, even though the command actually succeeds. We'll consume an
|
||||||
|
* arbitrary amount of output and throw it away to work around this.
|
||||||
|
*/
|
||||||
|
int i = 0;
|
||||||
|
while (fgets(output, MAXLEN, fp) != NULL && i < 10)
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pclose(fp);
|
pclose(fp);
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "remote_command(): output returned was:\n%s", outputbuf->data);
|
if (outputbuf != NULL)
|
||||||
|
log_verbose(LOG_DEBUG, "remote_command(): output returned was:\n%s", outputbuf->data);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -101,6 +101,29 @@
|
|||||||
# (if not provided, defaults to system $PATH)
|
# (if not provided, defaults to system $PATH)
|
||||||
#pg_bindir=/usr/bin/
|
#pg_bindir=/usr/bin/
|
||||||
|
|
||||||
|
# service control commands
|
||||||
|
#
|
||||||
|
# repmgr provides options to override the default pg_ctl commands
|
||||||
|
# used to stop, start and restart the PostgreSQL cluster
|
||||||
|
#
|
||||||
|
# NOTE: These commands must be runnable on remote nodes as well for switchover
|
||||||
|
# to function correctly.
|
||||||
|
#
|
||||||
|
# If you use sudo, the user repmgr runs as (usually 'postgres') must have
|
||||||
|
# passwordless sudo access to execute the command
|
||||||
|
#
|
||||||
|
# For example, to use systemd, you may use the following configuration:
|
||||||
|
#
|
||||||
|
# # this is required when running sudo over ssh without -t:
|
||||||
|
# Defaults:postgres !requiretty
|
||||||
|
# postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.5, \
|
||||||
|
# /usr/bin/systemctl start postgresql-9.5, \
|
||||||
|
# /usr/bin/systemctl restart postgresql-9.5
|
||||||
|
#
|
||||||
|
# start_command = systemctl start postgresql-9.5
|
||||||
|
# stop_command = systemctl stop postgresql-9.5
|
||||||
|
# restart_command = systemctl restart postgresql-9.5
|
||||||
|
|
||||||
# external command options
|
# external command options
|
||||||
|
|
||||||
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||||
@@ -144,10 +167,18 @@
|
|||||||
#reconnect_interval=10
|
#reconnect_interval=10
|
||||||
|
|
||||||
# Autofailover options
|
# Autofailover options
|
||||||
#failover=manual # one of 'automatic', 'manual'
|
#failover=manual # one of 'automatic', 'manual' (default: manual)
|
||||||
# (default: manual)
|
# defines the action to take in the event of upstream failure
|
||||||
#priority=100 # a value of zero or less prevents the node being promoted to primary
|
#
|
||||||
|
# 'automatic': repmgrd will automatically attempt to promote the
|
||||||
|
# node or follow the new upstream node
|
||||||
|
# 'manual': repmgrd will take no action and the node will require
|
||||||
|
# manual attention to reattach it to replication
|
||||||
|
|
||||||
|
#priority=100 # indicate a preferred priority for promoting nodes
|
||||||
|
# a value of zero or less prevents the node being promoted to primary
|
||||||
# (default: 100)
|
# (default: 100)
|
||||||
|
|
||||||
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||||
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||||
|
|
||||||
|
|||||||
14
repmgr.h
14
repmgr.h
@@ -47,6 +47,15 @@
|
|||||||
#define NO_UPSTREAM_NODE -1
|
#define NO_UPSTREAM_NODE -1
|
||||||
#define UNKNOWN_NODE_ID -1
|
#define UNKNOWN_NODE_ID -1
|
||||||
|
|
||||||
|
#define OPT_HELP 1
|
||||||
|
#define OPT_CHECK_UPSTREAM_CONFIG 2
|
||||||
|
#define OPT_RECOVERY_MIN_APPLY_DELAY 3
|
||||||
|
#define OPT_IGNORE_EXTERNAL_CONFIG_FILES 4
|
||||||
|
#define OPT_CONFIG_ARCHIVE_DIR 5
|
||||||
|
#define OPT_PG_REWIND 6
|
||||||
|
#define OPT_PWPROMPT 7
|
||||||
|
#define OPT_CSV 8
|
||||||
|
#define OPT_INITDB_NO_PWPROMPT 9
|
||||||
|
|
||||||
|
|
||||||
/* Run time options type */
|
/* Run time options type */
|
||||||
@@ -92,11 +101,10 @@ typedef struct
|
|||||||
char recovery_min_apply_delay[MAXLEN];
|
char recovery_min_apply_delay[MAXLEN];
|
||||||
|
|
||||||
/* deprecated command line options */
|
/* deprecated command line options */
|
||||||
char localport[MAXLEN];
|
char localport[MAXLEN];
|
||||||
bool initdb_no_pwprompt;
|
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", "", false }
|
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", ""}
|
||||||
|
|
||||||
struct BackupLabel
|
struct BackupLabel
|
||||||
{
|
{
|
||||||
|
|||||||
226
repmgrd.c
226
repmgrd.c
@@ -41,7 +41,10 @@
|
|||||||
#include "access/xlogdefs.h"
|
#include "access/xlogdefs.h"
|
||||||
#include "pqexpbuffer.h"
|
#include "pqexpbuffer.h"
|
||||||
|
|
||||||
|
/* Message strings passed in repmgrSharedState->location */
|
||||||
|
|
||||||
|
#define PASSIVE_NODE "PASSIVE_NODE"
|
||||||
|
#define LSN_QUERY_ERROR "LSN_QUERY_ERROR"
|
||||||
|
|
||||||
/* Local info */
|
/* Local info */
|
||||||
t_configuration_options local_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
t_configuration_options local_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||||
@@ -59,6 +62,13 @@ t_node_info node_info;
|
|||||||
|
|
||||||
bool failover_done = false;
|
bool failover_done = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* when `failover=manual`, and the upstream server has gone away,
|
||||||
|
* this flag is set to indicate we should connect to whatever the
|
||||||
|
* current master is to update monitoring information
|
||||||
|
*/
|
||||||
|
bool manual_mode_upstream_disconnected = false;
|
||||||
|
|
||||||
char *pid_file = NULL;
|
char *pid_file = NULL;
|
||||||
|
|
||||||
static void help(void);
|
static void help(void);
|
||||||
@@ -124,7 +134,7 @@ main(int argc, char **argv)
|
|||||||
{"monitoring-history", no_argument, NULL, 'm'},
|
{"monitoring-history", no_argument, NULL, 'm'},
|
||||||
{"daemonize", no_argument, NULL, 'd'},
|
{"daemonize", no_argument, NULL, 'd'},
|
||||||
{"pid-file", required_argument, NULL, 'p'},
|
{"pid-file", required_argument, NULL, 'p'},
|
||||||
{"help", no_argument, NULL, '?'},
|
{"help", no_argument, NULL, OPT_HELP},
|
||||||
{"version", no_argument, NULL, 'V'},
|
{"version", no_argument, NULL, 'V'},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
@@ -158,6 +168,23 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
|
case '?':
|
||||||
|
/* Actual help option given */
|
||||||
|
if (strcmp(argv[optind - 1], "-?") == 0)
|
||||||
|
{
|
||||||
|
help();
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
/* unknown option reported by getopt */
|
||||||
|
else
|
||||||
|
goto unknown_option;
|
||||||
|
break;
|
||||||
|
case OPT_HELP:
|
||||||
|
help();
|
||||||
|
exit(SUCCESS);
|
||||||
|
case 'V':
|
||||||
|
printf("%s %s (PostgreSQL %s)\n", progname(), REPMGR_VERSION, PG_VERSION);
|
||||||
|
exit(SUCCESS);
|
||||||
case 'f':
|
case 'f':
|
||||||
config_file = optarg;
|
config_file = optarg;
|
||||||
break;
|
break;
|
||||||
@@ -173,13 +200,9 @@ main(int argc, char **argv)
|
|||||||
case 'p':
|
case 'p':
|
||||||
pid_file = optarg;
|
pid_file = optarg;
|
||||||
break;
|
break;
|
||||||
case '?':
|
|
||||||
help();
|
|
||||||
exit(SUCCESS);
|
|
||||||
case 'V':
|
|
||||||
printf("%s %s (PostgreSQL %s)\n", progname(), REPMGR_VERSION, PG_VERSION);
|
|
||||||
exit(SUCCESS);
|
|
||||||
default:
|
default:
|
||||||
|
unknown_option:
|
||||||
usage();
|
usage();
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -433,6 +456,7 @@ main(int argc, char **argv)
|
|||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
update_registration();
|
update_registration();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Log startup event */
|
/* Log startup event */
|
||||||
if (startup_event_logged == false)
|
if (startup_event_logged == false)
|
||||||
{
|
{
|
||||||
@@ -639,7 +663,7 @@ witness_monitor(void)
|
|||||||
local_options.master_response_timeout) != 1)
|
local_options.master_response_timeout) != 1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Get local xlog info */
|
/* Get timestamp for monitoring update */
|
||||||
sqlquery_snprintf(sqlquery, "SELECT CURRENT_TIMESTAMP");
|
sqlquery_snprintf(sqlquery, "SELECT CURRENT_TIMESTAMP");
|
||||||
|
|
||||||
res = PQexec(my_local_conn, sqlquery);
|
res = PQexec(my_local_conn, sqlquery);
|
||||||
@@ -720,6 +744,8 @@ standby_monitor(void)
|
|||||||
const char *upstream_node_type = NULL;
|
const char *upstream_node_type = NULL;
|
||||||
|
|
||||||
bool receiving_streamed_wal = true;
|
bool receiving_streamed_wal = true;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Verify that the local node is still available - if not there's
|
* Verify that the local node is still available - if not there's
|
||||||
* no point in doing much else anyway
|
* no point in doing much else anyway
|
||||||
@@ -741,15 +767,32 @@ standby_monitor(void)
|
|||||||
goto continue_monitoring_standby;
|
goto continue_monitoring_standby;
|
||||||
}
|
}
|
||||||
|
|
||||||
upstream_conn = get_upstream_connection(my_local_conn,
|
/*
|
||||||
local_options.cluster_name,
|
* Standby has `failover` set to manual and is disconnected from
|
||||||
local_options.node,
|
* replication following a prior upstream node failure - we'll
|
||||||
&upstream_node_id,
|
* find the master to be able to write monitoring information, if
|
||||||
upstream_conninfo);
|
* required
|
||||||
|
*/
|
||||||
|
if (manual_mode_upstream_disconnected == true)
|
||||||
|
{
|
||||||
|
upstream_conn = get_master_connection(my_local_conn,
|
||||||
|
local_options.cluster_name,
|
||||||
|
&upstream_node_id,
|
||||||
|
upstream_conninfo);
|
||||||
|
upstream_node_type = "master";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
upstream_conn = get_upstream_connection(my_local_conn,
|
||||||
|
local_options.cluster_name,
|
||||||
|
local_options.node,
|
||||||
|
&upstream_node_id,
|
||||||
|
upstream_conninfo);
|
||||||
|
|
||||||
upstream_node_type = (upstream_node_id == master_options.node)
|
upstream_node_type = (upstream_node_id == master_options.node)
|
||||||
? "master"
|
? "master"
|
||||||
: "upstream";
|
: "upstream";
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that the upstream node is still available
|
* Check that the upstream node is still available
|
||||||
@@ -764,29 +807,52 @@ standby_monitor(void)
|
|||||||
|
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
int previous_master_node_id = master_options.node;
|
||||||
|
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
upstream_conn = NULL;
|
upstream_conn = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When `failover=manual`, no actual failover will be performed, instead
|
||||||
|
* the following happens:
|
||||||
|
* - find the new master
|
||||||
|
* - create an event notification `standby_disconnect_manual`
|
||||||
|
* - set a flag to indicate we're disconnected from replication
|
||||||
|
*/
|
||||||
if (local_options.failover == MANUAL_FAILOVER)
|
if (local_options.failover == MANUAL_FAILOVER)
|
||||||
{
|
{
|
||||||
log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), upstream_node_type);
|
log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), upstream_node_type);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set the location string in shared memory to indicate to other
|
||||||
|
* repmgrd instances that we're *not* a promotion candidate and
|
||||||
|
* that other repmgrd instances should not expect location updates
|
||||||
|
* from us
|
||||||
|
*/
|
||||||
|
|
||||||
|
update_shared_memory(PASSIVE_NODE);
|
||||||
|
|
||||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||||
{
|
{
|
||||||
master_conn = get_master_connection(my_local_conn,
|
master_conn = get_master_connection(my_local_conn,
|
||||||
local_options.cluster_name, &master_options.node, NULL);
|
local_options.cluster_name, &master_options.node, NULL);
|
||||||
|
|
||||||
if (PQstatus(master_conn) == CONNECTION_OK)
|
if (PQstatus(master_conn) == CONNECTION_OK)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Connected, we can continue the process so break the
|
* Connected, we can continue the process so break the
|
||||||
* loop
|
* loop
|
||||||
*/
|
*/
|
||||||
log_err(_("connected to node %d, continuing monitoring.\n"),
|
log_notice(_("connected to node %d, continuing monitoring.\n"),
|
||||||
master_options.node);
|
master_options.node);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* XXX this is the only place where `retry_promote_interval_secs`
|
||||||
|
* is used - this parameter should be renamed or possibly be replaced
|
||||||
|
*/
|
||||||
log_err(
|
log_err(
|
||||||
_("no new master found, waiting %i seconds before retry...\n"),
|
_("no new master found, waiting %i seconds before retry...\n"),
|
||||||
local_options.retry_promote_interval_secs
|
local_options.retry_promote_interval_secs
|
||||||
@@ -816,6 +882,36 @@ standby_monitor(void)
|
|||||||
|
|
||||||
terminate(ERR_DB_CON);
|
terminate(ERR_DB_CON);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* connected to a master - is it the same as the former upstream?
|
||||||
|
* if not:
|
||||||
|
* - create event `standby_disconnect_manual`
|
||||||
|
* - set global `manual_mode_upstream_disconnected`
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (previous_master_node_id != master_options.node)
|
||||||
|
{
|
||||||
|
PQExpBufferData errmsg;
|
||||||
|
initPQExpBuffer(&errmsg);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&errmsg,
|
||||||
|
_("node %i is in manual failover mode and is now disconnected from replication"),
|
||||||
|
local_options.node);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "old master: %i; current: %i\n", previous_master_node_id, master_options.node);
|
||||||
|
|
||||||
|
manual_mode_upstream_disconnected = true;
|
||||||
|
|
||||||
|
create_event_record(master_conn,
|
||||||
|
&local_options,
|
||||||
|
local_options.node,
|
||||||
|
"standby_disconnect_manual",
|
||||||
|
/* here "true" indicates the action has occurred as expected */
|
||||||
|
true,
|
||||||
|
errmsg.data);
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
||||||
{
|
{
|
||||||
@@ -916,8 +1012,8 @@ standby_monitor(void)
|
|||||||
* the stream. If we set the local standby node as failed and it's now running
|
* the stream. If we set the local standby node as failed and it's now running
|
||||||
* and receiving replication data, we should activate it again.
|
* and receiving replication data, we should activate it again.
|
||||||
*/
|
*/
|
||||||
set_local_node_status();
|
set_local_node_status();
|
||||||
log_info(_("standby connection recovered!\n"));
|
log_info(_("standby connection recovered!\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fast path for the case where no history is requested */
|
/* Fast path for the case where no history is requested */
|
||||||
@@ -929,6 +1025,7 @@ standby_monitor(void)
|
|||||||
* from the upstream node to write monitoring information
|
* from the upstream node to write monitoring information
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* XXX not used? */
|
||||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
||||||
|
|
||||||
sprintf(sqlquery,
|
sprintf(sqlquery,
|
||||||
@@ -983,12 +1080,19 @@ standby_monitor(void)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
/* Get local xlog info */
|
/* Get local xlog info */
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT CURRENT_TIMESTAMP, "
|
" SELECT ts, "
|
||||||
"pg_catalog.pg_last_xlog_receive_location(), "
|
" receive_location, "
|
||||||
"pg_catalog.pg_last_xlog_replay_location(), "
|
" replay_location, "
|
||||||
"pg_catalog.pg_last_xact_replay_timestamp(), "
|
" replay_timestamp, "
|
||||||
"pg_catalog.pg_last_xlog_receive_location() >= pg_catalog.pg_last_xlog_replay_location()");
|
" receive_location >= replay_location "
|
||||||
|
" FROM (SELECT CURRENT_TIMESTAMP AS ts, "
|
||||||
|
" pg_catalog.pg_last_xlog_receive_location() AS receive_location, "
|
||||||
|
" pg_catalog.pg_last_xlog_replay_location() AS replay_location, "
|
||||||
|
" pg_catalog.pg_last_xact_replay_timestamp() AS replay_timestamp "
|
||||||
|
" ) q ");
|
||||||
|
|
||||||
|
|
||||||
res = PQexec(my_local_conn, sqlquery);
|
res = PQexec(my_local_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -1073,10 +1177,12 @@ standby_monitor(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;
|
|
||||||
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
||||||
|
|
||||||
|
apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Calculate replication lag */
|
/* Calculate replication lag */
|
||||||
if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location)
|
if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location)
|
||||||
{
|
{
|
||||||
@@ -1157,8 +1263,6 @@ do_master_failover(void)
|
|||||||
XLogRecPtr xlog_recptr;
|
XLogRecPtr xlog_recptr;
|
||||||
bool lsn_format_ok;
|
bool lsn_format_ok;
|
||||||
|
|
||||||
char last_xlog_replay_location[MAXLEN];
|
|
||||||
|
|
||||||
PGconn *node_conn = NULL;
|
PGconn *node_conn = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1339,8 +1443,8 @@ do_master_failover(void)
|
|||||||
" considered as new master and exit.\n"),
|
" considered as new master and exit.\n"),
|
||||||
PQerrorMessage(my_local_conn));
|
PQerrorMessage(my_local_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0);
|
|
||||||
update_shared_memory(last_xlog_replay_location);
|
update_shared_memory(LSN_QUERY_ERROR);
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
/* write last location in shared memory */
|
/* write last location in shared memory */
|
||||||
@@ -1390,6 +1494,7 @@ do_master_failover(void)
|
|||||||
|
|
||||||
while (!nodes[i].is_ready)
|
while (!nodes[i].is_ready)
|
||||||
{
|
{
|
||||||
|
char location_value[MAXLEN];
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT %s.repmgr_get_last_standby_location()",
|
"SELECT %s.repmgr_get_last_standby_location()",
|
||||||
@@ -1405,7 +1510,11 @@ do_master_failover(void)
|
|||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
|
/* Copy the returned value as we'll need to reference it a few times */
|
||||||
|
strncpy(location_value, PQgetvalue(res, 0, 0), MAXLEN);
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
xlog_recptr = lsn_to_xlogrecptr(location_value, &lsn_format_ok);
|
||||||
|
|
||||||
/* If position reported as "invalid", check for format error or
|
/* If position reported as "invalid", check for format error or
|
||||||
* empty string; otherwise position is 0/0 and we need to continue
|
* empty string; otherwise position is 0/0 and we need to continue
|
||||||
@@ -1413,10 +1522,36 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
if (xlog_recptr == InvalidXLogRecPtr)
|
if (xlog_recptr == InvalidXLogRecPtr)
|
||||||
{
|
{
|
||||||
|
bool continue_loop = true;
|
||||||
|
|
||||||
if (lsn_format_ok == false)
|
if (lsn_format_ok == false)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The node is indicating it is not a promotion candidate -
|
||||||
|
* in this case we can store its invalid LSN to ensure it
|
||||||
|
* can't be a promotion candidate when comparing locations
|
||||||
|
*/
|
||||||
|
if (strcmp(location_value, PASSIVE_NODE) == 0)
|
||||||
|
{
|
||||||
|
log_debug("node %i is passive mode\n", nodes[i].node_id);
|
||||||
|
log_info(_("node %i will not be considered for promotion\n"), nodes[i].node_id);
|
||||||
|
nodes[i].xlog_location = InvalidXLogRecPtr;
|
||||||
|
continue_loop = false;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* This should probably never happen but if it does, rule the
|
||||||
|
* node out as a promotion candidate
|
||||||
|
*/
|
||||||
|
else if (strcmp(location_value, LSN_QUERY_ERROR) == 0)
|
||||||
|
{
|
||||||
|
log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"), nodes[i].node_id);
|
||||||
|
nodes[i].xlog_location = InvalidXLogRecPtr;
|
||||||
|
continue_loop = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
||||||
if (*PQgetvalue(res, 0, 0) == '\0')
|
else if (*location_value == '\0')
|
||||||
{
|
{
|
||||||
log_crit(
|
log_crit(
|
||||||
_("unable to obtain LSN from node %i"), nodes[i].node_id
|
_("unable to obtain LSN from node %i"), nodes[i].node_id
|
||||||
@@ -1425,8 +1560,8 @@ do_master_failover(void)
|
|||||||
_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n")
|
_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n")
|
||||||
);
|
);
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
|
/* XXX shouldn't we just ignore this node? */
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1434,25 +1569,29 @@ do_master_failover(void)
|
|||||||
* Very unlikely to happen; in the absence of any better
|
* Very unlikely to happen; in the absence of any better
|
||||||
* strategy keep checking
|
* strategy keep checking
|
||||||
*/
|
*/
|
||||||
log_warning(_("unable to parse LSN \"%s\"\n"),
|
else {
|
||||||
PQgetvalue(res, 0, 0));
|
log_warning(_("unable to parse LSN \"%s\"\n"),
|
||||||
|
location_value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_debug(
|
log_debug(
|
||||||
_("invalid LSN returned from node %i: '%s'\n"),
|
_("invalid LSN returned from node %i: '%s'\n"),
|
||||||
nodes[i].node_id,
|
nodes[i].node_id,
|
||||||
PQgetvalue(res, 0, 0)
|
location_value);
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
/*
|
||||||
|
* If the node is still reporting an InvalidXLogRecPtr, it means
|
||||||
/* If position is 0/0, keep checking */
|
* its repmgrd hasn't yet had time to update it (either with a valid
|
||||||
/* XXX we should add a timeout here to prevent infinite looping
|
* XLogRecPtr or a message) so we continue looping.
|
||||||
|
*
|
||||||
|
* XXX we should add a timeout here to prevent infinite looping
|
||||||
* if the other node's repmgrd is not up
|
* if the other node's repmgrd is not up
|
||||||
*/
|
*/
|
||||||
continue;
|
if (continue_loop == true)
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nodes[i].xlog_location < xlog_recptr)
|
if (nodes[i].xlog_location < xlog_recptr)
|
||||||
@@ -1460,8 +1599,7 @@ do_master_failover(void)
|
|||||||
nodes[i].xlog_location = xlog_recptr;
|
nodes[i].xlog_location = xlog_recptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, PQgetvalue(res, 0, 0));
|
log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, location_value);
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
ready_nodes++;
|
ready_nodes++;
|
||||||
nodes[i].is_ready = true;
|
nodes[i].is_ready = true;
|
||||||
@@ -2137,7 +2275,7 @@ lsn_to_xlogrecptr(char *lsn, bool *format_ok)
|
|||||||
{
|
{
|
||||||
if (format_ok != NULL)
|
if (format_ok != NULL)
|
||||||
*format_ok = false;
|
*format_ok = false;
|
||||||
log_err(_("incorrect log location format: %s\n"), lsn);
|
log_warning(_("incorrect log location format: %s\n"), lsn);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user