mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
34 Commits
v3.3.2
...
REL3_4_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3988653d6c | ||
|
|
3f9b10a02c | ||
|
|
df34e7e8c5 | ||
|
|
668b2c9b59 | ||
|
|
9629fb6eb5 | ||
|
|
967b7c6876 | ||
|
|
120dd5b82d | ||
|
|
243b5d2b48 | ||
|
|
24a354c0a7 | ||
|
|
a4f572a1ff | ||
|
|
9ae8f5780b | ||
|
|
dd9df04334 | ||
|
|
5411225b6f | ||
|
|
cf90bc3224 | ||
|
|
6ba9077ba5 | ||
|
|
ead4866719 | ||
|
|
a0937e959f | ||
|
|
00391ba95d | ||
|
|
01edae1b20 | ||
|
|
b92d0cc696 | ||
|
|
2264848601 | ||
|
|
657125a3fb | ||
|
|
8fefb799ee | ||
|
|
37b458dfcd | ||
|
|
72b14a7274 | ||
|
|
19684f965b | ||
|
|
9690aeb030 | ||
|
|
774a3abf24 | ||
|
|
95d6f08ff4 | ||
|
|
33af998a1e | ||
|
|
18a56b266b | ||
|
|
b7d1e7a091 | ||
|
|
c7f9fbf524 | ||
|
|
e0ea9c3be4 |
@@ -21,9 +21,11 @@ copy of the relevant Copyright Assignment Form.
|
|||||||
Code style
|
Code style
|
||||||
----------
|
----------
|
||||||
|
|
||||||
Code in repmgr is formatted to a consistent style using the following command:
|
Code in repmgr should be formatted to the same standards as the main PostgreSQL
|
||||||
|
project. For more details see:
|
||||||
|
|
||||||
astyle --style=ansi --indent=tab --suffix=none *.c *.h
|
https://www.postgresql.org/docs/current/static/source-format.html
|
||||||
|
|
||||||
Contributors should reformat their code similarly before submitting code to
|
Contributors should reformat their code similarly before submitting code to
|
||||||
the project, in order to minimize merge conflicts with other work.
|
the project, in order to minimize merge conflicts with other work.
|
||||||
|
|
||||||
|
|||||||
6
HISTORY
6
HISTORY
@@ -1,3 +1,9 @@
|
|||||||
|
3.4.0 2019-02-
|
||||||
|
default log level is now INFO (Ian)
|
||||||
|
repmgr: fix `standby register --force` when updating existing node record (Ian)
|
||||||
|
repmgrd: set LSN shared memory value at standby startup (Ian)
|
||||||
|
repmgrd: improve logging during failover (Ian)
|
||||||
|
|
||||||
3.3.2 2017-06-01
|
3.3.2 2017-06-01
|
||||||
Add support for PostgreSQL 10 (Ian)
|
Add support for PostgreSQL 10 (Ian)
|
||||||
repmgr: ensure --replication-user option is honoured when passing database
|
repmgr: ensure --replication-user option is honoured when passing database
|
||||||
|
|||||||
47
README.md
47
README.md
@@ -7,9 +7,13 @@ replication capabilities with utilities to set up standby servers, monitor
|
|||||||
replication, and perform administrative tasks such as failover or switchover
|
replication, and perform administrative tasks such as failover or switchover
|
||||||
operations.
|
operations.
|
||||||
|
|
||||||
The current `repmgr` version (3.3) supports all PostgreSQL versions from
|
This `repmgr` version (3.4) supports PostgreSQL versions from
|
||||||
9.3 to 9.6.
|
9.3 to 9.6.
|
||||||
|
|
||||||
|
*NOTE*: we strongly recommend using the repmgr 4.x series, which contains
|
||||||
|
many new features and usability enhancements and is being actively developed
|
||||||
|
and maintained.
|
||||||
|
|
||||||
Overview
|
Overview
|
||||||
--------
|
--------
|
||||||
|
|
||||||
@@ -189,6 +193,14 @@ system.
|
|||||||
Instructions can be found in the APT section of the PostgreSQL Wiki
|
Instructions can be found in the APT section of the PostgreSQL Wiki
|
||||||
( https://wiki.postgresql.org/wiki/Apt ).
|
( https://wiki.postgresql.org/wiki/Apt ).
|
||||||
|
|
||||||
|
*NOTE*: repmgr 3.3 packages are now only available via a 2ndQuadrant-hosted
|
||||||
|
repository which can be installed like this:
|
||||||
|
|
||||||
|
apt-key adv --fetch-keys http://packages.2ndquadrant.com/repmgr3/apt/0xD3FA41F6.asc
|
||||||
|
|
||||||
|
echo deb http://packages.2ndquadrant.com/repmgr3/apt/ $(lsb_release -cs)-2ndquadrant main > /etc/apt/sources.list.d/repmgr3.list
|
||||||
|
|
||||||
|
|
||||||
See `PACKAGES.md` for details on building .deb and .rpm packages from the
|
See `PACKAGES.md` for details on building .deb and .rpm packages from the
|
||||||
`repmgr` source code.
|
`repmgr` source code.
|
||||||
|
|
||||||
@@ -202,7 +214,7 @@ See `PACKAGES.md` for details on building .deb and .rpm packages from the
|
|||||||
Release tarballs are also available:
|
Release tarballs are also available:
|
||||||
|
|
||||||
https://github.com/2ndQuadrant/repmgr/releases
|
https://github.com/2ndQuadrant/repmgr/releases
|
||||||
http://repmgr.org/downloads.php
|
https://repmgr.org/
|
||||||
|
|
||||||
`repmgr` is compiled in the same way as a PostgreSQL extension using the PGXS
|
`repmgr` is compiled in the same way as a PostgreSQL extension using the PGXS
|
||||||
infrastructure, e.g.:
|
infrastructure, e.g.:
|
||||||
@@ -314,7 +326,13 @@ The following replication settings may need to be adjusted:
|
|||||||
max_wal_senders = 10
|
max_wal_senders = 10
|
||||||
|
|
||||||
# Ensure WAL files contain enough information to enable read-only queries
|
# Ensure WAL files contain enough information to enable read-only queries
|
||||||
# on the standby
|
# on the standby.
|
||||||
|
#
|
||||||
|
# PostgreSQL 9.5 and earlier: one of 'hot_standby' or 'logical'
|
||||||
|
# PostgreSQL 9.6 and later: one of 'replica' or 'logical'
|
||||||
|
# ('hot_standby' will still be accepted as an alias for 'replica')
|
||||||
|
#
|
||||||
|
# See: https://www.postgresql.org/docs/current/static/runtime-config-wal.html#GUC-WAL-LEVEL
|
||||||
|
|
||||||
wal_level = 'hot_standby'
|
wal_level = 'hot_standby'
|
||||||
|
|
||||||
@@ -400,7 +418,8 @@ least the following parameters:
|
|||||||
- `conninfo`: a valid connection string for the `repmgr` database on the
|
- `conninfo`: a valid connection string for the `repmgr` database on the
|
||||||
*current* server. (On the standby, the database will not yet exist, but
|
*current* server. (On the standby, the database will not yet exist, but
|
||||||
`repmgr` needs to know the connection details to complete the setup
|
`repmgr` needs to know the connection details to complete the setup
|
||||||
process).
|
process). *NOTE* this must be a keyword/value string, not a connection
|
||||||
|
URI; this limitation will be removed in a future `repmgr` version.
|
||||||
|
|
||||||
`repmgr.conf` should not be stored inside the PostgreSQL data directory,
|
`repmgr.conf` should not be stored inside the PostgreSQL data directory,
|
||||||
as it could be overwritten when setting up or reinitialising the PostgreSQL
|
as it could be overwritten when setting up or reinitialising the PostgreSQL
|
||||||
@@ -425,7 +444,7 @@ to include this schema name, e.g.
|
|||||||
### Initialise the master server
|
### Initialise the master server
|
||||||
|
|
||||||
To enable `repmgr` to support a replication cluster, the master node must
|
To enable `repmgr` to support a replication cluster, the master node must
|
||||||
be registered with `repmgr`, which creates the `repmgr` database and adds
|
be registered with `repmgr`, which creates the `repmgr` metadatabase and adds
|
||||||
a metadata record for the server:
|
a metadata record for the server:
|
||||||
|
|
||||||
$ repmgr -f repmgr.conf master register
|
$ repmgr -f repmgr.conf master register
|
||||||
@@ -631,7 +650,7 @@ In order to enable Barman support for `repmgr standby clone`, you must
|
|||||||
ensure that:
|
ensure that:
|
||||||
|
|
||||||
- the name of the server configured in Barman is equal to the
|
- the name of the server configured in Barman is equal to the
|
||||||
`cluster_name` setting in `repmgr.conf`;
|
`cluster` setting in `repmgr.conf`;
|
||||||
- the `barman_server` setting in `repmgr.conf` is set to the SSH
|
- the `barman_server` setting in `repmgr.conf` is set to the SSH
|
||||||
hostname of the Barman server;
|
hostname of the Barman server;
|
||||||
- the `restore_command` setting in `repmgr.conf` is configured to
|
- the `restore_command` setting in `repmgr.conf` is configured to
|
||||||
@@ -996,6 +1015,13 @@ both passwordless SSH access and the path of `repmgr.conf` on that server.
|
|||||||
> careful preparation and with adequate attention. In particular you should
|
> careful preparation and with adequate attention. In particular you should
|
||||||
> be confident that your network environment is stable and reliable.
|
> be confident that your network environment is stable and reliable.
|
||||||
>
|
>
|
||||||
|
> Additionally you should be sure that the current master can be shut down
|
||||||
|
> quickly and cleanly. In particular, access from applications should be
|
||||||
|
> minimized or preferably blocked completely. Also check that there is
|
||||||
|
> no backlog of files waiting to be archived, as PostgreSQL will not shut
|
||||||
|
> down until archiving completes, and that any standbys attached to the
|
||||||
|
> current primary don't have a significant amount of replication lag.
|
||||||
|
>
|
||||||
> We recommend running `repmgr standby switchover` at the most verbose
|
> We recommend running `repmgr standby switchover` at the most verbose
|
||||||
> logging level (`--log-level DEBUG --verbose`) and capturing all output
|
> logging level (`--log-level DEBUG --verbose`) and capturing all output
|
||||||
> to assist troubleshooting any problems.
|
> to assist troubleshooting any problems.
|
||||||
@@ -1062,7 +1088,7 @@ should have been updated to reflect this:
|
|||||||
|
|
||||||
### Caveats
|
### Caveats
|
||||||
|
|
||||||
- The functionality provided `repmgr standby switchover` is primarily aimed
|
- The functionality provided by `repmgr standby switchover` is primarily aimed
|
||||||
at a two-server master/standby replication cluster and currently does
|
at a two-server master/standby replication cluster and currently does
|
||||||
not support additional standbys.
|
not support additional standbys.
|
||||||
- `repmgr standby switchover` is designed to use the `pg_rewind` utility,
|
- `repmgr standby switchover` is designed to use the `pg_rewind` utility,
|
||||||
@@ -1076,11 +1102,6 @@ should have been updated to reflect this:
|
|||||||
the `repmgrd` may try and promote a standby by itself.
|
the `repmgrd` may try and promote a standby by itself.
|
||||||
- Any other standbys attached to the old master will need to be manually
|
- Any other standbys attached to the old master will need to be manually
|
||||||
instructed to point to the new master (e.g. with `repmgr standby follow`).
|
instructed to point to the new master (e.g. with `repmgr standby follow`).
|
||||||
- You must ensure that following a server start using `pg_ctl`, log output
|
|
||||||
is not send to STDERR (the default behaviour). If logging is not configured,
|
|
||||||
we recommend setting `logging_collector=on` in `postgresql.conf` and
|
|
||||||
providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options`
|
|
||||||
parameter.
|
|
||||||
|
|
||||||
We hope to remove some of these restrictions in future versions of `repmgr`.
|
We hope to remove some of these restrictions in future versions of `repmgr`.
|
||||||
|
|
||||||
@@ -1610,7 +1631,7 @@ which contains connection details for the local database.
|
|||||||
|
|
||||||
Creates a witness server as a separate PostgreSQL instance. This instance
|
Creates a witness server as a separate PostgreSQL instance. This instance
|
||||||
can be on a separate server or a server running an existing node. The
|
can be on a separate server or a server running an existing node. The
|
||||||
witness server contain a copy of the repmgr metadata tables but will not
|
witness server contains a copy of the repmgr metadata tables but will not
|
||||||
be set up as a standby; instead it will update its metadata copy each
|
be set up as a standby; instead it will update its metadata copy each
|
||||||
time a failover occurs.
|
time a failover occurs.
|
||||||
|
|
||||||
|
|||||||
4
config.c
4
config.c
@@ -30,7 +30,7 @@ static void tablespace_list_append(t_configuration_options *options, const char
|
|||||||
static void exit_with_errors(ItemList *config_errors);
|
static void exit_with_errors(ItemList *config_errors);
|
||||||
|
|
||||||
const static char *_progname = NULL;
|
const static char *_progname = NULL;
|
||||||
static char config_file_path[MAXPGPATH];
|
static char config_file_path[MAXPGPATH] = "";
|
||||||
static bool config_file_provided = false;
|
static bool config_file_provided = false;
|
||||||
bool config_file_found = false;
|
bool config_file_found = false;
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ progname(void)
|
|||||||
* added/changed in reload_config()
|
* added/changed in reload_config()
|
||||||
*
|
*
|
||||||
* NOTE: this function is called before the logger is set up, so we need
|
* NOTE: this function is called before the logger is set up, so we need
|
||||||
* to handle the verbose option ourselves; also the default log level is NOTICE,
|
* to handle the verbose option ourselves; also the default log level is INFO,
|
||||||
* so we can't use DEBUG.
|
* so we can't use DEBUG.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
|
|||||||
96
dbutils.c
96
dbutils.c
@@ -322,8 +322,6 @@ is_standby(PGconn *conn)
|
|||||||
bool
|
bool
|
||||||
is_pgup(PGconn *conn, int timeout)
|
is_pgup(PGconn *conn, int timeout)
|
||||||
{
|
{
|
||||||
char sqlquery[QUERY_STR_LEN];
|
|
||||||
|
|
||||||
/* Check the connection status twice in case it changes after reset */
|
/* Check the connection status twice in case it changes after reset */
|
||||||
bool twice = false;
|
bool twice = false;
|
||||||
|
|
||||||
@@ -346,8 +344,7 @@ is_pgup(PGconn *conn, int timeout)
|
|||||||
if (wait_connection_availability(conn, timeout) != 1)
|
if (wait_connection_availability(conn, timeout) != 1)
|
||||||
goto failed;
|
goto failed;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT 1");
|
if (PQsendQuery(conn, "SELECT 1") == 0)
|
||||||
if (PQsendQuery(conn, sqlquery) == 0)
|
|
||||||
{
|
{
|
||||||
log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"),
|
log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
@@ -428,6 +425,8 @@ int
|
|||||||
get_server_version(PGconn *conn, char *server_version)
|
get_server_version(PGconn *conn, char *server_version)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
int server_version_num;
|
||||||
|
|
||||||
res = PQexec(conn,
|
res = PQexec(conn,
|
||||||
"SELECT current_setting('server_version_num'), "
|
"SELECT current_setting('server_version_num'), "
|
||||||
" current_setting('server_version')");
|
" current_setting('server_version')");
|
||||||
@@ -441,9 +440,12 @@ get_server_version(PGconn *conn, char *server_version)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (server_version != NULL)
|
if (server_version != NULL)
|
||||||
strcpy(server_version, PQgetvalue(res, 0, 0));
|
strcpy(server_version, PQgetvalue(res, 0, 1));
|
||||||
|
|
||||||
return atoi(PQgetvalue(res, 0, 0));
|
server_version_num = atoi(PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
return server_version_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1716,6 +1718,27 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
create_checkpoint(PGconn *conn)
|
||||||
|
{
|
||||||
|
char sqlquery[MAXLEN];
|
||||||
|
PGresult *res;
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery, "CHECKPOINT");
|
||||||
|
log_verbose(LOG_DEBUG, "checkpoint:\n%s\n", sqlquery);
|
||||||
|
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to create CHECKPOINT:\n%s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_notice(_("CHECKPOINT created\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
update_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
update_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
||||||
@@ -2069,3 +2092,64 @@ get_data_checksum_version(const char *data_directory)
|
|||||||
|
|
||||||
return (int)control_file.data_checksum_version;
|
return (int)control_file.data_checksum_version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================== */
|
||||||
|
/* backported from repmgr 4.x */
|
||||||
|
/* ========================== */
|
||||||
|
|
||||||
|
XLogRecPtr
|
||||||
|
parse_lsn(const char *str)
|
||||||
|
{
|
||||||
|
XLogRecPtr ptr = InvalidXLogRecPtr;
|
||||||
|
uint32 high,
|
||||||
|
low;
|
||||||
|
|
||||||
|
if (sscanf(str, "%x/%x", &high, &low) == 2)
|
||||||
|
ptr = (((XLogRecPtr) high) << 32) + (XLogRecPtr) low;
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
XLogRecPtr
|
||||||
|
get_last_wal_receive_location(PGconn *conn)
|
||||||
|
{
|
||||||
|
PGresult *res = NULL;
|
||||||
|
XLogRecPtr ptr = InvalidXLogRecPtr;
|
||||||
|
|
||||||
|
if (PQserverVersion(conn) >= 100000)
|
||||||
|
{
|
||||||
|
res = PQexec(conn, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
res = PQexec(conn, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PQresultStatus(res) == PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
ptr = parse_lsn(PQgetvalue(res, 0, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
is_server_available(const char *conninfo)
|
||||||
|
{
|
||||||
|
PGPing status = PQping(conninfo);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)status);
|
||||||
|
|
||||||
|
if (status == PQPING_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
log_warning("is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)status);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|||||||
10
dbutils.h
10
dbutils.h
@@ -28,6 +28,8 @@
|
|||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Expand an LSN into two comma-separated uint32 values: the upper 32 bits
 * followed by the lower 32 bits (presumably for use as a pair of format
 * arguments -- confirm against callers). The argument is parenthesized so
 * that expressions containing low-precedence operators expand correctly.
 */
#define format_lsn(x) (uint32) ((x) >> 32), (uint32) (x)
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
UNKNOWN = 0,
|
UNKNOWN = 0,
|
||||||
MASTER,
|
MASTER,
|
||||||
@@ -135,9 +137,15 @@ bool update_node_record(PGconn *conn, char *action, int node, char *type,
|
|||||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||||
|
void create_checkpoint(PGconn *conn);
|
||||||
|
|
||||||
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
||||||
t_server_type parse_node_type(const char *type);
|
t_server_type parse_node_type(const char *type);
|
||||||
int get_data_checksum_version(const char *data_directory);
|
int get_data_checksum_version(const char *data_directory);
|
||||||
#endif
|
|
||||||
|
|
||||||
|
/* backported from repmgr 4.x */
|
||||||
|
XLogRecPtr parse_lsn(const char *str);
|
||||||
|
XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
||||||
|
bool is_server_available(const char *conninfo);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -49,6 +49,14 @@ the `%include` directive (available from PgBouncer 1.6) to include a separate
|
|||||||
configuration file, `/etc/pgbouncer.database.ini`, which will be modified by
|
configuration file, `/etc/pgbouncer.database.ini`, which will be modified by
|
||||||
`repmgr`.
|
`repmgr`.
|
||||||
|
|
||||||
|
* * *
|
||||||
|
|
||||||
|
> *NOTE*: in this self-contained demonstration, `pgbouncer` is running on the
|
||||||
|
> database servers; however, in a production environment it will make more
|
||||||
|
> sense to run `pgbouncer` on either separate nodes or the application server.
|
||||||
|
|
||||||
|
* * *
|
||||||
|
|
||||||
`/etc/pgbouncer.ini` should look something like this:
|
`/etc/pgbouncer.ini` should look something like this:
|
||||||
|
|
||||||
[pgbouncer]
|
[pgbouncer]
|
||||||
@@ -125,7 +133,7 @@ The actual script is as follows; adjust the configurable items as appropriate:
|
|||||||
|
|
||||||
psql -d $REPMGR_DB -U $REPMGR_USER -t -A \
|
psql -d $REPMGR_DB -U $REPMGR_USER -t -A \
|
||||||
-c "SELECT '${PGBOUNCER_DATABASE}-ro= ' || conninfo || ' application_name=pgbouncer_${HOST}' \
|
-c "SELECT '${PGBOUNCER_DATABASE}-ro= ' || conninfo || ' application_name=pgbouncer_${HOST}' \
|
||||||
FROM $REPMGR_SCHEMA.repl_nodes \
|
FROM ${REPMGR_SCHEMA}.repl_nodes \
|
||||||
WHERE node_name='${HOST}'" >> $PGBOUNCER_DATABASE_INI_NEW
|
WHERE node_name='${HOST}'" >> $PGBOUNCER_DATABASE_INI_NEW
|
||||||
|
|
||||||
rsync $PGBOUNCER_DATABASE_INI_NEW $HOST:$PGBOUNCER_DATABASE_INI
|
rsync $PGBOUNCER_DATABASE_INI_NEW $HOST:$PGBOUNCER_DATABASE_INI
|
||||||
|
|||||||
4
log.c
4
log.c
@@ -44,8 +44,8 @@ static void _stderr_log_with_level(const char *level_name, int level, const char
|
|||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||||
|
|
||||||
int log_type = REPMGR_STDERR;
|
int log_type = REPMGR_STDERR;
|
||||||
int log_level = LOG_NOTICE;
|
int log_level = LOG_INFO;
|
||||||
int last_log_level = LOG_NOTICE;
|
int last_log_level = LOG_INFO;
|
||||||
int verbose_logging = false;
|
int verbose_logging = false;
|
||||||
int terse_logging = false;
|
int terse_logging = false;
|
||||||
/*
|
/*
|
||||||
|
|||||||
113
repmgr.c
113
repmgr.c
@@ -283,7 +283,11 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Pre-set any defaults , which can be overwritten if matching
|
* Pre-set any defaults , which can be overwritten if matching
|
||||||
* command line parameters are provided
|
* command line parameters are provided.
|
||||||
|
*
|
||||||
|
* Note: PQconndefaults() does not provide a default value for
|
||||||
|
* "dbname", but if none is provided will default to "username"
|
||||||
|
* when the connection is made.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (c = 0; c < source_conninfo.size && source_conninfo.keywords[c]; c++)
|
for (c = 0; c < source_conninfo.size && source_conninfo.keywords[c]; c++)
|
||||||
@@ -316,7 +320,6 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* set default user for -R/--remote-user */
|
/* set default user for -R/--remote-user */
|
||||||
|
|
||||||
{
|
{
|
||||||
struct passwd *pw = NULL;
|
struct passwd *pw = NULL;
|
||||||
|
|
||||||
@@ -330,15 +333,6 @@ main(int argc, char **argv)
|
|||||||
strncpy(runtime_options.username, pw->pw_name, MAXLEN);
|
strncpy(runtime_options.username, pw->pw_name, MAXLEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Though libpq will default to the username as dbname, PQconndefaults()
|
|
||||||
* doesn't return this
|
|
||||||
*/
|
|
||||||
if (runtime_options.dbname[0] == '\0')
|
|
||||||
{
|
|
||||||
strncpy(runtime_options.dbname, runtime_options.username, MAXLEN);
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "?Vd:h:p:U:S:D:f:R:w:k:FWIvb:rcL:tm:C:l:P", long_options,
|
while ((c = getopt_long(argc, argv, "?Vd:h:p:U:S:D:f:R:w:k:FWIvb:rcL:tm:C:l:P", long_options,
|
||||||
&optindex)) != -1)
|
&optindex)) != -1)
|
||||||
{
|
{
|
||||||
@@ -1307,7 +1301,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
|
|||||||
*/
|
*/
|
||||||
appendPQExpBuffer(&command,
|
appendPQExpBuffer(&command,
|
||||||
"\"%s -d '%s' --cluster '%s' ",
|
"\"%s -d '%s' --cluster '%s' ",
|
||||||
make_pg_path("repmgr"),
|
make_pg_path((char *)progname()),
|
||||||
PQgetvalue(res, i, 0),
|
PQgetvalue(res, i, 0),
|
||||||
PQgetvalue(res, i, 5));
|
PQgetvalue(res, i, 5));
|
||||||
|
|
||||||
@@ -1572,7 +1566,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
|
|||||||
|
|
||||||
appendPQExpBuffer(&command,
|
appendPQExpBuffer(&command,
|
||||||
"%s -d '%s' --cluster '%s' --node=%i ",
|
"%s -d '%s' --cluster '%s' --node=%i ",
|
||||||
make_pg_path("repmgr"),
|
make_pg_path((char *)progname()),
|
||||||
PQgetvalue(res, i, 0),
|
PQgetvalue(res, i, 0),
|
||||||
options.cluster_name,
|
options.cluster_name,
|
||||||
remote_node_id);
|
remote_node_id);
|
||||||
@@ -2119,7 +2113,7 @@ do_standby_register(void)
|
|||||||
|
|
||||||
if (node_result)
|
if (node_result)
|
||||||
{
|
{
|
||||||
if (node_record.active == true)
|
if (node_record.active == true && node_record.node_id != options.node)
|
||||||
{
|
{
|
||||||
log_err(_("Node %i exists already with node_name \"%s\"\n"),
|
log_err(_("Node %i exists already with node_name \"%s\"\n"),
|
||||||
node_record.node_id,
|
node_record.node_id,
|
||||||
@@ -2638,6 +2632,7 @@ get_tablespace_data_barman
|
|||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
get_barman_property(char *dst, char *name, char *local_repmgr_directory)
|
get_barman_property(char *dst, char *name, char *local_repmgr_directory)
|
||||||
{
|
{
|
||||||
@@ -3606,15 +3601,15 @@ do_standby_clone(void)
|
|||||||
/* Only from 9.4 */
|
/* Only from 9.4 */
|
||||||
"pg_dynshmem", "pg_logical", "pg_logical/snapshots", "pg_logical/mappings", "pg_replslot",
|
"pg_dynshmem", "pg_logical", "pg_logical/snapshots", "pg_logical/mappings", "pg_replslot",
|
||||||
/* Already in 9.3 */
|
/* Already in 9.3 */
|
||||||
"pg_notify", "pg_serial", "pg_snapshots", "pg_stat", "pg_stat_tmp", "pg_tblspc",
|
"pg_notify", "pg_serial", "pg_snapshots", "pg_stat", "pg_stat_tmp",
|
||||||
"pg_twophase", "pg_xlog", 0
|
"pg_subtrans", "pg_tblspc", "pg_twophase", "pg_xlog", 0
|
||||||
};
|
};
|
||||||
const int vers[] = {
|
const int vers[] = {
|
||||||
100000,
|
100000,
|
||||||
90500,
|
90500,
|
||||||
90400, 90400, 90400, 90400, 90400,
|
90400, 90400, 90400, 90400, 90400,
|
||||||
0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0,
|
||||||
0, -100000, 0
|
0, 0, 0, -100000
|
||||||
};
|
};
|
||||||
for (i = 0; dirs[i]; i++)
|
for (i = 0; dirs[i]; i++)
|
||||||
{
|
{
|
||||||
@@ -4092,7 +4087,16 @@ stop_backup:
|
|||||||
|
|
||||||
/* Finally, write the recovery.conf file */
|
/* Finally, write the recovery.conf file */
|
||||||
|
|
||||||
create_recovery_file(local_data_directory, &recovery_conninfo);
|
if (create_recovery_file( local_data_directory, &recovery_conninfo) == false)
|
||||||
|
{
|
||||||
|
/* create_recovery_file() will log an error */
|
||||||
|
log_notice(_("unable to create recovery.conf; see preceding error messages\n"));
|
||||||
|
log_hint(_("data directory (\"%s\") may need to be cleaned up manually\n"),
|
||||||
|
local_data_directory);
|
||||||
|
|
||||||
|
PQfinish(source_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
if (mode == barman)
|
if (mode == barman)
|
||||||
{
|
{
|
||||||
@@ -4197,27 +4201,13 @@ stop_backup:
|
|||||||
exit(retval);
|
exit(retval);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
parse_lsn(XLogRecPtr *ptr, const char *str)
|
|
||||||
{
|
|
||||||
uint32 high, low;
|
|
||||||
|
|
||||||
if (sscanf(str, "%x/%x", &high, &low) != 2)
|
|
||||||
return;
|
|
||||||
|
|
||||||
*ptr = (((XLogRecPtr)high) << 32) + (XLogRecPtr)low;
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static XLogRecPtr
|
static XLogRecPtr
|
||||||
parse_label_lsn(const char *label_key, const char *label_value)
|
parse_label_lsn(const char *label_key, const char *label_value)
|
||||||
{
|
{
|
||||||
XLogRecPtr ptr = InvalidXLogRecPtr;
|
XLogRecPtr ptr = parse_lsn(label_value);
|
||||||
|
|
||||||
parse_lsn(&ptr, label_value);
|
/* parse_lsn() will return InvalidXLogRecPtr if it can't parse the label value */
|
||||||
|
|
||||||
/* parse_lsn() will not modify ptr if it can't parse the label value */
|
|
||||||
if (ptr == InvalidXLogRecPtr)
|
if (ptr == InvalidXLogRecPtr)
|
||||||
{
|
{
|
||||||
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
||||||
@@ -4511,6 +4501,12 @@ do_standby_promote(void)
|
|||||||
|
|
||||||
log_notice(_("STANDBY PROMOTE successful\n"));
|
log_notice(_("STANDBY PROMOTE successful\n"));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Force a checkpoint so that pg_rewind on former master can tell that the
|
||||||
|
* servers have diverged.
|
||||||
|
*/
|
||||||
|
create_checkpoint(conn);
|
||||||
|
|
||||||
/* Log the event */
|
/* Log the event */
|
||||||
create_event_record(conn,
|
create_event_record(conn,
|
||||||
&options,
|
&options,
|
||||||
@@ -4978,7 +4974,9 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("unable to connect via SSH to host %s, user %s\n"), remote_host, runtime_options.remote_user);
|
log_err(_("unable to connect via SSH to host \"%s\", user \"%s\"\n"),
|
||||||
|
remote_host, runtime_options.remote_user);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (get_pg_setting(remote_conn, "data_directory", remote_data_directory) == false)
|
if (get_pg_setting(remote_conn, "data_directory", remote_data_directory) == false)
|
||||||
@@ -5281,7 +5279,7 @@ do_standby_switchover(void)
|
|||||||
initPQExpBuffer(&remote_command_str);
|
initPQExpBuffer(&remote_command_str);
|
||||||
appendPQExpBuffer(&remote_command_str,
|
appendPQExpBuffer(&remote_command_str,
|
||||||
"%s standby archive-config -f ",
|
"%s standby archive-config -f ",
|
||||||
make_pg_path("repmgr"));
|
make_pg_path((char *)progname()));
|
||||||
appendShellString(&remote_command_str, runtime_options.remote_config_file);
|
appendShellString(&remote_command_str, runtime_options.remote_config_file);
|
||||||
appendPQExpBuffer(&remote_command_str,
|
appendPQExpBuffer(&remote_command_str,
|
||||||
" --config-archive-dir=");
|
" --config-archive-dir=");
|
||||||
@@ -5459,7 +5457,7 @@ do_standby_switchover(void)
|
|||||||
/* --force */
|
/* --force */
|
||||||
appendPQExpBuffer(&remote_command_str,
|
appendPQExpBuffer(&remote_command_str,
|
||||||
"%s standby restore-config -D ",
|
"%s standby restore-config -D ",
|
||||||
make_pg_path("repmgr"));
|
make_pg_path((char *)progname()));
|
||||||
appendShellString(&remote_command_str, remote_data_directory);
|
appendShellString(&remote_command_str, remote_data_directory);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -5534,14 +5532,20 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
appendPQExpBuffer(&remote_command_str,
|
appendPQExpBuffer(&remote_command_str,
|
||||||
"%s -D ",
|
"%s -D ",
|
||||||
make_pg_path("repmgr"));
|
make_pg_path((char *)progname()));
|
||||||
appendShellString(&remote_command_str, remote_data_directory);
|
appendShellString(&remote_command_str, remote_data_directory);
|
||||||
appendPQExpBuffer(&remote_command_str, " -f ");
|
appendPQExpBuffer(&remote_command_str, " -f ");
|
||||||
appendShellString(&remote_command_str, runtime_options.remote_config_file);
|
appendShellString(&remote_command_str, runtime_options.remote_config_file);
|
||||||
appendPQExpBuffer(&remote_command_str,
|
appendPQExpBuffer(&remote_command_str,
|
||||||
" %s --rsync-only --force --ignore-external-config-files standby clone",
|
" %s --rsync-only --force standby clone",
|
||||||
repmgr_db_cli_params);
|
repmgr_db_cli_params);
|
||||||
|
|
||||||
|
if (runtime_options.copy_external_config_files == true)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&remote_command_str,
|
||||||
|
" --copy-external-config-files");
|
||||||
|
}
|
||||||
|
|
||||||
log_debug("Executing:\n%s\n", remote_command_str.data);
|
log_debug("Executing:\n%s\n", remote_command_str.data);
|
||||||
|
|
||||||
initPQExpBuffer(&command_output);
|
initPQExpBuffer(&command_output);
|
||||||
@@ -5565,7 +5569,7 @@ do_standby_switchover(void)
|
|||||||
initPQExpBuffer(&remote_command_str);
|
initPQExpBuffer(&remote_command_str);
|
||||||
appendPQExpBuffer(&remote_command_str,
|
appendPQExpBuffer(&remote_command_str,
|
||||||
"%s -D ",
|
"%s -D ",
|
||||||
make_pg_path("repmgr"));
|
make_pg_path((char *)progname()));
|
||||||
appendShellString(&remote_command_str, remote_data_directory);
|
appendShellString(&remote_command_str, remote_data_directory);
|
||||||
appendPQExpBuffer(&remote_command_str, " -f ");
|
appendPQExpBuffer(&remote_command_str, " -f ");
|
||||||
appendShellString(&remote_command_str, runtime_options.remote_config_file);
|
appendShellString(&remote_command_str, runtime_options.remote_config_file);
|
||||||
@@ -5864,7 +5868,8 @@ do_standby_restore_config(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((arcdir_ent = readdir(arcdir)) != NULL) {
|
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
||||||
|
{
|
||||||
struct stat statbuf;
|
struct stat statbuf;
|
||||||
char arcdir_ent_path[MAXPGPATH];
|
char arcdir_ent_path[MAXPGPATH];
|
||||||
PQExpBufferData src_file;
|
PQExpBufferData src_file;
|
||||||
@@ -5952,9 +5957,9 @@ do_witness_create(void)
|
|||||||
char master_hba_file[MAXLEN];
|
char master_hba_file[MAXLEN];
|
||||||
bool success;
|
bool success;
|
||||||
|
|
||||||
char witness_port[MAXLEN];
|
char witness_port[MAXLEN] = "";
|
||||||
char repmgr_user[MAXLEN];
|
char repmgr_user[MAXLEN] = "";
|
||||||
char repmgr_db[MAXLEN];
|
char repmgr_db[MAXLEN] = "";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extract the repmgr user and database names from the conninfo string
|
* Extract the repmgr user and database names from the conninfo string
|
||||||
@@ -7371,7 +7376,8 @@ check_parameters_for_action(const int action)
|
|||||||
item_list_append(&cli_warnings, _("-c/--fast-checkpoint can only be used when executing STANDBY CLONE"));
|
item_list_append(&cli_warnings, _("-c/--fast-checkpoint can only be used when executing STANDBY CLONE"));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime_options.copy_external_config_files)
|
/* can be used for "standby switchover" too */
|
||||||
|
if (action != STANDBY_SWITCHOVER && runtime_options.copy_external_config_files)
|
||||||
{
|
{
|
||||||
item_list_append(&cli_warnings, _("--copy-external-config-files can only be used when executing STANDBY CLONE"));
|
item_list_append(&cli_warnings, _("--copy-external-config-files can only be used when executing STANDBY CLONE"));
|
||||||
}
|
}
|
||||||
@@ -8418,17 +8424,12 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
while (fgets(output, MAXLEN, fp) != NULL)
|
||||||
* When executed remotely, repmgr commands which execute pg_ctl (particularly
|
|
||||||
* `repmgr standby follow`) will see the pg_ctl command appear to fail with a
|
|
||||||
* non-zero return code when the output from the executed pg_ctl command
|
|
||||||
* has nowhere to go, even though the command actually succeeds. We'll consume an
|
|
||||||
* arbitrary amount of output and throw it away to work around this.
|
|
||||||
*/
|
|
||||||
int i = 0;
|
|
||||||
while (fgets(output, MAXLEN, fp) != NULL && i < 10)
|
|
||||||
{
|
{
|
||||||
i++;
|
if (!feof(fp))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,11 +26,14 @@
|
|||||||
# the server's hostname or another identifier unambiguously
|
# the server's hostname or another identifier unambiguously
|
||||||
# associated with the server to avoid confusion
|
# associated with the server to avoid confusion
|
||||||
|
|
||||||
# Database connection information as a conninfo string
|
# Database connection information as a conninfo string (this must be a
|
||||||
# This must be accessible to all servers in the cluster; for details see:
|
# keyword/value string, not a connection URI).
|
||||||
#
|
#
|
||||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||||
#
|
#
|
||||||
|
# All servers in the cluster must be able to access the database
|
||||||
|
# using this connection string.
|
||||||
|
#
|
||||||
#conninfo='host=192.168.204.104 dbname=repmgr user=repmgr'
|
#conninfo='host=192.168.204.104 dbname=repmgr user=repmgr'
|
||||||
#
|
#
|
||||||
# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
|
# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
|
||||||
@@ -63,8 +66,8 @@
|
|||||||
# -------------------------------
|
# -------------------------------
|
||||||
|
|
||||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||||
# (default: NOTICE)
|
# (default: INFO)
|
||||||
#loglevel=NOTICE
|
#loglevel=INFO
|
||||||
|
|
||||||
# Note that logging facility settings will only apply to `repmgrd` by default;
|
# Note that logging facility settings will only apply to `repmgrd` by default;
|
||||||
# `repmgr` will always write to STDERR unless the switch `--log-to-file` is
|
# `repmgr` will always write to STDERR unless the switch `--log-to-file` is
|
||||||
|
|||||||
109
repmgrd.c
109
repmgrd.c
@@ -514,6 +514,33 @@ main(int argc, char **argv)
|
|||||||
else if (node_info.type == STANDBY)
|
else if (node_info.type == STANDBY)
|
||||||
{
|
{
|
||||||
log_info(_("starting continuous standby node monitoring\n"));
|
log_info(_("starting continuous standby node monitoring\n"));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Call update_shared_memory() so it's not stuck at 0/0; this
|
||||||
|
* will otherwise cause an infinite loop on other repmgrds if
|
||||||
|
* this repmgrd does not enter failover.
|
||||||
|
*
|
||||||
|
* NOTE: this is a temporary workaround for a structural
|
||||||
|
* issue resolved through architectural redesign in repmgr 4.
|
||||||
|
*/
|
||||||
|
if (local_options.failover == MANUAL_FAILOVER)
|
||||||
|
{
|
||||||
|
update_shared_memory(PASSIVE_NODE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PQExpBufferData current_lsn;
|
||||||
|
|
||||||
|
XLogRecPtr last_wal_receive_location = get_last_wal_receive_location(my_local_conn);
|
||||||
|
|
||||||
|
initPQExpBuffer(&current_lsn);
|
||||||
|
appendPQExpBuffer(&current_lsn, "%X/%X",
|
||||||
|
format_lsn(last_wal_receive_location));
|
||||||
|
|
||||||
|
update_shared_memory(current_lsn.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(&current_lsn);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do
|
do
|
||||||
@@ -847,6 +874,8 @@ standby_monitor(void)
|
|||||||
: "upstream";
|
: "upstream";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that the upstream node is still available
|
* Check that the upstream node is still available
|
||||||
* If not, initiate failover process
|
* If not, initiate failover process
|
||||||
@@ -855,9 +884,7 @@ standby_monitor(void)
|
|||||||
* local_options.reconnect_interval seconds
|
* local_options.reconnect_interval seconds
|
||||||
*/
|
*/
|
||||||
|
|
||||||
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
|
if (!check_connection(&upstream_conn, upstream_node_type, upstream_conninfo))
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
|
||||||
{
|
{
|
||||||
int previous_master_node_id = master_options.node;
|
int previous_master_node_id = master_options.node;
|
||||||
|
|
||||||
@@ -1372,7 +1399,7 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
total_active_nodes = PQntuples(res);
|
total_active_nodes = PQntuples(res);
|
||||||
log_debug(_("%d active nodes registered\n"), total_active_nodes);
|
log_info(_("%d active nodes registered\n"), total_active_nodes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build an array with the nodes and indicate which ones are visible and
|
* Build an array with the nodes and indicate which ones are visible and
|
||||||
@@ -1421,7 +1448,7 @@ do_master_failover(void)
|
|||||||
*
|
*
|
||||||
* If the master did come back at this point, the voting algorithm should decide
|
* If the master did come back at this point, the voting algorithm should decide
|
||||||
* it's the "best candidate" anyway and no standby will promote itself or
|
* it's the "best candidate" anyway and no standby will promote itself or
|
||||||
* attempt to follow* another server.
|
* attempt to follow another server.
|
||||||
*
|
*
|
||||||
* If we don't try and connect to the master here (and the code generally
|
* If we don't try and connect to the master here (and the code generally
|
||||||
* assumes it's failed anyway) but it does come back any time from here
|
* assumes it's failed anyway) but it does come back any time from here
|
||||||
@@ -1455,8 +1482,8 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
log_debug(_("total nodes counted: registered=%d, visible=%d\n"),
|
log_info(_("total nodes counted: registered=%d, visible=%d\n"),
|
||||||
total_active_nodes, visible_nodes);
|
total_active_nodes, visible_nodes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Am I on the group that should keep alive? If I see less than half of
|
* Am I on the group that should keep alive? If I see less than half of
|
||||||
@@ -1473,7 +1500,7 @@ do_master_failover(void)
|
|||||||
/* Query all available nodes to determine readiness and LSN */
|
/* Query all available nodes to determine readiness and LSN */
|
||||||
for (i = 0; i < total_active_nodes; i++)
|
for (i = 0; i < total_active_nodes; i++)
|
||||||
{
|
{
|
||||||
log_debug("checking node %i...\n", nodes[i].node_id);
|
log_info("checking node %i...\n", nodes[i].node_id);
|
||||||
|
|
||||||
/* if the node is not visible, skip it */
|
/* if the node is not visible, skip it */
|
||||||
if (!nodes[i].is_visible)
|
if (!nodes[i].is_visible)
|
||||||
@@ -1497,31 +1524,25 @@ do_master_failover(void)
|
|||||||
if (PQstatus(node_conn) != CONNECTION_OK)
|
if (PQstatus(node_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_err(_("It seems new problems are arising, manual intervention is needed\n"));
|
log_err(_("It seems new problems are arising, manual intervention is needed\n"));
|
||||||
|
log_detail("%s\n", PQerrorMessage(node_conn));
|
||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (server_version_num >= 100000)
|
xlog_recptr = get_last_wal_receive_location(node_conn);
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
|
||||||
else
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
|
||||||
|
|
||||||
res = PQexec(node_conn, sqlquery);
|
if (xlog_recptr == InvalidXLogRecPtr)
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
|
||||||
{
|
{
|
||||||
log_info(_("unable to retrieve node's last standby location: %s\n"),
|
log_info(_("unable to retrieve last standby location for node %i: %s\n"),
|
||||||
|
nodes[i].node_id,
|
||||||
PQerrorMessage(node_conn));
|
PQerrorMessage(node_conn));
|
||||||
|
|
||||||
log_debug(_("connection details: %s\n"), nodes[i].conninfo_str);
|
log_detail(_("connection details: %s\n"), nodes[i].conninfo_str);
|
||||||
PQclear(res);
|
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
|
log_info(_("current LSN of node %i is: %X/%X\n"), nodes[i].node_id, format_lsn(xlog_recptr));
|
||||||
|
|
||||||
log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, PQgetvalue(res, 0, 0));
|
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
|
|
||||||
/* If position is 0/0, error */
|
/* If position is 0/0, error */
|
||||||
@@ -1536,7 +1557,6 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* last we get info about this node, and update shared memory */
|
/* last we get info about this node, and update shared memory */
|
||||||
|
|
||||||
if (server_version_num >= 100000)
|
if (server_version_num >= 100000)
|
||||||
sprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
sprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
||||||
else
|
else
|
||||||
@@ -1555,6 +1575,9 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
/* write last location in shared memory */
|
/* write last location in shared memory */
|
||||||
update_shared_memory(PQgetvalue(res, 0, 0));
|
update_shared_memory(PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
|
log_info("local node's LSN is %s\n", PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* Wait for each node to come up and report a valid LSN */
|
/* Wait for each node to come up and report a valid LSN */
|
||||||
@@ -1591,6 +1614,9 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
if (PQstatus(node_conn) != CONNECTION_OK)
|
if (PQstatus(node_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
log_err(_("connection to node %i has gone away:\n%s\n"),
|
||||||
|
nodes[i].node_id,
|
||||||
|
PQerrorMessage(node_conn));
|
||||||
log_info(_("At this point, it could be some race conditions "
|
log_info(_("At this point, it could be some race conditions "
|
||||||
"that are acceptable, assume the node is restarting "
|
"that are acceptable, assume the node is restarting "
|
||||||
"and starting failover procedure\n"));
|
"and starting failover procedure\n"));
|
||||||
@@ -1607,6 +1633,9 @@ do_master_failover(void)
|
|||||||
res = PQexec(node_conn, sqlquery);
|
res = PQexec(node_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Note: in repmgr4 we handle this kind of situation much more gracefully.
|
||||||
|
*/
|
||||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not "
|
log_err(_("PQexec failed: %s.\nReport an invalid value to not "
|
||||||
"be considered as new master and exit.\n"),
|
"be considered as new master and exit.\n"),
|
||||||
PQerrorMessage(node_conn));
|
PQerrorMessage(node_conn));
|
||||||
@@ -1639,8 +1668,8 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
if (strcmp(location_value, PASSIVE_NODE) == 0)
|
if (strcmp(location_value, PASSIVE_NODE) == 0)
|
||||||
{
|
{
|
||||||
log_debug("node %i is passive mode\n", nodes[i].node_id);
|
|
||||||
log_info(_("node %i will not be considered for promotion\n"), nodes[i].node_id);
|
log_info(_("node %i will not be considered for promotion\n"), nodes[i].node_id);
|
||||||
|
log_detail("node %i indicates it is a passive node\n", nodes[i].node_id);
|
||||||
nodes[i].xlog_location = InvalidXLogRecPtr;
|
nodes[i].xlog_location = InvalidXLogRecPtr;
|
||||||
continue_loop = false;
|
continue_loop = false;
|
||||||
}
|
}
|
||||||
@@ -1650,7 +1679,8 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
else if (strcmp(location_value, LSN_QUERY_ERROR) == 0)
|
else if (strcmp(location_value, LSN_QUERY_ERROR) == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"), nodes[i].node_id);
|
log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"),
|
||||||
|
nodes[i].node_id);
|
||||||
nodes[i].xlog_location = InvalidXLogRecPtr;
|
nodes[i].xlog_location = InvalidXLogRecPtr;
|
||||||
continue_loop = false;
|
continue_loop = false;
|
||||||
}
|
}
|
||||||
@@ -1658,12 +1688,8 @@ do_master_failover(void)
|
|||||||
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
||||||
else if (*location_value == '\0')
|
else if (*location_value == '\0')
|
||||||
{
|
{
|
||||||
log_crit(
|
log_crit(_("unable to obtain LSN from node %i"), nodes[i].node_id);
|
||||||
_("unable to obtain LSN from node %i"), nodes[i].node_id
|
log_hint(_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n"));
|
||||||
);
|
|
||||||
log_hint(
|
|
||||||
_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n")
|
|
||||||
);
|
|
||||||
|
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
/* XXX shouldn't we just ignore this node? */
|
/* XXX shouldn't we just ignore this node? */
|
||||||
@@ -1675,14 +1701,14 @@ do_master_failover(void)
|
|||||||
* strategy keep checking
|
* strategy keep checking
|
||||||
*/
|
*/
|
||||||
else {
|
else {
|
||||||
log_warning(_("unable to parse LSN \"%s\"\n"),
|
log_warning(_("unable to parse shared memory LSN \"%s\"\n"),
|
||||||
location_value);
|
location_value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_debug(
|
log_debug(
|
||||||
_("invalid LSN returned from node %i: '%s'\n"),
|
_("invalid shared memory LSN returned from node %i: '%s'\n"),
|
||||||
nodes[i].node_id,
|
nodes[i].node_id,
|
||||||
location_value);
|
location_value);
|
||||||
}
|
}
|
||||||
@@ -1704,7 +1730,7 @@ do_master_failover(void)
|
|||||||
nodes[i].xlog_location = xlog_recptr;
|
nodes[i].xlog_location = xlog_recptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, location_value);
|
log_info(_("shared memory LSN of node %i is: %s\n"), nodes[i].node_id, location_value);
|
||||||
|
|
||||||
ready_nodes++;
|
ready_nodes++;
|
||||||
nodes[i].is_ready = true;
|
nodes[i].is_ready = true;
|
||||||
@@ -1760,7 +1786,7 @@ do_master_failover(void)
|
|||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("best candidate node id is %i\n", best_candidate.node_id);
|
log_info("best candidate node id is %i\n", best_candidate.node_id);
|
||||||
|
|
||||||
/* if local node is the best candidate, promote it */
|
/* if local node is the best candidate, promote it */
|
||||||
if (best_candidate.node_id == local_options.node)
|
if (best_candidate.node_id == local_options.node)
|
||||||
@@ -1776,9 +1802,9 @@ do_master_failover(void)
|
|||||||
sleep(5);
|
sleep(5);
|
||||||
|
|
||||||
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
||||||
|
log_detail(_("LSN is %X/%X\n"), format_lsn(best_candidate.xlog_location));
|
||||||
log_debug("promote command is: \"%s\"\n",
|
log_info("promote command is: \"%s\"\n",
|
||||||
local_options.promote_command);
|
local_options.promote_command);
|
||||||
|
|
||||||
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
||||||
{
|
{
|
||||||
@@ -1834,6 +1860,8 @@ do_master_failover(void)
|
|||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
failed_master.node_id);
|
failed_master.node_id);
|
||||||
|
|
||||||
|
log_notice("%s\n", event_details.data);
|
||||||
|
|
||||||
/* my_local_conn is now the master */
|
/* my_local_conn is now the master */
|
||||||
create_event_record(my_local_conn,
|
create_event_record(my_local_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
@@ -1894,7 +1922,7 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
log_debug(_("executing follow command: \"%s\"\n"), local_options.follow_command);
|
log_notice(_("executing follow command: \"%s\"\n"), local_options.follow_command);
|
||||||
|
|
||||||
r = system(local_options.follow_command);
|
r = system(local_options.follow_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
@@ -2112,8 +2140,11 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
|||||||
{
|
{
|
||||||
int connection_retries;
|
int connection_retries;
|
||||||
|
|
||||||
|
if (conninfo != NULL && is_server_available(conninfo))
|
||||||
|
return true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the node is still available if after
|
* Check if the node is still available; if after
|
||||||
* local_options.reconnect_attempts * local_options.reconnect_interval
|
* local_options.reconnect_attempts * local_options.reconnect_interval
|
||||||
* seconds of retries we cannot reconnect return false
|
* seconds of retries we cannot reconnect return false
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user