mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
61 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
16896510dc | ||
|
|
1c155a1088 | ||
|
|
31d57f4122 | ||
|
|
7b313b9d71 | ||
|
|
cf126642bd | ||
|
|
52281fcde8 | ||
|
|
de573edaaa | ||
|
|
4cb7f301ad | ||
|
|
87d8de4441 | ||
|
|
6db742f81e | ||
|
|
c79933685c | ||
|
|
04ba672b9f | ||
|
|
4f4111063a | ||
|
|
3a3a536e6d | ||
|
|
6f7206a5a1 | ||
|
|
f9fd1dd227 | ||
|
|
8140ba9c27 | ||
|
|
32dba444e1 | ||
|
|
8212ff8d8a | ||
|
|
1ccd0edad2 | ||
|
|
59b31dd1ca | ||
|
|
300b9f0cc2 | ||
|
|
0efee4cf65 | ||
|
|
0cb2584886 | ||
|
|
b88d27248c | ||
|
|
683c54325e | ||
|
|
70d398cd47 | ||
|
|
7b7d80e5f2 | ||
|
|
96b0e26084 | ||
|
|
91c498f6f1 | ||
|
|
d48093e732 | ||
|
|
3f0d1754a4 | ||
|
|
f27979bbe1 | ||
|
|
e9445a5d5e | ||
|
|
9a2717b5e3 | ||
|
|
dd6ea1cd77 | ||
|
|
de5908c122 | ||
|
|
4b5c84921c | ||
|
|
aaa8d70cef | ||
|
|
ca31b846e7 | ||
|
|
a27cecb559 | ||
|
|
cf0cdfa6a1 | ||
|
|
31489d92c0 | ||
|
|
b7fd13aed2 | ||
|
|
3c4bf27aa7 | ||
|
|
0ebd9c15d9 | ||
|
|
f9dba283d4 | ||
|
|
205f1cebbb | ||
|
|
4d97c1ebf7 | ||
|
|
12c395e91f | ||
|
|
bd1e4f71d6 | ||
|
|
cb49071ea4 | ||
|
|
5ad674edff | ||
|
|
ac09bad89c | ||
|
|
009d92fec8 | ||
|
|
b3d8a68a1d | ||
|
|
05b47cb2a8 | ||
|
|
dc542a1b7d | ||
|
|
6ce8058749 | ||
|
|
2edcac77f0 | ||
|
|
f740374392 |
24
HISTORY
24
HISTORY
@@ -1,4 +1,26 @@
|
||||
3.1.0 2016-01-
|
||||
3.1.3 2016-05-17
|
||||
repmgrd: enable monitoring when a standby is catching up by
|
||||
replaying archived WAL (Ian)
|
||||
repmgrd: when upstream_node_id is NULL, assume upstream node
|
||||
to be current master (Ian)
|
||||
repmgrd: check for reappearance of the master node if standby
|
||||
promotion fails (Ian)
|
||||
improve handling of rsync failure conditions (Martín)
|
||||
|
||||
3.1.2 2016-04-12
|
||||
Fix pg_ctl path generation in do_standby_switchover() (Ian)
|
||||
Regularly sync witness server repl_nodes table (Ian)
|
||||
Documentation improvements (Gianni, dhyannataraj)
|
||||
(Experimental) ensure repmgr handles failover slots when copying
|
||||
in rsync mode (Craig, Ian)
|
||||
rsync mode handling fixes (Martín)
|
||||
Enable repmgr to compile against 9.6devel (Ian)
|
||||
|
||||
3.1.1 2016-02-24
|
||||
Add '-P/--pwprompt' option for "repmgr create witness" (Ian)
|
||||
Prevent repmgr/repmgrd running as root (Ian)
|
||||
|
||||
3.1.0 2016-02-01
|
||||
Add "repmgr standby switchover" command (Ian)
|
||||
Revised README file (Ian)
|
||||
Remove requirement for 'archive_mode' to be enabled (Ian)
|
||||
|
||||
27
Makefile
27
Makefile
@@ -2,23 +2,32 @@
|
||||
# Makefile
|
||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||
|
||||
HEADERS = $(wildcard *.h)
|
||||
|
||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||
|
||||
DATA = repmgr.sql uninstall_repmgr.sql
|
||||
|
||||
PG_CPPFLAGS = -I$(libpq_srcdir)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
|
||||
all: repmgrd repmgr
|
||||
|
||||
all: repmgrd repmgr
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgrd: $(repmgrd_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
|
||||
$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgr: $(repmgr_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
||||
$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
|
||||
# Make all objects depend on all include files. This is a bit of a
|
||||
# shotgun approach, but the codebase is small enough that a complete rebuild
|
||||
# is very fast anyway.
|
||||
$(repmgr_OBJS): $(HEADERS)
|
||||
$(repmgrd_OBJS): $(HEADERS)
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
@@ -31,8 +40,8 @@ include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
|
||||
# is overriding pgxs install.
|
||||
# XXX: This overrides the pgxs install target - we're building two binaries,
|
||||
# which is not supported by pgxs.mk's PROGRAM construct.
|
||||
install: install_prog install_ext
|
||||
|
||||
install_prog:
|
||||
@@ -43,6 +52,12 @@ install_prog:
|
||||
install_ext:
|
||||
$(MAKE) -C sql install
|
||||
|
||||
# Distribution-specific package building targets
|
||||
# ----------------------------------------------
|
||||
#
|
||||
# XXX we recommend using the PGDG-supplied packages where possible;
|
||||
# see README.md for details.
|
||||
|
||||
install_rhel:
|
||||
mkdir -p '$(DESTDIR)/etc/init.d/'
|
||||
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
|
||||
|
||||
116
README.md
116
README.md
@@ -33,10 +33,14 @@ provides a single read/write master server and one or more read-only standbys
|
||||
containing near-real time copies of the master server's database.
|
||||
|
||||
For a multi-master replication solution, please see 2ndQuadrant's BDR
|
||||
(bi-directional replication) extension. For selective replication, e.g.
|
||||
of individual tables or databases from one server to another, please
|
||||
see 2ndQuadrant's pglogical extension.
|
||||
(bi-directional replication) extension.
|
||||
|
||||
http://2ndquadrant.com/en-us/resources/bdr/
|
||||
|
||||
For selective replication, e.g. of individual tables or databases from one server
|
||||
to another, please see 2ndQuadrant's pglogical extension.
|
||||
|
||||
http://2ndquadrant.com/en-us/resources/pglogical/
|
||||
|
||||
### Concepts
|
||||
|
||||
@@ -109,16 +113,16 @@ tables:
|
||||
- `repl_monitor`: historical standby monitoring information written by `repmgrd`
|
||||
|
||||
views:
|
||||
- `repl_show_nodes`: based on the `repl_nodes` showing name of the server's
|
||||
upstream node
|
||||
- `repl_show_nodes`: based on the table `repl_nodes`, additionally showing the
|
||||
name of the server's upstream node
|
||||
- `repl_status`: when `repmgrd`'s monitoring is enabled, shows current monitoring
|
||||
status for each node
|
||||
|
||||
The `repmgr` metadata schema can be stored in an existing database or in its own
|
||||
dedicated database.
|
||||
|
||||
A dedicated superuser is required to own the meta-database as well as carry out
|
||||
administrative actions.
|
||||
A dedicated database superuser is required to own the meta-database as well as carry
|
||||
out administrative actions.
|
||||
|
||||
Installation
|
||||
------------
|
||||
@@ -128,7 +132,9 @@ Installation
|
||||
`repmgr` is developed and tested on Linux and OS X, but should work on any
|
||||
UNIX-like system supported by PostgreSQL itself.
|
||||
|
||||
`repmgr` supports PostgreSQL from version 9.3.
|
||||
Current versions of `repmgr` support PostgreSQL from version 9.3. If you are
|
||||
interested in using `repmgr` on earlier versions of PostgreSQL you can download
|
||||
version 2.1 which supports PostgreSQL from version 9.1.
|
||||
|
||||
All servers in the replication cluster must be running the same major version of
|
||||
PostgreSQL, and we recommend that they also run the same minor version.
|
||||
@@ -137,7 +143,7 @@ The `repmgr` tools must be installed on each server in the replication cluster.
|
||||
|
||||
A dedicated system user for `repmgr` is *not* required; as many `repmgr` and
|
||||
`repmgrd` actions require direct access to the PostgreSQL data directory,
|
||||
it should executed by the `postgres` user.
|
||||
it should be executed by the `postgres` user.
|
||||
|
||||
Additionally, we recommend installing `rsync` and enabling passwordless
|
||||
`ssh` connectivity between all servers in the replication cluster.
|
||||
@@ -186,7 +192,8 @@ PostgreSQL itself.
|
||||
`repmgr` and `repmgrd` use a common configuration file, by default called
|
||||
`repmgr.conf` (although any name can be used if explicitly specified).
|
||||
At the very least, `repmgr.conf` must contain the connection parameters
|
||||
for the local `repmgr` database.
|
||||
for the local `repmgr` database; see `repmgr configuration file` below
|
||||
for more details.
|
||||
|
||||
The configuration file will be searched for in the following locations:
|
||||
|
||||
@@ -277,11 +284,11 @@ similar to the following:
|
||||
|
||||
local replication repmgr trust
|
||||
host replication repmgr 127.0.0.1/32 trust
|
||||
host replication repmgr 192.168.1.0/32 trust
|
||||
host replication repmgr 192.168.1.0/24 trust
|
||||
|
||||
local repmgr repmgr trust
|
||||
host repmgr repmgr 127.0.0.1/32 trust
|
||||
host repmgr repmgr 192.168.1.0/32 trust
|
||||
host repmgr repmgr 192.168.1.0/24 trust
|
||||
|
||||
Adjust according to your network environment and authentication requirements.
|
||||
|
||||
@@ -364,11 +371,11 @@ Clone the standby with:
|
||||
[2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server
|
||||
[2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start
|
||||
|
||||
This will clone the PostgreSQL data directory files from the master using
|
||||
PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the
|
||||
correct parameters to start streaming from the master server will be created
|
||||
This will clone the PostgreSQL data directory files from the master at repmgr_node1
|
||||
using PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the
|
||||
correct parameters to start streaming from this master server will be created
|
||||
automatically, and unless otherwise specified the `postgresql.conf` and `pg_hba.conf`
|
||||
files will be copied.
|
||||
files will be copied from the master.
|
||||
|
||||
Make any adjustments to the PostgreSQL configuration files now, then start the
|
||||
standby server.
|
||||
@@ -377,12 +384,12 @@ standby server.
|
||||
|
||||
> *NOTE*: `repmgr standby clone` does not require `repmgr.conf`, however we
|
||||
> recommend providing this as `repmgr` will set the `application_name` parameter
|
||||
> in `recovery.conf` as value provided in `node_name`, making it easier to identify
|
||||
> the node in `pg_stat_replication`. It's also possible to provide some advanced
|
||||
> options for controlling the standby cloning process; see next section for
|
||||
> details.
|
||||
> in `recovery.conf` as the value provided in `node_name`, making it easier to
|
||||
> identify the node in `pg_stat_replication`. It's also possible to provide some
|
||||
> advanced options for controlling the standby cloning process; see next section
|
||||
> for details.
|
||||
|
||||
***
|
||||
* * *
|
||||
|
||||
### Verify replication is functioning
|
||||
|
||||
@@ -425,20 +432,20 @@ table:
|
||||
2 | standby | 1 | test | node2 | host=repmgr_node2 dbname=repmgr user=repmgr | | 100 | t
|
||||
(2 rows)
|
||||
|
||||
The standby server now has a copy of records for all servers in the replication
|
||||
cluster. Note that the relationship between master and standby is explicitly
|
||||
defined via the `upstream_node_id` value, which shows here that the standby's
|
||||
upstream server is the replication cluster master. While of limited use
|
||||
in a simple master/standby replication cluster, this information is required
|
||||
The standby server now has a copy of the records for all servers in the
|
||||
replication cluster. Note that the relationship between master and standby is
|
||||
explicitly defined via the `upstream_node_id` value, which shows here that the
|
||||
standby's upstream server is the replication cluster master. While of limited
|
||||
use in a simple master/standby replication cluster, this information is required
|
||||
to effectively manage cascading replication (see below).
|
||||
|
||||
|
||||
Advanced options for cloning a standby
|
||||
--------------------------------------
|
||||
|
||||
The above section demonstrates the simplest possible way to clone
|
||||
a standby server. Depending on your situation, finer-grained control
|
||||
over the cloning process may be necessary.
|
||||
The above section demonstrates the simplest possible way to clone a standby
|
||||
server. Depending on your circumstances, finer-grained control over the cloning
|
||||
process may be necessary.
|
||||
|
||||
### pg_basebackup options when cloning a standby
|
||||
|
||||
@@ -449,8 +456,8 @@ However this may impact performance of the server being cloned from
|
||||
so should be used with care.
|
||||
|
||||
Further options can be passed to the `pg_basebackup` utility via
|
||||
the `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
||||
documentation for more details:
|
||||
the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
|
||||
documentation for more details of available options:
|
||||
http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
||||
|
||||
### Using rsync to clone a standby
|
||||
@@ -474,7 +481,7 @@ fresh clone with `pg_basebackup`.
|
||||
|
||||
By default, `repmgr` will attempt to copy the standard configuration files
|
||||
(`postgresql.conf`, `pg_hba.conf` and `pg_ident.conf`) even if they are located
|
||||
outside of the data directory (though note currently they will be copied
|
||||
outside of the data directory (though currently they will be copied
|
||||
into the standby's data directory). To prevent this happening, when executing
|
||||
`repmgr standby clone` provide the `--ignore-external-config-files` option.
|
||||
|
||||
@@ -702,16 +709,16 @@ Performing a switchover with repmgr
|
||||
A typical use-case for replication is a combination of master and standby
|
||||
server, with the standby serving as a backup which can easily be activated
|
||||
in case of a problem with the master. Such an unplanned failover would
|
||||
normally be handled by promoting the standby, after which appropriate action
|
||||
taken to restore the old master.
|
||||
normally be handled by promoting the standby, after which an appropriate
|
||||
action must be taken to restore the old master.
|
||||
|
||||
In some cases however it's desirable to promote the standby in a planned
|
||||
way, e.g. so maintenance can be performed on the master; this kind of switchover
|
||||
is supported by the `repmgr standby switchover` command.
|
||||
|
||||
`repmgr standby switchover` differs from other `repmgr` actions in that it
|
||||
also performs actions on another server, for which reason both passwordless
|
||||
SSH access and the path of `repmgr.conf` on that server.
|
||||
also performs actions on another server, for which reason you must provide
|
||||
both passwordless SSH access and the path of `repmgr.conf` on that server.
|
||||
|
||||
* * *
|
||||
|
||||
@@ -869,6 +876,10 @@ be set in `repmgr.conf`:
|
||||
|
||||
(See `repmgr.conf.sample` for further `repmgrd`-specific settings).
|
||||
|
||||
Additionally, `postgresql.conf` must contain the following line:
|
||||
|
||||
shared_preload_libraries = 'repmgr_funcs'
|
||||
|
||||
When `failover` is set to `automatic`, upon detecting failure of the current
|
||||
master, `repmgrd` will execute one of `promote_command` or `follow_command`,
|
||||
depending on whether the current server is becoming the new master or
|
||||
@@ -991,8 +1002,11 @@ Monitoring
|
||||
----------
|
||||
|
||||
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
||||
constantly write node status information to the `repl_monitor` table, which can
|
||||
be queried easily using the view `repl_status`:
|
||||
constantly write standby node status information to the `repl_monitor` table,
|
||||
providing a near-real time overview of replication status on all nodes
|
||||
in the cluster.
|
||||
|
||||
The view `repl_status` shows the most recent state for each node, e.g.:
|
||||
|
||||
repmgr=# SELECT * FROM repmgr_test.repl_status;
|
||||
-[ RECORD 1 ]-------------+-----------------------------
|
||||
@@ -1017,6 +1031,10 @@ table , it's advisable to regularly purge historical data with
|
||||
`repmgr cluster cleanup`; use the `-k/--keep-history` option to specify how
|
||||
many days' worth of data should be retained.
|
||||
|
||||
Note that when a standby node is not streaming directly from its upstream
|
||||
node, i.e. recovering WAL from an archive, `apply_lag` will always
|
||||
appear as `0 bytes`.
|
||||
|
||||
|
||||
Using a witness server with repmgrd
|
||||
------------------------------------
|
||||
@@ -1042,7 +1060,6 @@ makes sense to create a witness server in conjunction with running
|
||||
`repmgrd`; the witness server will require its own `repmgrd` instance.
|
||||
|
||||
|
||||
|
||||
repmgrd and cascading replication
|
||||
---------------------------------
|
||||
|
||||
@@ -1159,7 +1176,7 @@ configuration file is located if `-f/--config-file` is not supplied.
|
||||
### repmgr commands
|
||||
|
||||
The `repmgr` command line tool accepts commands for specific servers in the
|
||||
replication in the format "`server type` `action`", or for the entire
|
||||
replication cluster in the format "`server_type` `action`", or for the entire
|
||||
replication cluster in the format "`cluster` `action`". Each command is
|
||||
described below.
|
||||
|
||||
@@ -1247,16 +1264,29 @@ which contains connection details for the local database.
|
||||
time a failover occurs.
|
||||
|
||||
Note that it only makes sense to create a witness server if `repmgrd`
|
||||
is in use; see section "witness server" above.
|
||||
is in use; see section "Using a witness server" above.
|
||||
|
||||
This command requires a `repmgr.conf` file containing a valid conninfo
|
||||
string for the server to be created, as well as the other minimum required
|
||||
parameters detailed in the section `repmgr configuration file` above.
|
||||
|
||||
By default the witness server will use port 5499 to facilitate easier setup
|
||||
on a server running an existing node.
|
||||
on a server running an existing node. To use a different port, supply
|
||||
this explicitly in the `repmgr.conf` conninfo string.
|
||||
|
||||
This command also requires the location of the witness server's data
|
||||
directory to be provided (`-D/--datadir`) as well as valid connection
|
||||
parameters for the master server.
|
||||
|
||||
By default this command will create a superuser and a repmgr user.
|
||||
The `repmgr` user name will be extracted from the `conninfo` string
|
||||
in `repmgr.conf`.
|
||||
|
||||
* `cluster show`
|
||||
|
||||
Displays information about each active node in the replication cluster. This
|
||||
command polls each registered server and shows its role (master / standby /
|
||||
witness) or "FAILED" if the node doesn't respond. It polls each server
|
||||
witness) or `FAILED` if the node doesn't respond. It polls each server
|
||||
directly and can be run on any node in the cluster; this is also useful
|
||||
when analyzing connectivity from a particular node.
|
||||
|
||||
|
||||
5
config.c
5
config.c
@@ -235,6 +235,9 @@ parse_config(t_configuration_options *options)
|
||||
options->monitor_interval_secs = 2;
|
||||
options->retry_promote_interval_secs = 300;
|
||||
|
||||
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
|
||||
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||
|
||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||
|
||||
options->tablespace_mapping.head = NULL;
|
||||
@@ -358,6 +361,8 @@ parse_config(t_configuration_options *options)
|
||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
|
||||
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "use_replication_slots") == 0)
|
||||
/* XXX we should have a dedicated boolean argument format */
|
||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
||||
|
||||
3
config.h
3
config.h
@@ -75,13 +75,14 @@ typedef struct
|
||||
char logfile[MAXLEN];
|
||||
int monitor_interval_secs;
|
||||
int retry_promote_interval_secs;
|
||||
int witness_repl_nodes_sync_interval_secs;
|
||||
int use_replication_slots;
|
||||
char event_notification_command[MAXLEN];
|
||||
EventNotificationList event_notifications;
|
||||
TablespaceList tablespace_mapping;
|
||||
} t_configuration_options;
|
||||
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
|
||||
typedef struct ErrorListCell
|
||||
{
|
||||
|
||||
115
dbutils.c
115
dbutils.c
@@ -420,7 +420,7 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
||||
parameter, datatype, op, value, datatype);
|
||||
|
||||
log_verbose(LOG_DEBUG, "guc_set_typed():n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "guc_set_typed():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -587,7 +587,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
upstream_conninfo = upstream_conninfo_out;
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" SELECT un.conninfo, un.name, un.id "
|
||||
" SELECT un.conninfo, un.id "
|
||||
" FROM %s.repl_nodes un "
|
||||
"INNER JOIN %s.repl_nodes n "
|
||||
" ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
|
||||
@@ -604,7 +604,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("unable to get conninfo for upstream server\n%s\n"),
|
||||
log_err(_("error when attempting to find upstream server\n%s\n"),
|
||||
PQerrorMessage(standby_conn));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
@@ -612,9 +612,36 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
|
||||
if (!PQntuples(res))
|
||||
{
|
||||
log_notice(_("no record found for upstream server"));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
log_debug("no record found for upstream server\n");
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" SELECT un.conninfo, un.id "
|
||||
" FROM %s.repl_nodes un "
|
||||
" WHERE un.cluster = '%s' "
|
||||
" AND un.type='master' "
|
||||
" AND un.active IS TRUE",
|
||||
get_repmgr_schema_quoted(standby_conn),
|
||||
cluster);
|
||||
res = PQexec(standby_conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("error when attempting to find active master server\n%s\n"),
|
||||
PQerrorMessage(standby_conn));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!PQntuples(res))
|
||||
{
|
||||
PQclear(res);
|
||||
log_notice(_("no record found for active master server\n"));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
log_debug("record found for active master server\n");
|
||||
}
|
||||
|
||||
strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
|
||||
@@ -889,7 +916,7 @@ get_repmgr_schema_quoted(PGconn *conn)
|
||||
|
||||
|
||||
bool
|
||||
create_replication_slot(PGconn *conn, char *slot_name)
|
||||
create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int query_res;
|
||||
@@ -926,9 +953,19 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
||||
return false;
|
||||
}
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
/* In 9.6 and later, reserve the LSN straight away */
|
||||
if (server_version_num >= 90600)
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)",
|
||||
slot_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
}
|
||||
|
||||
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
||||
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||
@@ -1111,7 +1148,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
||||
|
||||
|
||||
/*
|
||||
* copy_configuration()
|
||||
* witness_copy_node_records()
|
||||
*
|
||||
* Copy records in master's `repl_nodes` table to witness database
|
||||
*
|
||||
@@ -1119,29 +1156,49 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
||||
* `repmgrd` after a failover event occurs
|
||||
*/
|
||||
bool
|
||||
copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
{
|
||||
char sqlquery[MAXLEN];
|
||||
PGresult *res;
|
||||
int i;
|
||||
|
||||
begin_transaction(witnessconn);
|
||||
|
||||
/* Defer constraints */
|
||||
sqlquery_snprintf(sqlquery, "SET CONSTRAINTS ALL DEFERRED;");
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(witnessconn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to defer constraints:\n%s\n"),
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Truncate existing records */
|
||||
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(witnessconn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Get current records from primary */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
|
||||
get_repmgr_schema_quoted(masterconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(masterconn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -1149,20 +1206,23 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
log_err("Unable to retrieve node records from master:\n%s\n",
|
||||
PQerrorMessage(masterconn));
|
||||
PQclear(res);
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Insert primary records into witness table */
|
||||
for (i = 0; i < PQntuples(res); i++)
|
||||
{
|
||||
bool node_record_created;
|
||||
|
||||
log_verbose(LOG_DEBUG,
|
||||
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
||||
PQgetvalue(res, i, 4),
|
||||
"witness_copy_node_records(): writing node record for node %s (id: %s)\n",
|
||||
PQgetvalue(res, i, 3),
|
||||
PQgetvalue(res, i, 0));
|
||||
|
||||
node_record_created = create_node_record(witnessconn,
|
||||
"copy_configuration",
|
||||
"witness_copy_node_records",
|
||||
atoi(PQgetvalue(res, i, 0)),
|
||||
PQgetvalue(res, i, 1),
|
||||
strlen(PQgetvalue(res, i, 2))
|
||||
@@ -1174,7 +1234,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
atoi(PQgetvalue(res, i, 5)),
|
||||
strlen(PQgetvalue(res, i, 6))
|
||||
? PQgetvalue(res, i, 6)
|
||||
: NULL
|
||||
: NULL,
|
||||
(strcmp(PQgetvalue(res, i, 7), "t") == 0)
|
||||
? true
|
||||
: false
|
||||
);
|
||||
|
||||
if (node_record_created == false)
|
||||
@@ -1183,11 +1246,16 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
|
||||
log_err("Unable to copy node record to witness database\n%s\n",
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
/* And finished */
|
||||
commit_transaction(witnessconn);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1200,7 +1268,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
* XXX we should pass the record parameters as a struct.
|
||||
*/
|
||||
bool
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
char upstream_node_id[MAXLEN];
|
||||
@@ -1241,8 +1309,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes "
|
||||
" (id, type, upstream_node_id, cluster, "
|
||||
" name, conninfo, slot_name, priority) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
|
||||
" name, conninfo, slot_name, priority, active) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
node,
|
||||
type,
|
||||
@@ -1251,7 +1319,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
node_name,
|
||||
conninfo,
|
||||
slot_name_buf,
|
||||
priority);
|
||||
priority,
|
||||
active == true ? "TRUE" : "FALSE");
|
||||
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
||||
|
||||
@@ -1291,7 +1360,7 @@ delete_node_record(PGconn *conn, int node, char *action)
|
||||
|
||||
if (action != NULL)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
||||
log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
|
||||
}
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
@@ -115,14 +115,14 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
bool cancel_query(PGconn *conn, int timeout);
|
||||
char *get_repmgr_schema(void);
|
||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
|
||||
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||
bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||
@@ -133,3 +133,4 @@ int get_node_replication_state(PGconn *conn, char *node_name, char *output)
|
||||
t_server_type parse_node_type(const char *type);
|
||||
int get_data_checksum_version(const char *data_directory);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -37,5 +37,6 @@
|
||||
#define ERR_BAD_BASEBACKUP 14
|
||||
#define ERR_INTERNAL 15
|
||||
#define ERR_MONITORING_FAIL 16
|
||||
#define ERR_BAD_BACKUP_LABEL 17
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
5
log.c
5
log.c
@@ -40,7 +40,8 @@
|
||||
/* #define REPMGR_DEBUG */
|
||||
|
||||
static int detect_log_facility(const char *facility);
|
||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
|
||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||
|
||||
int log_type = REPMGR_STDERR;
|
||||
int log_level = LOG_NOTICE;
|
||||
@@ -48,7 +49,7 @@ int last_log_level = LOG_NOTICE;
|
||||
int verbose_logging = false;
|
||||
int terse_logging = false;
|
||||
|
||||
void
|
||||
extern void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||
{
|
||||
va_list arglist;
|
||||
|
||||
8
log.h
8
log.h
@@ -25,7 +25,7 @@
|
||||
#define REPMGR_SYSLOG 1
|
||||
#define REPMGR_STDERR 2
|
||||
|
||||
void
|
||||
extern void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
|
||||
@@ -123,8 +123,10 @@ bool logger_shutdown(void);
|
||||
void logger_set_verbose(void);
|
||||
void logger_set_terse(void);
|
||||
|
||||
void log_hint(const char *fmt, ...);
|
||||
void log_verbose(int level, const char *fmt, ...);
|
||||
void log_hint(const char *fmt, ...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2)));
|
||||
void log_verbose(int level, const char *fmt, ...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
|
||||
extern int log_type;
|
||||
extern int log_level;
|
||||
|
||||
554
repmgr.c
554
repmgr.c
@@ -43,7 +43,6 @@
|
||||
#include "repmgr.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
@@ -122,6 +121,8 @@ static bool remote_command(const char *host, const char *user, const char *comma
|
||||
static void format_db_cli_params(const char *conninfo, char *output);
|
||||
static bool copy_file(const char *old_filename, const char *new_filename);
|
||||
|
||||
static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label);
|
||||
|
||||
/* Global variables */
|
||||
static const char *keywords[6];
|
||||
static const char *values[6];
|
||||
@@ -147,6 +148,7 @@ static char path_buf[MAXLEN] = "";
|
||||
ErrorList cli_errors = { NULL, NULL };
|
||||
ErrorList cli_warnings = { NULL, NULL };
|
||||
|
||||
static struct BackupLabel backup_label;
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
@@ -159,6 +161,8 @@ main(int argc, char **argv)
|
||||
{"username", required_argument, NULL, 'U'},
|
||||
{"superuser", required_argument, NULL, 'S'},
|
||||
{"data-dir", required_argument, NULL, 'D'},
|
||||
/* alias for -D/--data-dir, following pg_ctl usage */
|
||||
{"pgdata", required_argument, NULL, 'D'},
|
||||
/* -l/--local-port is deprecated */
|
||||
{"local-port", required_argument, NULL, 'l'},
|
||||
{"config-file", required_argument, NULL, 'f'},
|
||||
@@ -175,12 +179,14 @@ main(int argc, char **argv)
|
||||
{"terse", required_argument, NULL, 't'},
|
||||
{"mode", required_argument, NULL, 'm'},
|
||||
{"remote-config-file", required_argument, NULL, 'C'},
|
||||
/* deprecated from 3.2; replaced with -P/--pwprompt */
|
||||
{"initdb-no-pwprompt", no_argument, NULL, 1},
|
||||
{"check-upstream-config", no_argument, NULL, 2},
|
||||
{"recovery-min-apply-delay", required_argument, NULL, 3},
|
||||
{"ignore-external-config-files", no_argument, NULL, 4},
|
||||
{"config-archive-dir", required_argument, NULL, 5},
|
||||
{"pg_rewind", optional_argument, NULL, 6},
|
||||
{"pwprompt", optional_argument, NULL, 7},
|
||||
{"help", no_argument, NULL, '?'},
|
||||
{"version", no_argument, NULL, 'V'},
|
||||
{NULL, 0, NULL, 0}
|
||||
@@ -196,6 +202,19 @@ main(int argc, char **argv)
|
||||
|
||||
set_progname(argv[0]);
|
||||
|
||||
/* Disallow running as root to prevent directory ownership problems */
|
||||
if (geteuid() == 0)
|
||||
{
|
||||
fprintf(stderr,
|
||||
_("%s: cannot be run as root\n"
|
||||
"Please log in (using, e.g., \"su\") as the "
|
||||
"(unprivileged) user that owns\n"
|
||||
"the data directory.\n"
|
||||
),
|
||||
progname());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Initialise some defaults */
|
||||
|
||||
/* set default user */
|
||||
@@ -210,7 +229,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "could not get current user name: %s\n", strerror(errno));
|
||||
fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
@@ -277,7 +296,7 @@ main(int argc, char **argv)
|
||||
strncpy(runtime_options.superuser, optarg, MAXLEN);
|
||||
break;
|
||||
case 'D':
|
||||
strncpy(runtime_options.dest_dir, optarg, MAXFILENAME);
|
||||
strncpy(runtime_options.dest_dir, optarg, MAXPGPATH);
|
||||
break;
|
||||
case 'l':
|
||||
/* -l/--local-port is deprecated */
|
||||
@@ -401,10 +420,13 @@ main(int argc, char **argv)
|
||||
case 6:
|
||||
if (optarg != NULL)
|
||||
{
|
||||
strncpy(runtime_options.pg_rewind, optarg, MAXFILENAME);
|
||||
strncpy(runtime_options.pg_rewind, optarg, MAXPGPATH);
|
||||
}
|
||||
pg_rewind_supplied = true;
|
||||
break;
|
||||
case 7:
|
||||
runtime_options.witness_pwprompt = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
{
|
||||
@@ -746,6 +768,8 @@ do_cluster_show(void)
|
||||
" FROM %s.repl_show_nodes",
|
||||
get_repmgr_schema_quoted(conn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery );
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -1051,7 +1075,8 @@ do_master_register(void)
|
||||
options.node_name,
|
||||
options.conninfo,
|
||||
options.priority,
|
||||
repmgr_slot_name_ptr);
|
||||
repmgr_slot_name_ptr,
|
||||
true);
|
||||
|
||||
if (record_created == false)
|
||||
{
|
||||
@@ -1152,9 +1177,8 @@ do_standby_register(void)
|
||||
options.node_name,
|
||||
options.conninfo,
|
||||
options.priority,
|
||||
repmgr_slot_name_ptr);
|
||||
|
||||
|
||||
repmgr_slot_name_ptr,
|
||||
true);
|
||||
|
||||
if (record_created == false)
|
||||
{
|
||||
@@ -1287,29 +1311,30 @@ do_standby_clone(void)
|
||||
bool target_directory_provided = false;
|
||||
bool external_config_file_copy_required = false;
|
||||
|
||||
char master_data_directory[MAXFILENAME];
|
||||
char local_data_directory[MAXFILENAME];
|
||||
char master_data_directory[MAXPGPATH];
|
||||
char local_data_directory[MAXPGPATH];
|
||||
|
||||
char master_config_file[MAXFILENAME] = "";
|
||||
char local_config_file[MAXFILENAME] = "";
|
||||
char master_config_file[MAXPGPATH] = "";
|
||||
char local_config_file[MAXPGPATH] = "";
|
||||
bool config_file_outside_pgdata = false;
|
||||
|
||||
char master_hba_file[MAXFILENAME] = "";
|
||||
char local_hba_file[MAXFILENAME] = "";
|
||||
char master_hba_file[MAXPGPATH] = "";
|
||||
char local_hba_file[MAXPGPATH] = "";
|
||||
bool hba_file_outside_pgdata = false;
|
||||
|
||||
char master_ident_file[MAXFILENAME] = "";
|
||||
char local_ident_file[MAXFILENAME] = "";
|
||||
char master_ident_file[MAXPGPATH] = "";
|
||||
char local_ident_file[MAXPGPATH] = "";
|
||||
bool ident_file_outside_pgdata = false;
|
||||
|
||||
char master_control_file[MAXFILENAME] = "";
|
||||
char local_control_file[MAXFILENAME] = "";
|
||||
char master_control_file[MAXPGPATH] = "";
|
||||
char local_control_file[MAXPGPATH] = "";
|
||||
|
||||
char *first_wal_segment = NULL;
|
||||
char *last_wal_segment = NULL;
|
||||
|
||||
PQExpBufferData event_details;
|
||||
|
||||
|
||||
/*
|
||||
* If dest_dir (-D/--pgdata) was provided, this will become the new data
|
||||
* directory (otherwise repmgr will default to the same directory as on the
|
||||
@@ -1471,7 +1496,7 @@ do_standby_clone(void)
|
||||
{
|
||||
if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0)
|
||||
{
|
||||
strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||
strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||
}
|
||||
else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0)
|
||||
{
|
||||
@@ -1479,7 +1504,7 @@ do_standby_clone(void)
|
||||
{
|
||||
config_file_outside_pgdata = true;
|
||||
external_config_file_copy_required = true;
|
||||
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||
}
|
||||
}
|
||||
else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0)
|
||||
@@ -1488,7 +1513,7 @@ do_standby_clone(void)
|
||||
{
|
||||
hba_file_outside_pgdata = true;
|
||||
external_config_file_copy_required = true;
|
||||
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||
}
|
||||
}
|
||||
else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0)
|
||||
@@ -1497,7 +1522,7 @@ do_standby_clone(void)
|
||||
{
|
||||
ident_file_outside_pgdata = true;
|
||||
external_config_file_copy_required = true;
|
||||
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME);
|
||||
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXPGPATH);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -1513,20 +1538,20 @@ do_standby_clone(void)
|
||||
*/
|
||||
if (target_directory_provided)
|
||||
{
|
||||
strncpy(local_data_directory, runtime_options.dest_dir, MAXFILENAME);
|
||||
strncpy(local_config_file, runtime_options.dest_dir, MAXFILENAME);
|
||||
strncpy(local_hba_file, runtime_options.dest_dir, MAXFILENAME);
|
||||
strncpy(local_ident_file, runtime_options.dest_dir, MAXFILENAME);
|
||||
strncpy(local_data_directory, runtime_options.dest_dir, MAXPGPATH);
|
||||
strncpy(local_config_file, runtime_options.dest_dir, MAXPGPATH);
|
||||
strncpy(local_hba_file, runtime_options.dest_dir, MAXPGPATH);
|
||||
strncpy(local_ident_file, runtime_options.dest_dir, MAXPGPATH);
|
||||
}
|
||||
/*
|
||||
* Otherwise use the same data directory as on the remote host
|
||||
*/
|
||||
else
|
||||
{
|
||||
strncpy(local_data_directory, master_data_directory, MAXFILENAME);
|
||||
strncpy(local_config_file, master_config_file, MAXFILENAME);
|
||||
strncpy(local_hba_file, master_hba_file, MAXFILENAME);
|
||||
strncpy(local_ident_file, master_ident_file, MAXFILENAME);
|
||||
strncpy(local_data_directory, master_data_directory, MAXPGPATH);
|
||||
strncpy(local_config_file, master_config_file, MAXPGPATH);
|
||||
strncpy(local_hba_file, master_hba_file, MAXPGPATH);
|
||||
strncpy(local_ident_file, master_ident_file, MAXPGPATH);
|
||||
|
||||
log_notice(_("setting data directory to: %s\n"), local_data_directory);
|
||||
log_hint(_("use -D/--data-dir to explicitly specify a data directory\n"));
|
||||
@@ -1566,7 +1591,7 @@ do_standby_clone(void)
|
||||
*/
|
||||
if (options.use_replication_slots)
|
||||
{
|
||||
if (create_replication_slot(upstream_conn, repmgr_slot_name) == false)
|
||||
if (create_replication_slot(upstream_conn, repmgr_slot_name, server_version_num) == false)
|
||||
{
|
||||
PQfinish(upstream_conn);
|
||||
exit(ERR_DB_QUERY);
|
||||
@@ -1634,13 +1659,25 @@ do_standby_clone(void)
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
master_data_directory, local_data_directory,
|
||||
true, server_version_num);
|
||||
if (r != 0)
|
||||
/*
|
||||
Exit code 0 means no error, but we want to ignore exit code 24 as well
|
||||
as rsync returns that code on "Partial transfer due to vanished source files".
|
||||
It's quite common for this to happen on the data directory, particularly
|
||||
with long running rsync on a busy server.
|
||||
*/
|
||||
if (!WIFEXITED(r) && WEXITSTATUS(r) != 24)
|
||||
{
|
||||
log_warning(_("standby clone: failed copying master data directory '%s'\n"),
|
||||
master_data_directory);
|
||||
goto stop_backup;
|
||||
}
|
||||
|
||||
/* Read backup label copied from primary */
|
||||
/* XXX ensure this function does not exit on error as we'd need to stop the backup */
|
||||
read_backup_label(local_data_directory, &backup_label);
|
||||
|
||||
printf("Label: %s; file: %s\n", backup_label.label, backup_label.start_wal_file);
|
||||
|
||||
/* Handle tablespaces */
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
@@ -1708,6 +1745,18 @@ do_standby_clone(void)
|
||||
tblspc_dir_src.data, tblspc_dir_dst.data,
|
||||
true, server_version_num);
|
||||
|
||||
/*
|
||||
Exit code 0 means no error, but we want to ignore exit code 24 as well
|
||||
as rsync returns that code on "Partial transfer due to vanished source files".
|
||||
It's quite common for this to happen on the data directory, particularly
|
||||
with long running rsync on a busy server.
|
||||
*/
|
||||
if (!WIFEXITED(r) && WEXITSTATUS(r) != 24)
|
||||
{
|
||||
log_warning(_("standby clone: failed copying tablespace directory '%s'\n"),
|
||||
tblspc_dir_src.data);
|
||||
goto stop_backup;
|
||||
}
|
||||
|
||||
/* Update symlinks in pg_tblspc */
|
||||
if (mapping_found == true)
|
||||
@@ -1932,44 +1981,55 @@ stop_backup:
|
||||
exit(retval);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Clean up any $PGDATA subdirectories which may contain
|
||||
* files which won't be removed by rsync and which could
|
||||
* be stale or are otherwise not required
|
||||
*/
|
||||
if (runtime_options.rsync_only && runtime_options.force)
|
||||
if (runtime_options.rsync_only)
|
||||
{
|
||||
char script[MAXLEN];
|
||||
char label_path[MAXPGPATH];
|
||||
|
||||
/*
|
||||
* Remove any existing WAL from the target directory, since
|
||||
* rsync's --exclude option doesn't do it.
|
||||
*/
|
||||
maxlen_snprintf(script, "rm -rf %s/pg_xlog/*",
|
||||
local_data_directory);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
if (runtime_options.force)
|
||||
{
|
||||
log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"),
|
||||
local_data_directory);
|
||||
exit(ERR_BAD_RSYNC);
|
||||
/*
|
||||
* Remove any existing WAL from the target directory, since
|
||||
* rsync's --exclude option doesn't do it.
|
||||
*/
|
||||
maxlen_snprintf(script, "rm -rf %s/pg_xlog/*",
|
||||
local_data_directory);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"),
|
||||
local_data_directory);
|
||||
exit(ERR_BAD_RSYNC);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove any replication slot directories; this matches the
|
||||
* behaviour a base backup, which would result in an empty
|
||||
* pg_replslot directory.
|
||||
* Remove any existing replication slot directories from previous use
|
||||
* of this data directory; this matches the behaviour of a fresh
|
||||
* pg_basebackup, which would usually result in an empty pg_replslot
|
||||
* directory.
|
||||
*
|
||||
* If the backup label contains a nonzero
|
||||
* 'MIN FAILOVER SLOT LSN' entry we retain the slots and let
|
||||
* the server clean them up instead, matching pg_basebackup's
|
||||
* behaviour when failover slots are enabled.
|
||||
*
|
||||
* NOTE: watch out for any changes in the replication
|
||||
* slot directory name (as of 9.4: "pg_replslot") and
|
||||
* functionality of replication slots
|
||||
*/
|
||||
|
||||
if (server_version_num >= 90400)
|
||||
if (server_version_num >= 90400 &&
|
||||
backup_label.min_failover_slot_lsn == InvalidXLogRecPtr)
|
||||
{
|
||||
maxlen_snprintf(script, "rm -rf %s/pg_replslot/*",
|
||||
local_data_directory);
|
||||
|
||||
log_debug("deleting pg_replslot directory contents\n");
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
@@ -1978,6 +2038,13 @@ stop_backup:
|
||||
exit(ERR_BAD_RSYNC);
|
||||
}
|
||||
}
|
||||
|
||||
/* delete the backup label file copied from the primary */
|
||||
maxlen_snprintf(label_path, "%s/backup_label", local_data_directory);
|
||||
if (0 && unlink(label_path) < 0 && errno != ENOENT)
|
||||
{
|
||||
log_warning(_("unable to delete backup label file %s\n"), label_path);
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally, write the recovery.conf file */
|
||||
@@ -1993,9 +2060,9 @@ stop_backup:
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX It might be nice to provide the following options:
|
||||
* - have repmgr start the daemon automatically
|
||||
* - provide a custom pg_ctl command
|
||||
* XXX It might be nice to provide an option to have repmgr start
|
||||
* the PostgreSQL server automatically (e.g. with a custom pg_ctl
|
||||
* command)
|
||||
*/
|
||||
|
||||
log_notice(_("you can now start your PostgreSQL server\n"));
|
||||
@@ -2009,7 +2076,28 @@ stop_backup:
|
||||
log_hint(_("for example : /etc/init.d/postgresql start\n"));
|
||||
}
|
||||
|
||||
/* Log the event - if we could connect to the primary */
|
||||
|
||||
/*
|
||||
* XXX forgetting to (re) register the standby is a frequent cause
|
||||
* of error; we should consider having repmgr automatically
|
||||
* register the standby, either by default with an option
|
||||
* "--no-register", or an option "--register".
|
||||
*
|
||||
* Note that "repmgr standby register" requires the standby to
|
||||
* be running - if not, and we just update the node record,
|
||||
* we'd have an incorrect representation of the replication cluster.
|
||||
* Best combined with an automatic start of the server (see note
|
||||
* above)
|
||||
*/
|
||||
|
||||
/*
|
||||
* XXX detect whether a record exists for this node already, and
|
||||
* add a hint about using the -F/--force.
|
||||
*/
|
||||
|
||||
log_hint(_("After starting the server, you need to register this standby with \"repmgr standby register\"\n"));
|
||||
|
||||
/* Log the event - if we can connect to the primary */
|
||||
|
||||
if (primary_conn != NULL)
|
||||
{
|
||||
@@ -2041,6 +2129,159 @@ stop_backup:
|
||||
exit(retval);
|
||||
}
|
||||
|
||||
static bool
|
||||
parse_lsn(XLogRecPtr *ptr, const char *str)
|
||||
{
|
||||
uint32 high, low;
|
||||
|
||||
if (sscanf(str, "%x/%x", &high, &low) != 2)
|
||||
return false;
|
||||
|
||||
*ptr = (((XLogRecPtr)high) << 32) + (XLogRecPtr)low;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static XLogRecPtr
|
||||
parse_label_lsn(const char *label_key, const char *label_value)
|
||||
{
|
||||
XLogRecPtr ptr;
|
||||
|
||||
if (!parse_lsn(&ptr, label_value))
|
||||
{
|
||||
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
||||
label_key, label_value);
|
||||
|
||||
exit(ERR_BAD_BACKUP_LABEL);
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*======================================
|
||||
* Read entries of interest from the backup label.
|
||||
*
|
||||
* Sample backup label (with failover slots):
|
||||
*
|
||||
* START WAL LOCATION: 0/6000028 (file 000000010000000000000006)
|
||||
* CHECKPOINT LOCATION: 0/6000060
|
||||
* BACKUP METHOD: streamed
|
||||
* BACKUP FROM: master
|
||||
* START TIME: 2016-03-30 12:18:12 AWST
|
||||
* LABEL: pg_basebackup base backup
|
||||
* MIN FAILOVER SLOT LSN: 0/5000000
|
||||
*
|
||||
*======================================
|
||||
*/
|
||||
static void
|
||||
read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label)
|
||||
{
|
||||
char label_path[MAXPGPATH];
|
||||
FILE *label_file;
|
||||
int nmatches = 0;
|
||||
|
||||
char line[MAXLEN];
|
||||
|
||||
out_backup_label->start_wal_location = InvalidXLogRecPtr;
|
||||
out_backup_label->start_wal_file[0] = '\0';
|
||||
out_backup_label->checkpoint_location = InvalidXLogRecPtr;
|
||||
out_backup_label->backup_from[0] = '\0';
|
||||
out_backup_label->backup_method[0] = '\0';
|
||||
out_backup_label->start_time[0] = '\0';
|
||||
out_backup_label->label[0] = '\0';
|
||||
out_backup_label->min_failover_slot_lsn = InvalidXLogRecPtr;
|
||||
|
||||
maxlen_snprintf(label_path, "%s/backup_label", local_data_directory);
|
||||
|
||||
label_file = fopen(label_path, "r");
|
||||
if (label_file == NULL)
|
||||
{
|
||||
log_err(_("read_backup_label: could not open backup label file %s: %s"),
|
||||
label_path, strerror(errno));
|
||||
exit(ERR_BAD_BACKUP_LABEL);
|
||||
}
|
||||
|
||||
log_info(_("read_backup_label: parsing backup label file '%s'\n"),
|
||||
label_path);
|
||||
|
||||
while(fgets(line, sizeof line, label_file) != NULL)
|
||||
{
|
||||
char label_key[MAXLEN];
|
||||
char label_value[MAXLEN];
|
||||
char newline;
|
||||
|
||||
nmatches = sscanf(line, "%" MAXLEN_STR "[^:]: %" MAXLEN_STR "[^\n]%c",
|
||||
label_key, label_value, &newline);
|
||||
|
||||
if (nmatches != 3)
|
||||
break;
|
||||
|
||||
if (newline != '\n')
|
||||
{
|
||||
log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""),
|
||||
label_key, label_value);
|
||||
exit(ERR_BAD_BACKUP_LABEL);
|
||||
}
|
||||
|
||||
log_debug("standby clone: got backup label entry \"%s: %s\"\n",
|
||||
label_key, label_value);
|
||||
|
||||
if (strcmp(label_key, "START WAL LOCATION") == 0)
|
||||
{
|
||||
char start_wal_location[MAXLEN];
|
||||
char wal_filename[MAXLEN];
|
||||
|
||||
nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename);
|
||||
if (nmatches != 2)
|
||||
{
|
||||
log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n"));
|
||||
exit(ERR_BAD_BACKUP_LABEL);
|
||||
}
|
||||
out_backup_label->start_wal_location =
|
||||
parse_label_lsn(&label_key[0], start_wal_location);
|
||||
|
||||
(void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN);
|
||||
out_backup_label->start_wal_file[MAXLEN-1] = '\0';
|
||||
}
|
||||
else if (strcmp(label_key, "CHECKPOINT LOCATION") == 0)
|
||||
{
|
||||
out_backup_label->checkpoint_location =
|
||||
parse_label_lsn(&label_key[0], &label_value[0]);
|
||||
}
|
||||
else if (strcmp(label_key, "BACKUP METHOD") == 0)
|
||||
{
|
||||
(void) strncpy(out_backup_label->backup_method, label_value, MAXLEN);
|
||||
out_backup_label->backup_method[MAXLEN-1] = '\0';
|
||||
}
|
||||
else if (strcmp(label_key, "BACKUP FROM") == 0)
|
||||
{
|
||||
(void) strncpy(out_backup_label->backup_from, label_value, MAXLEN);
|
||||
out_backup_label->backup_from[MAXLEN-1] = '\0';
|
||||
}
|
||||
else if (strcmp(label_key, "START TIME") == 0)
|
||||
{
|
||||
(void) strncpy(out_backup_label->start_time, label_value, MAXLEN);
|
||||
out_backup_label->start_time[MAXLEN-1] = '\0';
|
||||
}
|
||||
else if (strcmp(label_key, "LABEL") == 0)
|
||||
{
|
||||
(void) strncpy(out_backup_label->label, label_value, MAXLEN);
|
||||
out_backup_label->label[MAXLEN-1] = '\0';
|
||||
}
|
||||
else if (strcmp(label_key, "MIN FAILOVER SLOT LSN") == 0)
|
||||
{
|
||||
out_backup_label->min_failover_slot_lsn =
|
||||
parse_label_lsn(&label_key[0], &label_value[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("read_backup_label: ignored unrecognised backup label entry \"%s: %s\"",
|
||||
label_key, label_value);
|
||||
}
|
||||
}
|
||||
|
||||
(void) fclose(label_file);
|
||||
}
|
||||
|
||||
static void
|
||||
do_standby_promote(void)
|
||||
@@ -2214,7 +2455,7 @@ do_standby_follow(void)
|
||||
|
||||
int r,
|
||||
retval;
|
||||
char data_dir[MAXFILENAME];
|
||||
char data_dir[MAXPGPATH];
|
||||
|
||||
bool success;
|
||||
|
||||
@@ -2297,7 +2538,7 @@ do_standby_follow(void)
|
||||
|
||||
master_id = get_master_node_id(master_conn, options.cluster_name);
|
||||
|
||||
strncpy(data_dir, runtime_options.dest_dir, MAXFILENAME);
|
||||
strncpy(data_dir, runtime_options.dest_dir, MAXPGPATH);
|
||||
}
|
||||
|
||||
|
||||
@@ -2329,7 +2570,9 @@ do_standby_follow(void)
|
||||
|
||||
if (options.use_replication_slots)
|
||||
{
|
||||
if (create_replication_slot(master_conn, repmgr_slot_name) == false)
|
||||
int server_version_num = get_server_version(master_conn, NULL);
|
||||
|
||||
if (create_replication_slot(master_conn, repmgr_slot_name, server_version_num) == false)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
initPQExpBuffer(&event_details);
|
||||
@@ -2834,8 +3077,8 @@ do_standby_switchover(void)
|
||||
*/
|
||||
|
||||
maxlen_snprintf(command,
|
||||
"%s/pg_ctl -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
|
||||
pg_bindir,
|
||||
"%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
|
||||
make_pg_path("pg_ctl"),
|
||||
remote_data_directory,
|
||||
runtime_options.pg_ctl_mode);
|
||||
|
||||
@@ -3277,10 +3520,17 @@ do_standby_restore_config(void)
|
||||
}
|
||||
|
||||
while ((arcdir_ent = readdir(arcdir)) != NULL) {
|
||||
struct stat statbuf;
|
||||
char arcdir_ent_path[MAXPGPATH];
|
||||
PQExpBufferData src_file;
|
||||
PQExpBufferData dst_file;
|
||||
|
||||
if (arcdir_ent->d_type != DT_REG)
|
||||
snprintf(arcdir_ent_path, MAXPGPATH,
|
||||
"%s/%s",
|
||||
runtime_options.config_archive_dir,
|
||||
arcdir_ent->d_name);
|
||||
|
||||
if (stat(arcdir_ent_path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -3358,6 +3608,8 @@ do_witness_create(void)
|
||||
char master_hba_file[MAXLEN];
|
||||
bool success;
|
||||
bool record_created;
|
||||
char repmgr_user[MAXLEN];
|
||||
char repmgr_db[MAXLEN];
|
||||
|
||||
/* Connection parameters for master only */
|
||||
keywords[0] = "host";
|
||||
@@ -3365,6 +3617,13 @@ do_witness_create(void)
|
||||
keywords[1] = "port";
|
||||
values[1] = runtime_options.masterport;
|
||||
|
||||
/*
|
||||
* Extract the repmgr user and database names from the conninfo string
|
||||
* provided in repmgr.conf
|
||||
*/
|
||||
get_conninfo_value(options.conninfo, "user", repmgr_user);
|
||||
get_conninfo_value(options.conninfo, "dbname", repmgr_db);
|
||||
|
||||
/* We need to connect to check configuration and copy it */
|
||||
masterconn = establish_db_connection_by_params(keywords, values, true);
|
||||
if (!masterconn)
|
||||
@@ -3454,7 +3713,7 @@ do_witness_create(void)
|
||||
maxlen_snprintf(script, "%s %s -D %s init -o \"%s-U %s\"",
|
||||
make_pg_path("pg_ctl"),
|
||||
options.pg_ctl_options, runtime_options.dest_dir,
|
||||
runtime_options.initdb_no_pwprompt ? "" : "-W ",
|
||||
runtime_options.witness_pwprompt ? "-W " : "",
|
||||
runtime_options.superuser);
|
||||
log_info(_("initializing cluster for witness: %s.\n"), script);
|
||||
|
||||
@@ -3500,8 +3759,8 @@ do_witness_create(void)
|
||||
xsnprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname());
|
||||
fputs(buf, pg_conf);
|
||||
|
||||
|
||||
/* Attempt to extract a port number from the provided conninfo string
|
||||
/*
|
||||
* Attempt to extract a port number from the provided conninfo string.
|
||||
* This will override any value provided with '-l/--local-port', as it's
|
||||
* what we'll later try and connect to anyway. '-l/--local-port' should
|
||||
* be deprecated.
|
||||
@@ -3552,13 +3811,18 @@ do_witness_create(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
/* check if we need to create a user */
|
||||
if (runtime_options.username[0] && runtime_options.localport[0] && strcmp(runtime_options.username,"postgres") != 0)
|
||||
if (strcmp(repmgr_user, "postgres") != 0)
|
||||
{
|
||||
/* create required user; needs to be superuser to create untrusted language function in c */
|
||||
maxlen_snprintf(script, "%s -p %s --superuser --login -U %s %s",
|
||||
/* create required user; needs to be superuser to create untrusted
|
||||
* language function in C */
|
||||
maxlen_snprintf(script, "%s -p %s --superuser --login %s-U %s %s",
|
||||
make_pg_path("createuser"),
|
||||
runtime_options.localport, runtime_options.superuser, runtime_options.username);
|
||||
runtime_options.localport,
|
||||
runtime_options.witness_pwprompt ? "-P " : "",
|
||||
runtime_options.superuser,
|
||||
repmgr_user);
|
||||
log_info(_("creating user for witness db: %s.\n"), script);
|
||||
|
||||
r = system(script);
|
||||
@@ -3584,7 +3848,10 @@ do_witness_create(void)
|
||||
/* create required db */
|
||||
maxlen_snprintf(script, "%s -p %s -U %s --owner=%s %s",
|
||||
make_pg_path("createdb"),
|
||||
runtime_options.localport, runtime_options.superuser, runtime_options.username, runtime_options.dbname);
|
||||
runtime_options.localport,
|
||||
runtime_options.superuser,
|
||||
repmgr_user,
|
||||
repmgr_db);
|
||||
log_info("creating database for witness db: %s.\n", script);
|
||||
|
||||
r = system(script);
|
||||
@@ -3610,7 +3877,7 @@ do_witness_create(void)
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
char *errmsg = _("unable to retrieve location of pg_hba.conf");
|
||||
char *errmsg = _("Unable to retrieve location of pg_hba.conf");
|
||||
log_err("%s\n", errmsg);
|
||||
|
||||
create_event_record(masterconn,
|
||||
@@ -3627,7 +3894,7 @@ do_witness_create(void)
|
||||
master_hba_file, runtime_options.dest_dir, false, -1);
|
||||
if (r != 0)
|
||||
{
|
||||
char *errmsg = _("unable to copy pg_hba.conf from master");
|
||||
char *errmsg = _("Unable to copy pg_hba.conf from master");
|
||||
log_err("%s\n", errmsg);
|
||||
|
||||
create_event_record(masterconn,
|
||||
@@ -3641,7 +3908,7 @@ do_witness_create(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* reload to adapt for changed pg_hba.conf */
|
||||
/* reload witness server to activate the copied pg_hba.conf */
|
||||
maxlen_snprintf(script, "%s %s -w -D %s reload",
|
||||
make_pg_path("pg_ctl"),
|
||||
options.pg_ctl_options, runtime_options.dest_dir);
|
||||
@@ -3663,7 +3930,47 @@ do_witness_create(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* register ourselves in the master */
|
||||
/* establish a connection to the witness, and create the schema */
|
||||
witnessconn = establish_db_connection(options.conninfo, false);
|
||||
|
||||
if (PQstatus(witnessconn) != CONNECTION_OK)
|
||||
{
|
||||
create_event_record(masterconn,
|
||||
&options,
|
||||
options.node,
|
||||
"witness_create",
|
||||
false,
|
||||
_("Unable to connect to witness servetr"));
|
||||
PQfinish(masterconn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
log_info(_("starting copy of configuration from master...\n"));
|
||||
|
||||
begin_transaction(witnessconn);
|
||||
|
||||
if (!create_schema(witnessconn))
|
||||
{
|
||||
rollback_transaction(witnessconn);
|
||||
create_event_record(masterconn,
|
||||
&options,
|
||||
options.node,
|
||||
"witness_create",
|
||||
false,
|
||||
_("Unable to create schema on witness"));
|
||||
PQfinish(masterconn);
|
||||
PQfinish(witnessconn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
commit_transaction(witnessconn);
|
||||
|
||||
/*
|
||||
* Register new witness server on the primary
|
||||
* Do this as late as possible to avoid having to delete
|
||||
* the record if the server creation fails
|
||||
*/
|
||||
|
||||
if (runtime_options.force)
|
||||
{
|
||||
@@ -3687,7 +3994,8 @@ do_witness_create(void)
|
||||
options.node_name,
|
||||
options.conninfo,
|
||||
options.priority,
|
||||
NULL);
|
||||
NULL,
|
||||
true);
|
||||
|
||||
if (record_created == false)
|
||||
{
|
||||
@@ -3702,32 +4010,9 @@ do_witness_create(void)
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
/* establish a connection to the witness, and create the schema */
|
||||
witnessconn = establish_db_connection(options.conninfo, true);
|
||||
|
||||
log_info(_("starting copy of configuration from master...\n"));
|
||||
|
||||
begin_transaction(witnessconn);
|
||||
|
||||
|
||||
if (!create_schema(witnessconn))
|
||||
{
|
||||
rollback_transaction(witnessconn);
|
||||
create_event_record(masterconn,
|
||||
&options,
|
||||
options.node,
|
||||
"witness_create",
|
||||
false,
|
||||
_("unable to create schema on witness"));
|
||||
PQfinish(masterconn);
|
||||
PQfinish(witnessconn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
commit_transaction(witnessconn);
|
||||
|
||||
/* copy configuration from master, only repl_nodes is needed */
|
||||
if (!copy_configuration(masterconn, witnessconn, options.cluster_name))
|
||||
if (!witness_copy_node_records(masterconn, witnessconn, options.cluster_name))
|
||||
{
|
||||
create_event_record(masterconn,
|
||||
&options,
|
||||
@@ -3735,24 +4020,33 @@ do_witness_create(void)
|
||||
"witness_create",
|
||||
false,
|
||||
_("Unable to copy configuration from master"));
|
||||
|
||||
/*
|
||||
* delete previously created witness node record
|
||||
* XXX maybe set inactive?
|
||||
*/
|
||||
delete_node_record(masterconn,
|
||||
options.node,
|
||||
"witness create");
|
||||
|
||||
PQfinish(masterconn);
|
||||
PQfinish(witnessconn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* drop superuser powers if needed */
|
||||
if (runtime_options.username[0] && runtime_options.localport[0] && strcmp(runtime_options.username,"postgres") != 0)
|
||||
if (strcmp(repmgr_user, "postgres") != 0)
|
||||
{
|
||||
sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", runtime_options.username);
|
||||
sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", repmgr_user);
|
||||
log_info(_("revoking superuser status on user %s: %s.\n"),
|
||||
runtime_options.username, sqlquery);
|
||||
repmgr_user, sqlquery);
|
||||
|
||||
log_debug(_("witness create: %s\n"), sqlquery);
|
||||
res = PQexec(witnessconn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("unable to alter user privileges for user %s: %s\n"),
|
||||
runtime_options.username,
|
||||
log_err(_("Unable to alter user privileges for user %s: %s\n"),
|
||||
repmgr_user,
|
||||
PQerrorMessage(witnessconn));
|
||||
PQfinish(masterconn);
|
||||
PQfinish(witnessconn);
|
||||
@@ -3760,6 +4054,10 @@ do_witness_create(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Finished with the witness server */
|
||||
|
||||
PQfinish(witnessconn);
|
||||
|
||||
/* Log the event */
|
||||
create_event_record(masterconn,
|
||||
&options,
|
||||
@@ -3769,7 +4067,6 @@ do_witness_create(void)
|
||||
NULL);
|
||||
|
||||
PQfinish(masterconn);
|
||||
PQfinish(witnessconn);
|
||||
|
||||
log_notice(_("configuration has been successfully copied to the witness\n"));
|
||||
}
|
||||
@@ -3829,7 +4126,8 @@ do_help(void)
|
||||
printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \
|
||||
" optionally providing a path to the binary\n"));
|
||||
printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n"));
|
||||
printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));
|
||||
/* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/
|
||||
printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n"));
|
||||
printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \
|
||||
" (default: postgres)\n"));
|
||||
printf(_("\n"));
|
||||
@@ -3975,6 +4273,7 @@ test_ssh_connection(char *host, char *remote_user)
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
char *local_path, bool is_directory, int server_version_num)
|
||||
@@ -4019,6 +4318,9 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
* See function 'sendDir()' in 'src/backend/replication/basebackup.c' -
|
||||
* we're basically simulating what pg_basebackup does, but with rsync rather
|
||||
* than the BASEBACKUP replication protocol command.
|
||||
*
|
||||
* *However* currently we'll always copy the contents of the 'pg_replslot'
|
||||
* directory and delete later if appropriate.
|
||||
*/
|
||||
if (is_directory)
|
||||
{
|
||||
@@ -4047,12 +4349,6 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
appendPQExpBuffer(&rsync_flags, "%s",
|
||||
" --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*");
|
||||
|
||||
if (server_version_num >= 90400)
|
||||
{
|
||||
appendPQExpBuffer(&rsync_flags, "%s",
|
||||
" --exclude=pg_replslot/*");
|
||||
}
|
||||
|
||||
maxlen_snprintf(script, "rsync %s %s:%s/* %s",
|
||||
rsync_flags.data, host_string, remote_path, local_path);
|
||||
}
|
||||
@@ -4275,6 +4571,11 @@ check_parameters_for_action(const int action)
|
||||
config_file_required = false;
|
||||
break;
|
||||
case WITNESS_CREATE:
|
||||
/* Require data directory */
|
||||
if (strcmp(runtime_options.dest_dir, "") == 0)
|
||||
{
|
||||
error_list_append(&cli_errors, _("-D/--data-dir required when executing WITNESS CREATE"));
|
||||
}
|
||||
/* allow all parameters to be supplied */
|
||||
break;
|
||||
case CLUSTER_SHOW:
|
||||
@@ -4406,7 +4707,7 @@ create_schema(PGconn *conn)
|
||||
"CREATE TABLE %s.repl_nodes ( "
|
||||
" id INTEGER PRIMARY KEY, "
|
||||
" type TEXT NOT NULL CHECK (type IN('master','standby','witness')), "
|
||||
" upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id), "
|
||||
" upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id) DEFERRABLE, "
|
||||
" cluster TEXT NOT NULL, "
|
||||
" name TEXT NOT NULL, "
|
||||
" conninfo TEXT NOT NULL, "
|
||||
@@ -4766,29 +5067,48 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error)
|
||||
char *wal_error_message = NULL;
|
||||
|
||||
/* Check that WAL level is set correctly */
|
||||
if (server_version_num < 90300)
|
||||
if (server_version_num < 90400)
|
||||
{
|
||||
i = guc_set(conn, "wal_level", "=", "hot_standby");
|
||||
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby'");
|
||||
}
|
||||
else
|
||||
{
|
||||
char *levels[] = {
|
||||
char *levels_pre96[] = {
|
||||
"hot_standby",
|
||||
"logical",
|
||||
NULL,
|
||||
};
|
||||
|
||||
int j = 0;
|
||||
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'");
|
||||
char *levels_96plus[] = {
|
||||
"replica",
|
||||
"logical",
|
||||
NULL,
|
||||
};
|
||||
|
||||
for(; j < 2; j++)
|
||||
char **levels;
|
||||
int j = 0;
|
||||
|
||||
if (server_version_num < 90600)
|
||||
{
|
||||
levels = (char **)levels_pre96;
|
||||
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'");
|
||||
}
|
||||
else
|
||||
{
|
||||
levels = (char **)levels_96plus;
|
||||
wal_error_message = _("parameter 'wal_level' must be set to 'replica' or 'logical'");
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
i = guc_set(conn, "wal_level", "=", levels[j]);
|
||||
if (i)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
j++;
|
||||
} while (levels[j] != NULL);
|
||||
}
|
||||
|
||||
if (i == 0 || i == -1)
|
||||
|
||||
@@ -15,21 +15,21 @@
|
||||
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
||||
# to preserve case, we recommend using lower case and avoiding whitespace
|
||||
# to facilitate easier querying of the repmgr views and tables.
|
||||
cluster=example_cluster
|
||||
#cluster=example_cluster
|
||||
|
||||
# Node ID and name
|
||||
# (Note: we recommend not naming nodes after their initial
|
||||
# replication function, as this will cause confusion when e.g.
|
||||
# "standby2" is promoted to primary)
|
||||
node=2 # a unique integer
|
||||
node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||
#node=2 # a unique integer
|
||||
#node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||
# the server's hostname or another identifier unambiguously
|
||||
# associated with the server to avoid confusion
|
||||
|
||||
# Database connection information as a conninfo string
|
||||
# This must be accessible to all servers in the cluster; for details see:
|
||||
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
|
||||
# Optional configuration items
|
||||
# ============================
|
||||
@@ -37,15 +37,16 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
# Replication settings
|
||||
# ---------------------
|
||||
|
||||
# when using cascading replication and a standby is to be connected to an
|
||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
||||
# must exist before the new standby can be registered. If a standby is
|
||||
# to connect directly to a primary node, this parameter is not required.
|
||||
upstream_node=1
|
||||
# When using cascading replication, a standby can connect to another
|
||||
# upstream standby node which is specified by setting 'upstream_node'.
|
||||
# In that case, the upstream node must exist before the new standby
|
||||
# can be registered. If 'upstream_node' is not set, then the standby
|
||||
# will connect directly to the primary node.
|
||||
#upstream_node=1
|
||||
|
||||
# use physical replication slots - PostgreSQL 9.4 and later only
|
||||
# (default: 0)
|
||||
use_replication_slots=0
|
||||
#use_replication_slots=0
|
||||
|
||||
# NOTE: 'max_replication_slots' should be configured for at least the
|
||||
# number of standbys which will connect to the primary.
|
||||
@@ -55,15 +56,15 @@ use_replication_slots=0
|
||||
|
||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||
# (default: NOTICE)
|
||||
loglevel=NOTICE
|
||||
#loglevel=NOTICE
|
||||
|
||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
# (default: STDERR)
|
||||
logfacility=STDERR
|
||||
#logfacility=STDERR
|
||||
|
||||
# stderr can be redirected to an arbitrary file:
|
||||
#
|
||||
logfile='/var/log/repmgr/repmgr.log'
|
||||
#logfile='/var/log/repmgr/repmgr.log'
|
||||
|
||||
# event notifications can be passed to an arbitrary external program
|
||||
# together with the following parameters:
|
||||
@@ -77,12 +78,12 @@ logfile='/var/log/repmgr/repmgr.log'
|
||||
# the values provided for "%t" and "%d" will probably contain spaces,
|
||||
# so should be quoted in the provided command configuration, e.g.:
|
||||
#
|
||||
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
|
||||
# By default, all notifications will be passed; the notification types
|
||||
# can be filtered to explicitly named ones:
|
||||
#
|
||||
event_notifications=master_register,standby_register,witness_create
|
||||
#event_notifications=master_register,standby_register,witness_create
|
||||
|
||||
|
||||
# Environment/command settings
|
||||
@@ -90,17 +91,17 @@ event_notifications=master_register,standby_register,witness_create
|
||||
|
||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
||||
# (if not provided, defaults to system $PATH)
|
||||
pg_bindir=/usr/bin/
|
||||
#pg_bindir=/usr/bin/
|
||||
|
||||
# external command options
|
||||
|
||||
rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
ssh_options=-o "StrictHostKeyChecking no"
|
||||
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
#ssh_options=-o "StrictHostKeyChecking no"
|
||||
|
||||
# external command arguments. Values shown are examples.
|
||||
|
||||
pg_ctl_options='-s'
|
||||
pg_basebackup_options='--xlog-method=s'
|
||||
#pg_ctl_options='-s'
|
||||
#pg_basebackup_options='--xlog-method=s'
|
||||
|
||||
|
||||
# Standby clone settings
|
||||
@@ -122,27 +123,30 @@ pg_basebackup_options='--xlog-method=s'
|
||||
# Number of seconds to wait for a response from the primary server before
|
||||
# deciding it has failed.
|
||||
|
||||
master_response_timeout=60
|
||||
#master_response_timeout=60
|
||||
|
||||
# Number of attempts, and the interval between them (in seconds), to try and
|
||||
# connect to a server to establish its status (e.g. master
|
||||
# during failover)
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=10
|
||||
#reconnect_attempts=6
|
||||
#reconnect_interval=10
|
||||
|
||||
# Autofailover options
|
||||
failover=manual # one of 'automatic', 'manual'
|
||||
#failover=manual # one of 'automatic', 'manual'
|
||||
# (default: manual)
|
||||
priority=100 # a value of zero or less prevents the node being promoted to primary
|
||||
#priority=100 # a value of zero or less prevents the node being promoted to primary
|
||||
# (default: 100)
|
||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
|
||||
# monitoring interval in seconds; default is 2
|
||||
monitor_interval_secs=2
|
||||
#monitor_interval_secs=2
|
||||
|
||||
# change wait time for primary; before we bail out and exit when the primary
|
||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||
# default value is 300
|
||||
retry_promote_interval_secs=300
|
||||
#retry_promote_interval_secs=300
|
||||
|
||||
# Number of seconds after which the witness server resyncs the repl_nodes table
|
||||
#witness_repl_nodes_sync_interval_secs=15
|
||||
|
||||
27
repmgr.h
27
repmgr.h
@@ -32,8 +32,6 @@
|
||||
#define MIN_SUPPORTED_VERSION "9.3"
|
||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||
|
||||
#include "config.h"
|
||||
#define MAXFILENAME 1024
|
||||
#define ERRBUFF_SIZE 512
|
||||
|
||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||
@@ -57,8 +55,8 @@ typedef struct
|
||||
char dbname[MAXLEN];
|
||||
char host[MAXLEN];
|
||||
char username[MAXLEN];
|
||||
char dest_dir[MAXFILENAME];
|
||||
char config_file[MAXFILENAME];
|
||||
char dest_dir[MAXPGPATH];
|
||||
char config_file[MAXPGPATH];
|
||||
char remote_user[MAXLEN];
|
||||
char superuser[MAXLEN];
|
||||
char wal_keep_segments[MAXLEN];
|
||||
@@ -67,7 +65,7 @@ typedef struct
|
||||
bool force;
|
||||
bool wait_for_master;
|
||||
bool ignore_rsync_warn;
|
||||
bool initdb_no_pwprompt;
|
||||
bool witness_pwprompt;
|
||||
bool rsync_only;
|
||||
bool fast_checkpoint;
|
||||
bool ignore_external_config_files;
|
||||
@@ -81,7 +79,7 @@ typedef struct
|
||||
|
||||
/* parameter used by STANDBY SWITCHOVER */
|
||||
char remote_config_file[MAXLEN];
|
||||
char pg_rewind[MAXFILENAME];
|
||||
char pg_rewind[MAXPGPATH];
|
||||
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
||||
char config_archive_dir[MAXLEN];
|
||||
/* parameter used by CLUSTER CLEANUP */
|
||||
@@ -91,11 +89,24 @@ typedef struct
|
||||
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
|
||||
/* deprecated command line option */
|
||||
/* deprecated command line options */
|
||||
char localport[MAXLEN];
|
||||
bool initdb_no_pwprompt;
|
||||
} t_runtime_options;
|
||||
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "" }
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false }
|
||||
|
||||
struct BackupLabel
|
||||
{
|
||||
XLogRecPtr start_wal_location;
|
||||
char start_wal_file[MAXLEN];
|
||||
XLogRecPtr checkpoint_location;
|
||||
char backup_from[MAXLEN];
|
||||
char backup_method[MAXLEN];
|
||||
char start_time[MAXLEN];
|
||||
char label[MAXLEN];
|
||||
XLogRecPtr min_failover_slot_lsn;
|
||||
};
|
||||
|
||||
extern char repmgr_schema[MAXLEN];
|
||||
extern bool config_file_found;
|
||||
|
||||
249
repmgrd.c
249
repmgrd.c
@@ -142,6 +142,20 @@ main(int argc, char **argv)
|
||||
|
||||
set_progname(argv[0]);
|
||||
|
||||
/* Disallow running as root to prevent directory ownership problems */
|
||||
if (geteuid() == 0)
|
||||
{
|
||||
fprintf(stderr,
|
||||
_("%s: cannot be run as root\n"
|
||||
"Please log in (using, e.g., \"su\") as the "
|
||||
"(unprivileged) user that owns "
|
||||
"the data directory.\n"
|
||||
),
|
||||
progname());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
while ((c = getopt_long(argc, argv, "?Vf:vmdp:", long_options, &optindex)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
@@ -260,7 +274,14 @@ main(int argc, char **argv)
|
||||
/* Retrieve record for this node from the local database */
|
||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||
|
||||
/* No node record found - exit gracefully */
|
||||
/*
|
||||
* No node record found - exit gracefully
|
||||
*
|
||||
* Note: it's highly unlikely this situation will occur when starting
|
||||
* repmgrd on a witness, unless someone goes to the trouble of
|
||||
* deleting the node record from the previously copied table.
|
||||
*/
|
||||
|
||||
if (node_info.node_id == NODE_NOT_FOUND)
|
||||
{
|
||||
log_err(_("No metadata record found for this node - terminating\n"));
|
||||
@@ -277,9 +298,12 @@ main(int argc, char **argv)
|
||||
*/
|
||||
do
|
||||
{
|
||||
/* Timer for repl_nodes synchronisation interval */
|
||||
int sync_repl_nodes_elapsed = 0;
|
||||
|
||||
/*
|
||||
* Set my server mode, establish a connection to master and start
|
||||
* monitor
|
||||
* monitoring
|
||||
*/
|
||||
|
||||
switch (node_info.type)
|
||||
@@ -379,8 +403,8 @@ main(int argc, char **argv)
|
||||
local_options.cluster_name);
|
||||
|
||||
master_conn = get_master_connection(my_local_conn,
|
||||
local_options.cluster_name,
|
||||
&master_options.node, NULL);
|
||||
local_options.cluster_name,
|
||||
&master_options.node, NULL);
|
||||
|
||||
if (master_conn == NULL)
|
||||
{
|
||||
@@ -388,8 +412,7 @@ main(int argc, char **argv)
|
||||
initPQExpBuffer(&errmsg);
|
||||
|
||||
appendPQExpBuffer(&errmsg,
|
||||
_("unable to connect to master node '%s'"),
|
||||
master_options.node_name);
|
||||
_("unable to connect to master node"));
|
||||
|
||||
log_err("%s\n", errmsg.data);
|
||||
|
||||
@@ -452,6 +475,24 @@ main(int argc, char **argv)
|
||||
|
||||
sleep(local_options.monitor_interval_secs);
|
||||
|
||||
/*
|
||||
* On a witness node, regularly resync the repl_nodes table
|
||||
* to keep up with any changes on the primary
|
||||
*
|
||||
* TODO: only resync the table if changes actually detected
|
||||
*/
|
||||
if (node_info.type == WITNESS)
|
||||
{
|
||||
sync_repl_nodes_elapsed += local_options.monitor_interval_secs;
|
||||
log_debug(_("seconds since last node record sync: %i (sync interval: %i)\n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs);
|
||||
if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs)
|
||||
{
|
||||
log_debug(_("Resyncing repl_nodes table\n"));
|
||||
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
|
||||
sync_repl_nodes_elapsed = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (got_SIGHUP)
|
||||
{
|
||||
/*
|
||||
@@ -466,6 +507,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
got_SIGHUP = false;
|
||||
}
|
||||
|
||||
if (failover_done)
|
||||
{
|
||||
log_debug(_("standby check loop will terminate\n"));
|
||||
@@ -558,7 +600,7 @@ witness_monitor(void)
|
||||
* XXX it would be neat to be able to handle this with e.g. table-based
|
||||
* logical replication
|
||||
*/
|
||||
copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
|
||||
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -654,15 +696,15 @@ standby_monitor(void)
|
||||
PGresult *res;
|
||||
char monitor_standby_timestamp[MAXLEN];
|
||||
char last_wal_master_location[MAXLEN];
|
||||
char last_wal_standby_received[MAXLEN];
|
||||
char last_wal_standby_applied[MAXLEN];
|
||||
char last_wal_standby_applied_timestamp[MAXLEN];
|
||||
bool last_wal_standby_received_gte_replayed;
|
||||
char last_xlog_receive_location[MAXLEN];
|
||||
char last_xlog_replay_location[MAXLEN];
|
||||
char last_xact_replay_timestamp[MAXLEN];
|
||||
bool last_xlog_receive_location_gte_replayed;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
XLogRecPtr lsn_master;
|
||||
XLogRecPtr lsn_standby_received;
|
||||
XLogRecPtr lsn_standby_applied;
|
||||
XLogRecPtr lsn_master_current_xlog_location;
|
||||
XLogRecPtr lsn_last_xlog_receive_location;
|
||||
XLogRecPtr lsn_last_xlog_replay_location;
|
||||
|
||||
int connection_retries,
|
||||
ret;
|
||||
@@ -674,8 +716,9 @@ standby_monitor(void)
|
||||
t_node_info upstream_node;
|
||||
|
||||
int active_master_id;
|
||||
const char *type = NULL;
|
||||
const char *upstream_node_type = NULL;
|
||||
|
||||
bool receiving_streamed_wal = true;
|
||||
/*
|
||||
* Verify that the local node is still available - if not there's
|
||||
* no point in doing much else anyway
|
||||
@@ -700,9 +743,10 @@ standby_monitor(void)
|
||||
upstream_conn = get_upstream_connection(my_local_conn,
|
||||
local_options.cluster_name,
|
||||
local_options.node,
|
||||
&upstream_node_id, upstream_conninfo);
|
||||
&upstream_node_id,
|
||||
upstream_conninfo);
|
||||
|
||||
type = upstream_node_id == master_options.node
|
||||
upstream_node_type = (upstream_node_id == master_options.node)
|
||||
? "master"
|
||||
: "upstream";
|
||||
|
||||
@@ -712,7 +756,7 @@ standby_monitor(void)
|
||||
* we cannot reconnect, try to get a new upstream node.
|
||||
*/
|
||||
|
||||
check_connection(&upstream_conn, type, upstream_conninfo);
|
||||
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
|
||||
/*
|
||||
* This takes up to local_options.reconnect_attempts *
|
||||
* local_options.reconnect_interval seconds
|
||||
@@ -725,7 +769,7 @@ standby_monitor(void)
|
||||
|
||||
if (local_options.failover == MANUAL_FAILOVER)
|
||||
{
|
||||
log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), type);
|
||||
log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), upstream_node_type);
|
||||
|
||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||
{
|
||||
@@ -784,7 +828,7 @@ standby_monitor(void)
|
||||
* Failover handling is handled differently depending on whether
|
||||
* the failed node is the master or a cascading standby
|
||||
*/
|
||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
|
||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
||||
|
||||
if (upstream_node.type == MASTER)
|
||||
{
|
||||
@@ -887,7 +931,7 @@ standby_monitor(void)
|
||||
* from the upstream node to write monitoring information
|
||||
*/
|
||||
|
||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
|
||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
|
||||
|
||||
sprintf(sqlquery,
|
||||
"SELECT id "
|
||||
@@ -956,29 +1000,42 @@ standby_monitor(void)
|
||||
}
|
||||
|
||||
strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
||||
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
||||
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
||||
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||
last_wal_standby_received_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||
strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN);
|
||||
strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN);
|
||||
strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||
|
||||
last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||
? true
|
||||
: false;
|
||||
|
||||
/*
|
||||
* If pg_last_xlog_receive_location is NULL, this means we're in archive
|
||||
* recovery and will need to calculate lag based on pg_last_xlog_replay_location
|
||||
*/
|
||||
|
||||
/*
|
||||
* Replayed WAL is greater than received streamed WAL
|
||||
*/
|
||||
if (PQgetisnull(res, 0, 1))
|
||||
{
|
||||
receiving_streamed_wal = false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
* In the unusual event of a standby becoming disconnected from the primary,
|
||||
* while this repmgrd remains connected to the primary, subtracting
|
||||
* "lsn_standby_applied" from "lsn_standby_received" and coercing to
|
||||
* "last_xlog_replay_location" from "lsn_last_xlog_receive_location" and coercing to
|
||||
* (long long unsigned int) will result in a meaningless, very large
|
||||
* value which will overflow a BIGINT column and spew error messages into the
|
||||
* PostgreSQL log. In the absence of a better strategy, skip attempting
|
||||
* to insert a monitoring record.
|
||||
*/
|
||||
if (last_wal_standby_received_gte_replayed == false)
|
||||
if (receiving_streamed_wal == true && last_xlog_receive_location_gte_replayed == false)
|
||||
{
|
||||
log_verbose(LOG_WARNING,
|
||||
"Invalid replication_lag value calculated - is this standby connected to its upstream?\n");
|
||||
return;
|
||||
"Replayed WAL newer than received WAL - is this standby connected to its upstream?\n");
|
||||
}
|
||||
|
||||
/* Get master xlog info */
|
||||
@@ -996,29 +1053,49 @@ standby_monitor(void)
|
||||
PQclear(res);
|
||||
|
||||
/* Calculate the lag */
|
||||
lsn_master = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
||||
lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL);
|
||||
lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL);
|
||||
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
||||
|
||||
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
||||
|
||||
if (last_xlog_receive_location_gte_replayed == false)
|
||||
{
|
||||
lsn_last_xlog_receive_location = lsn_last_xlog_replay_location;
|
||||
}
|
||||
else
|
||||
{
|
||||
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Build the SQL to execute on master
|
||||
*/
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_monitor "
|
||||
" (primary_node, standby_node, "
|
||||
" last_monitor_time, last_apply_time, "
|
||||
" last_wal_primary_location, last_wal_standby_location, "
|
||||
" replication_lag, apply_lag ) "
|
||||
" VALUES(%d, %d, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||
" '%s', '%s', "
|
||||
" %llu, %llu) ",
|
||||
" (primary_node, "
|
||||
" standby_node, "
|
||||
" last_monitor_time, "
|
||||
" last_apply_time, "
|
||||
" last_wal_primary_location, "
|
||||
" last_wal_standby_location, "
|
||||
" replication_lag, "
|
||||
" apply_lag ) "
|
||||
" VALUES(%d, "
|
||||
" %d, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||
" '%s', "
|
||||
" '%s', "
|
||||
" %llu, "
|
||||
" %llu) ",
|
||||
get_repmgr_schema_quoted(master_conn),
|
||||
master_options.node, local_options.node,
|
||||
monitor_standby_timestamp, last_wal_standby_applied_timestamp,
|
||||
last_wal_master_location, last_wal_standby_received,
|
||||
(long long unsigned int)(lsn_master - lsn_standby_received),
|
||||
(long long unsigned int)(lsn_standby_received - lsn_standby_applied));
|
||||
master_options.node,
|
||||
local_options.node,
|
||||
monitor_standby_timestamp,
|
||||
last_xact_replay_timestamp,
|
||||
last_wal_master_location,
|
||||
last_xlog_receive_location,
|
||||
(long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location),
|
||||
(long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location));
|
||||
|
||||
/*
|
||||
* Execute the query asynchronously, but don't check for a result. We will
|
||||
@@ -1056,7 +1133,7 @@ do_master_failover(void)
|
||||
XLogRecPtr xlog_recptr;
|
||||
bool lsn_format_ok;
|
||||
|
||||
char last_wal_standby_applied[MAXLEN];
|
||||
char last_xlog_replay_location[MAXLEN];
|
||||
|
||||
PGconn *node_conn = NULL;
|
||||
|
||||
@@ -1239,8 +1316,8 @@ do_master_failover(void)
|
||||
" considered as new master and exit.\n"),
|
||||
PQerrorMessage(my_local_conn));
|
||||
PQclear(res);
|
||||
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
|
||||
update_shared_memory(last_wal_standby_applied);
|
||||
sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0);
|
||||
update_shared_memory(last_xlog_replay_location);
|
||||
terminate(ERR_DB_QUERY);
|
||||
}
|
||||
/* write last location in shared memory */
|
||||
@@ -1370,9 +1447,6 @@ do_master_failover(void)
|
||||
PQfinish(node_conn);
|
||||
}
|
||||
|
||||
/* Close the connection to this server */
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = NULL;
|
||||
|
||||
/*
|
||||
* determine which one is the best candidate to promote to master
|
||||
@@ -1420,18 +1494,24 @@ do_master_failover(void)
|
||||
terminate(ERR_FAILOVER_FAIL);
|
||||
}
|
||||
|
||||
log_debug("best candidate node id is %i\n", best_candidate.node_id);
|
||||
|
||||
/* if local node is the best candidate, promote it */
|
||||
if (best_candidate.node_id == local_options.node)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
|
||||
/* Close the connection to this server */
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = NULL;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
/* wait */
|
||||
sleep(5);
|
||||
|
||||
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
||||
|
||||
log_debug(_("promote command is: \"%s\"\n"),
|
||||
log_debug("promote command is: \"%s\"\n",
|
||||
local_options.promote_command);
|
||||
|
||||
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
||||
@@ -1442,6 +1522,33 @@ do_master_failover(void)
|
||||
r = system(local_options.promote_command);
|
||||
if (r != 0)
|
||||
{
|
||||
/*
|
||||
* Check whether the primary reappeared, which will have caused the
|
||||
* promote command to fail
|
||||
*/
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, false);
|
||||
|
||||
if (my_local_conn != NULL)
|
||||
{
|
||||
int master_node_id;
|
||||
|
||||
master_conn = get_master_connection(my_local_conn,
|
||||
local_options.cluster_name,
|
||||
&master_node_id, NULL);
|
||||
|
||||
if (master_conn != NULL && master_node_id == failed_master.node_id)
|
||||
{
|
||||
log_notice(_("Original master reappeared before this standby was promoted - no action taken\n"));
|
||||
|
||||
PQfinish(master_conn);
|
||||
/* no failover occurred but we'll want to restart connections */
|
||||
failover_done = true;
|
||||
return;
|
||||
}
|
||||
|
||||
PQfinish(my_local_conn);
|
||||
}
|
||||
|
||||
log_err(_("promote command failed. You could check and try it manually.\n"));
|
||||
|
||||
terminate(ERR_DB_QUERY);
|
||||
@@ -1473,11 +1580,39 @@ do_master_failover(void)
|
||||
{
|
||||
PGconn *new_master_conn;
|
||||
PQExpBufferData event_details;
|
||||
int master_node_id;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
/* wait */
|
||||
sleep(10);
|
||||
|
||||
/*
|
||||
* Check whether the primary reappeared while we were waiting, so we
|
||||
* don't end up following the promotion candidate
|
||||
*/
|
||||
|
||||
master_conn = get_master_connection(my_local_conn,
|
||||
local_options.cluster_name,
|
||||
&master_node_id, NULL);
|
||||
|
||||
if (master_conn != NULL && master_node_id == failed_master.node_id)
|
||||
{
|
||||
log_notice(_("Original master reappeared - no action taken\n"));
|
||||
|
||||
PQfinish(master_conn);
|
||||
/* no failover occurred but we'll want to restart connections */
|
||||
failover_done = true;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* Close the connection to this server */
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = NULL;
|
||||
|
||||
/* XXX double-check the promotion candidate did become the new primary */
|
||||
|
||||
log_notice(_("node %d is the best candidate for new master, attempting to follow...\n"),
|
||||
best_candidate.node_id);
|
||||
|
||||
@@ -1601,7 +1736,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
||||
|
||||
if (PQntuples(res) == 0)
|
||||
{
|
||||
log_err(_("no node with id %i found"), upstream_node_id);
|
||||
log_err(_("no node with id %i found\n"), upstream_node_id);
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
@@ -1934,6 +2069,8 @@ check_node_configuration(void)
|
||||
/* Adding the node */
|
||||
log_info(_("adding node %d to cluster '%s'\n"),
|
||||
local_options.node, local_options.cluster_name);
|
||||
|
||||
/* XXX use create_node_record() */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes"
|
||||
" (id, cluster, name, conninfo, priority, witness) "
|
||||
@@ -2055,7 +2192,7 @@ terminate(int retval)
|
||||
|
||||
|
||||
static void
|
||||
update_shared_memory(char *last_wal_standby_applied)
|
||||
update_shared_memory(char *last_xlog_replay_location)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
@@ -2063,7 +2200,7 @@ update_shared_memory(char *last_wal_standby_applied)
|
||||
sprintf(sqlquery,
|
||||
"SELECT %s.repmgr_update_standby_location('%s')",
|
||||
get_repmgr_schema_quoted(my_local_conn),
|
||||
last_wal_standby_applied);
|
||||
last_xlog_replay_location);
|
||||
|
||||
/* If an error happens, just inform about that and continue */
|
||||
res = PQexec(my_local_conn, sqlquery);
|
||||
@@ -2284,7 +2421,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
||||
|
||||
if (res == 0)
|
||||
{
|
||||
log_warning(_("No record found record for node %i\n"), node_id);
|
||||
log_warning(_("No record found for node %i\n"), node_id);
|
||||
}
|
||||
|
||||
return node_info;
|
||||
|
||||
31
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
31
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Update a repmgr 3.1.1 installation to repmgr 3.1.2
|
||||
* --------------------------------------------------
|
||||
*
|
||||
* This update is only required if repmgrd is being used in conjunction
|
||||
* with a witness server.
|
||||
*
|
||||
* The new repmgr package should be installed first. Then
|
||||
* carry out these steps:
|
||||
*
|
||||
* 1. (If repmgrd is used) stop any running repmgrd instances
|
||||
* 2. On the master node, execute the SQL statement listed below
|
||||
* 3. (If repmgrd is used) restart repmgrd
|
||||
*/
|
||||
|
||||
/*
|
||||
* If the repmgr schema is not included in your repmgr
|
||||
* user's search path, please set the search path to the name
|
||||
* of the repmgr schema to ensure objects are installed in
|
||||
* the correct location.
|
||||
*
|
||||
* The repmgr schema is "repmgr_" + the cluster name defined in
|
||||
* 'repmgr.conf'.
|
||||
*/
|
||||
|
||||
-- SET search_path TO 'name_of_repmgr_schema';
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Mark the existing foreign key constraint repl_nodes_upstream_node_id_fkey
-- on repl_nodes as DEFERRABLE, without dropping and recreating it.
-- NOTE(review): ALTER TABLE ... ALTER CONSTRAINT is documented as available
-- from PostgreSQL 9.4 onwards; on a 9.3 server this statement is expected to
-- be rejected. If 9.3 installations need this upgrade, the constraint
-- presumably has to be dropped and re-added as DEFERRABLE instead - confirm
-- against the repl_nodes table definition before changing anything.
ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE;
|
||||
COMMIT;
|
||||
@@ -83,7 +83,12 @@ _PG_init(void)
|
||||
* resources in repmgr_shmem_startup().
|
||||
*/
|
||||
RequestAddinShmemSpace(repmgr_memsize());
|
||||
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
RequestNamedLWLockTranche("repmgr", 1);
|
||||
#else
|
||||
RequestAddinLWLocks(1);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Install hooks.
|
||||
@@ -128,7 +133,11 @@ repmgr_shmem_startup(void)
|
||||
if (!found)
|
||||
{
|
||||
/* First time through ... */
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock;
|
||||
#else
|
||||
shared_state->lock = LWLockAssign();
|
||||
#endif
|
||||
snprintf(shared_state->location,
|
||||
sizeof(shared_state->location), "%X/%X", 0, 0);
|
||||
}
|
||||
|
||||
@@ -24,12 +24,17 @@
|
||||
#include <stdlib.h>
|
||||
#include "errcode.h"
|
||||
|
||||
|
||||
#define QUERY_STR_LEN 8192
|
||||
#define MAXLEN 1024
|
||||
#define MAXLINELENGTH 4096
|
||||
#define MAXVERSIONSTR 16
|
||||
#define MAXCONNINFO 1024
|
||||
|
||||
/* Why? http://stackoverflow.com/a/5459929/398670 */
|
||||
#define STR(x) CppAsString(x)
|
||||
|
||||
#define MAXLEN_STR STR(MAXLEN)
|
||||
|
||||
extern int
|
||||
xsnprintf(char *str, size_t size, const char *format,...)
|
||||
|
||||
Reference in New Issue
Block a user