mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
113 Commits
v3.1.5
...
REL3_0_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7cc7a278c2 | ||
|
|
20d9f978ab | ||
|
|
70a2797b9a | ||
|
|
8f62b4c9e6 | ||
|
|
310f3f31f9 | ||
|
|
4f849de95e | ||
|
|
0de4260664 | ||
|
|
fc75084e42 | ||
|
|
cfbc9dd3c6 | ||
|
|
94579b5f2e | ||
|
|
e9a25c367a | ||
|
|
3088096318 | ||
|
|
3bbd32c73c | ||
|
|
ac17033d61 | ||
|
|
711ad0a76c | ||
|
|
ad988dccce | ||
|
|
53fe3c7e5a | ||
|
|
7a439c90d0 | ||
|
|
87e5257cb8 | ||
|
|
1f240ff9b3 | ||
|
|
9d6cff0d40 | ||
|
|
f86e251430 | ||
|
|
085b7cb8b4 | ||
|
|
5ccf89ad9b | ||
|
|
6ae5401df0 | ||
|
|
4bd8190d02 | ||
|
|
efdc2355a7 | ||
|
|
61b1f72a0e | ||
|
|
882bfd9d8e | ||
|
|
c93f717305 | ||
|
|
85be96a0be | ||
|
|
ce2d4fb86f | ||
|
|
40354e1d62 | ||
|
|
3e1655f241 | ||
|
|
8387e7f65e | ||
|
|
aa4dd155b2 | ||
|
|
a171a501ab | ||
|
|
f42f771ff4 | ||
|
|
88cfcf358e | ||
|
|
ce3594d52d | ||
|
|
f64c42a514 | ||
|
|
3072139d06 | ||
|
|
3b7185fd39 | ||
|
|
819f980e76 | ||
|
|
49316fb8fb | ||
|
|
fa4ff73b87 | ||
|
|
29842f0e0d | ||
|
|
25db1ba737 | ||
|
|
7b9f6f5352 | ||
|
|
53b8f99217 | ||
|
|
95cdaac91d | ||
|
|
e7dd0f690c | ||
|
|
e0c5bb8d31 | ||
|
|
df3e55fa35 | ||
|
|
0ee2a1e6ba | ||
|
|
df05214970 | ||
|
|
bd1314d232 | ||
|
|
745566605d | ||
|
|
807dcc1038 | ||
|
|
acc0ffa81f | ||
|
|
1725e90308 | ||
|
|
2a3fb89603 | ||
|
|
8f24167f68 | ||
|
|
6ce94778d7 | ||
|
|
3a3c6d5143 | ||
|
|
73661637e9 | ||
|
|
ae84041a4e | ||
|
|
ea01d1d30b | ||
|
|
53ed8e948c | ||
|
|
43626892d0 | ||
|
|
8870b7d7f1 | ||
|
|
72b1e57251 | ||
|
|
6054da2c25 | ||
|
|
049ea4e24f | ||
|
|
5f8185ef3a | ||
|
|
66a6c15773 | ||
|
|
919fc0fbef | ||
|
|
c7c117130b | ||
|
|
df6517f167 | ||
|
|
0bf3fb0605 | ||
|
|
c2172d79a5 | ||
|
|
709276a19c | ||
|
|
3f98e1b91b | ||
|
|
8af08ab3f4 | ||
|
|
ff038a5148 | ||
|
|
f56f70c2a6 | ||
|
|
d353fe2a9f | ||
|
|
a70a44605f | ||
|
|
d14dcb3d8b | ||
|
|
249ac7c72a | ||
|
|
9d850fc4bd | ||
|
|
42cb811a07 | ||
|
|
1e202540e3 | ||
|
|
52db03d320 | ||
|
|
60d720f0c7 | ||
|
|
34af7dec2a | ||
|
|
a59ea243c0 | ||
|
|
0c5025b3d6 | ||
|
|
42b79b9b54 | ||
|
|
2e47c6b40b | ||
|
|
6fbff4747f | ||
|
|
cc567d38c8 | ||
|
|
69c552b8e0 | ||
|
|
51967d2bd8 | ||
|
|
97be9c0cda | ||
|
|
00a28fbb1e | ||
|
|
d512bac31d | ||
|
|
fb6781775d | ||
|
|
04c751a912 | ||
|
|
2615cffecc | ||
|
|
1f838f99c2 | ||
|
|
d3f119005b | ||
|
|
db6d4d8820 |
239
FAILOVER.rst
239
FAILOVER.rst
@@ -1 +1,238 @@
|
||||
The contents of this file have been incorporated into the main README.md document.
|
||||
====================================================
|
||||
PostgreSQL Automatic Failover - User Documentation
|
||||
====================================================
|
||||
|
||||
Automatic Failover
|
||||
==================
|
||||
|
||||
repmgr allows for automatic failover when it detects the failure of the master node.
|
||||
Following is a quick setup for this.
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
For convenience, we define:
|
||||
|
||||
**node1**
|
||||
is the fully qualified domain name of the Master server, IP 192.168.1.10
|
||||
**node2**
|
||||
is the fully qualified domain name of the Standby server, IP 192.168.1.11
|
||||
**witness**
|
||||
is the fully qualified domain name of the server used as a witness, IP 192.168.1.12
|
||||
|
||||
**Note:** We don't recommend using names with the status of a server like «masterserver»,
|
||||
because it would be confusing once a failover takes place and the Master is
|
||||
now on the «standbyserver».
|
||||
|
||||
Summary
|
||||
-------
|
||||
|
||||
2 PostgreSQL servers are involved in the replication. Automatic failover needs
|
||||
a vote to decide what server it should promote, so an odd number is required.
|
||||
A witness-repmgrd is installed in a third server where it uses a PostgreSQL
|
||||
cluster to communicate with other repmgrd daemons.
|
||||
|
||||
1. Install PostgreSQL in all the servers involved (including the witness server)
|
||||
|
||||
2. Install repmgr in all the servers involved (including the witness server)
|
||||
|
||||
3. Configure the Master PostgreSQL
|
||||
|
||||
4. Clone the Master to the Standby using "repmgr standby clone" command
|
||||
|
||||
5. Configure repmgr in all the servers involved (including the witness server)
|
||||
|
||||
6. Register Master and Standby nodes
|
||||
|
||||
7. Initiate witness server
|
||||
|
||||
8. Start the repmgrd daemons in all nodes
|
||||
|
||||
**Note** A complete High-Availability design needs at least 3 servers to still have
|
||||
a backup node after a first failure.
|
||||
|
||||
Install PostgreSQL
|
||||
------------------
|
||||
|
||||
You can install PostgreSQL using any of the recommended methods. You should ensure
|
||||
it's 9.0 or later.
|
||||
|
||||
Install repmgr
|
||||
--------------
|
||||
|
||||
Install repmgr following the steps in the README file.
|
||||
|
||||
Configure PostgreSQL
|
||||
--------------------
|
||||
|
||||
Log in to node1.
|
||||
|
||||
Edit the file postgresql.conf and modify the parameters::
|
||||
|
||||
listen_addresses='*'
|
||||
wal_level = 'hot_standby'
|
||||
archive_mode = on
|
||||
archive_command = 'cd .' # we can also use exit 0, anything that
|
||||
# just does nothing
|
||||
max_wal_senders = 10
|
||||
wal_keep_segments = 5000 # 80 GB required on pg_xlog
|
||||
hot_standby = on
|
||||
shared_preload_libraries = 'repmgr_funcs'
|
||||
|
||||
Edit the file pg_hba.conf and add lines for the replication::
|
||||
|
||||
host repmgr repmgr 127.0.0.1/32 trust
|
||||
host repmgr repmgr 192.168.1.10/30 trust
|
||||
host replication all 192.168.1.10/30 trust
|
||||
|
||||
**Note:** It is also possible to use password authentication (md5); in that case the .pgpass file
|
||||
should be edited to allow connection between each node.
|
||||
|
||||
Create the user and database to manage replication::
|
||||
|
||||
su - postgres
|
||||
createuser -s repmgr
|
||||
createdb -O repmgr repmgr
|
||||
|
||||
Restart the PostgreSQL server::
|
||||
|
||||
pg_ctl -D $PGDATA restart
|
||||
|
||||
And check everything is fine in the server log.
|
||||
|
||||
Create the ssh-key for the postgres user and copy it to other servers::
|
||||
|
||||
su - postgres
|
||||
ssh-keygen # /!\ do not use a passphrase /!\
|
||||
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
|
||||
chmod 600 ~/.ssh/authorized_keys
|
||||
exit
|
||||
rsync -avz ~postgres/.ssh/authorized_keys node2:~postgres/.ssh/
|
||||
rsync -avz ~postgres/.ssh/authorized_keys witness:~postgres/.ssh/
|
||||
rsync -avz ~postgres/.ssh/id_rsa* node2:~postgres/.ssh/
|
||||
rsync -avz ~postgres/.ssh/id_rsa* witness:~postgres/.ssh/
|
||||
|
||||
Clone Master
|
||||
------------
|
||||
|
||||
Log in to node2.
|
||||
|
||||
Clone node1 (the current Master)::
|
||||
|
||||
su - postgres
|
||||
repmgr -d repmgr -U repmgr -h node1 standby clone
|
||||
|
||||
Start the PostgreSQL server::
|
||||
|
||||
pg_ctl -D $PGDATA start
|
||||
|
||||
And check everything is fine in the server log.
|
||||
|
||||
Configure repmgr
|
||||
----------------
|
||||
|
||||
Log in to each server and configure repmgr by editing the file
|
||||
/etc/repmgr/repmgr.conf::
|
||||
|
||||
cluster=my_cluster
|
||||
node=1
|
||||
node_name=earth
|
||||
conninfo='host=192.168.1.10 dbname=repmgr user=repmgr'
|
||||
master_response_timeout=60
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=10
|
||||
failover=automatic
|
||||
promote_command='promote_command.sh'
|
||||
follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf'
|
||||
|
||||
**cluster**
|
||||
is the name of the current replication.
|
||||
**node**
|
||||
is the number of the current node (1, 2 or 3 in the current example).
|
||||
**node_name**
|
||||
is an identifier for every node.
|
||||
**conninfo**
|
||||
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration you need to add a 'port=5499' to the conninfo.
|
||||
**master_response_timeout**
|
||||
is the maximum amount of time we are going to wait before deciding the master has died and starting the failover procedure.
|
||||
**reconnect_attempts**
|
||||
is the number of times we will try to reconnect to the master after a failure has been detected and before starting the failover procedure.
|
||||
**reconnect_interval**
|
||||
is the amount of time between retries to reconnect to the master after a failure has been detected and before starting the failover procedure.
|
||||
**failover**
|
||||
configure behavior: *manual* or *automatic*.
|
||||
**promote_command**
|
||||
the command executed to do the failover (including the PostgreSQL failover itself). The command must return 0 on success.
|
||||
**follow_command**
|
||||
the command executed to redirect the current standby to follow another Master. The command must return 0 on success.
|
||||
|
||||
Register Master and Standby
|
||||
---------------------------
|
||||
|
||||
Log in to node1.
|
||||
|
||||
Register the node as master::
|
||||
|
||||
su - postgres
|
||||
repmgr -f /etc/repmgr/repmgr.conf master register
|
||||
|
||||
This will also create the repmgr schema and functions.
|
||||
|
||||
Log in to node2. Register it as a standby::
|
||||
|
||||
su - postgres
|
||||
repmgr -f /etc/repmgr/repmgr.conf standby register
|
||||
|
||||
Initialize witness server
|
||||
-------------------------
|
||||
|
||||
Log in to witness.
|
||||
|
||||
Initialize the witness server::
|
||||
|
||||
su - postgres
|
||||
repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create
|
||||
|
||||
The witness server needs the following information from the command
|
||||
line:
|
||||
|
||||
* Connection details for the current master, to copy the cluster
|
||||
configuration.
|
||||
* A location for initializing its own $PGDATA.
|
||||
|
||||
repmgr will also ask for the superuser password on the witness database so
|
||||
it can reconnect when needed (the command line option --initdb-no-pwprompt
|
||||
will set up a password-less superuser).
|
||||
|
||||
By default the witness server will listen on port 5499; this value can be
|
||||
overridden by explicitly providing the port number in the conninfo string
|
||||
in repmgr.conf. (Note that it is also possible to specify the port number
|
||||
with the -l/--local-port option, however this option is now deprecated and
|
||||
will be overridden by a port setting in the conninfo string).
|
||||
|
||||
Start the repmgrd daemons
|
||||
-------------------------
|
||||
|
||||
Log in to node2 and witness::
|
||||
|
||||
su - postgres
|
||||
repmgrd -f /etc/repmgr/repmgr.conf --daemonize -> /var/log/postgresql/repmgr.log 2>&1
|
||||
|
||||
**Note:** The Master does not need a repmgrd daemon.
|
||||
|
||||
Suspend Automatic behavior
|
||||
==========================
|
||||
|
||||
Edit the repmgr.conf of the node to remove from automatic processing and change::
|
||||
|
||||
failover=manual
|
||||
|
||||
Then, signal repmgrd daemon::
|
||||
|
||||
su - postgres
|
||||
kill -HUP $(pidof repmgrd)
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
The repmgr documentation is in the README file (how to build, options, etc.)
|
||||
|
||||
18
FAQ.md
18
FAQ.md
@@ -38,7 +38,7 @@ General
|
||||
|
||||
No. Hash indexes and replication do not mix well and their use is
|
||||
explicitly discouraged; see:
|
||||
https://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
||||
http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
||||
|
||||
`repmgr`
|
||||
--------
|
||||
@@ -120,22 +120,6 @@ General
|
||||
permission is for PostgreSQL's streaming replication and doesn't
|
||||
necessarily need to be the `repmgr` user.
|
||||
|
||||
- When cloning a standby, why do I need to provide the connection parameters
|
||||
for the primary server on the command line, not in the configuration file?
|
||||
|
||||
Cloning a standby is a one-time action; the role of the server being cloned
|
||||
from could change, so fixing it in the configuration file would create
|
||||
confusion. If `repmgr` needs to establish a connection to the primary
|
||||
server, it can retrieve this from the `repl_nodes` table or if necessary
|
||||
scan the replication cluster until it locates the active primary.
|
||||
|
||||
- Why is there no foreign key on the `node_id` column in the `repl_events`
|
||||
table?
|
||||
|
||||
Under some circumstances event notifications can be generated for servers
|
||||
which have not yet been registered; it's also useful to retain a record
|
||||
of events which includes servers removed from the replication cluster
|
||||
which no longer have an entry in the `repl_nodes` table.
|
||||
|
||||
`repmgrd`
|
||||
---------
|
||||
|
||||
52
HISTORY
52
HISTORY
@@ -1,55 +1,5 @@
|
||||
3.1.5 2016-08-15
|
||||
repmgrd: in a failover situation, prevent endless looping when
|
||||
attempting to establish the status of a node with
|
||||
`failover=manual` (Ian)
|
||||
repmgrd: improve handling of failover events on standbys with
|
||||
`failover=manual`, and create a new event notification
|
||||
for this, `standby_disconnect_manual` (Ian)
|
||||
repmgr: add further event notifications (Gianni)
|
||||
repmgr: when executing `standby switchover`, don't collect remote
|
||||
command output unless required (Gianni, Ian)
|
||||
repmgrd: improve standby monitoring query (Ian, based on suggestion
|
||||
from Álvaro)
|
||||
repmgr: various command line handling improvements (Ian)
|
||||
|
||||
3.1.4 2016-07-12
|
||||
repmgr: new configuration option for setting "restore_command"
|
||||
in the recovery.conf file generated by repmgr (Martín)
|
||||
repmgr: add --csv option to "repmgr cluster show" (Gianni)
|
||||
repmgr: enable provision of a conninfo string as the -d/--dbname
|
||||
parameter, similar to other PostgreSQL utilities (Ian)
|
||||
repmgr: during switchover operations improve detection of
|
||||
demotion candidate shutdown (Ian)
|
||||
various bugfixes and documentation updates (Ian, Martín)
|
||||
|
||||
3.1.3 2016-05-17
|
||||
repmgrd: enable monitoring when a standby is catching up by
|
||||
replaying archived WAL (Ian)
|
||||
repmgrd: when upstream_node_id is NULL, assume upstream node
|
||||
to be current master (Ian)
|
||||
repmgrd: check for reappearance of the master node if standby
|
||||
promotion fails (Ian)
|
||||
improve handling of rsync failure conditions (Martín)
|
||||
|
||||
3.1.2 2016-04-12
|
||||
Fix pg_ctl path generation in do_standby_switchover() (Ian)
|
||||
Regularly sync witness server repl_nodes table (Ian)
|
||||
Documentation improvements (Gianni, dhyannataraj)
|
||||
(Experimental) ensure repmgr handles failover slots when copying
|
||||
in rsync mode (Craig, Ian)
|
||||
rsync mode handling fixes (Martín)
|
||||
Enable repmgr to compile against 9.6devel (Ian)
|
||||
|
||||
3.1.1 2016-02-24
|
||||
Add '-P/--pwprompt' option for "repmgr create witness" (Ian)
|
||||
Prevent repmgr/repmgrd running as root (Ian)
|
||||
|
||||
3.1.0 2016-02-01
|
||||
Add "repmgr standby switchover" command (Ian)
|
||||
Revised README file (Ian)
|
||||
3.0.4 2016-01-
|
||||
Remove requirement for 'archive_mode' to be enabled (Ian)
|
||||
Improve -?/--help output, showing default values if relevant (Ian)
|
||||
Various bugfixes to command line/configuration parameter handling (Ian)
|
||||
|
||||
3.0.3 2016-01-04
|
||||
Create replication slot if required before base backup is run (Abhijit)
|
||||
|
||||
48
Makefile
48
Makefile
@@ -2,32 +2,23 @@
|
||||
# Makefile
|
||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||
|
||||
HEADERS = $(wildcard *.h)
|
||||
|
||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o
|
||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||
|
||||
DATA = repmgr.sql uninstall_repmgr.sql
|
||||
|
||||
PG_CPPFLAGS = -I$(libpq_srcdir)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
|
||||
|
||||
all: repmgrd repmgr
|
||||
all: repmgrd repmgr
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgrd: $(repmgrd_OBJS)
|
||||
$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgr: $(repmgr_OBJS)
|
||||
$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
|
||||
# Make all objects depend on all include files. This is a bit of a
|
||||
# shotgun approach, but the codebase is small enough that a complete rebuild
|
||||
# is very fast anyway.
|
||||
$(repmgr_OBJS): $(HEADERS)
|
||||
$(repmgrd_OBJS): $(HEADERS)
|
||||
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
@@ -40,8 +31,8 @@ include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
# XXX: This overrides the pgxs install target - we're building two binaries,
|
||||
# which is not supported by pgxs.mk's PROGRAM construct.
|
||||
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
|
||||
# is overriding pgxs install.
|
||||
install: install_prog install_ext
|
||||
|
||||
install_prog:
|
||||
@@ -52,12 +43,6 @@ install_prog:
|
||||
install_ext:
|
||||
$(MAKE) -C sql install
|
||||
|
||||
# Distribution-specific package building targets
|
||||
# ----------------------------------------------
|
||||
#
|
||||
# XXX we recommend using the PGDG-supplied packages where possible;
|
||||
# see README.md for details.
|
||||
|
||||
install_rhel:
|
||||
mkdir -p '$(DESTDIR)/etc/init.d/'
|
||||
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
|
||||
@@ -82,21 +67,16 @@ clean:
|
||||
rm -f repmgr
|
||||
$(MAKE) -C sql clean
|
||||
|
||||
# Get correct version numbers and install paths, depending on your postgres version
|
||||
PG_VERSION = $(shell pg_config --version | cut -d ' ' -f 2 | cut -d '.' -f 1,2)
|
||||
REPMGR_VERSION = $(shell grep REPMGR_VERSION version.h | cut -d ' ' -f 3 | cut -d '"' -f 2)
|
||||
PKGLIBDIR = $(shell pg_config --pkglibdir)
|
||||
SHAREDIR = $(shell pg_config --sharedir)
|
||||
|
||||
deb: repmgrd repmgr
|
||||
mkdir -p ./debian/usr/bin
|
||||
cp repmgrd repmgr ./debian/usr/bin/
|
||||
mkdir -p ./debian$(SHAREDIR)/contrib/
|
||||
cp sql/repmgr_funcs.sql ./debian$(SHAREDIR)/contrib/
|
||||
cp sql/uninstall_repmgr_funcs.sql ./debian$(SHAREDIR)/contrib/
|
||||
mkdir -p ./debian$(PKGLIBDIR)/
|
||||
cp sql/repmgr_funcs.so ./debian$(PKGLIBDIR)/
|
||||
mkdir -p ./debian/usr/share/postgresql/9.0/contrib/
|
||||
cp sql/repmgr_funcs.sql ./debian/usr/share/postgresql/9.0/contrib/
|
||||
cp sql/uninstall_repmgr_funcs.sql ./debian/usr/share/postgresql/9.0/contrib/
|
||||
mkdir -p ./debian/usr/lib/postgresql/9.0/lib/
|
||||
cp sql/repmgr_funcs.so ./debian/usr/lib/postgresql/9.0/lib/
|
||||
dpkg-deb --build debian
|
||||
mv debian.deb ../postgresql-repmgr-$(PG_VERSION)_$(REPMGR_VERSION).deb
|
||||
mv debian.deb ../postgresql-repmgr-9.0_1.0.0.deb
|
||||
rm -rf ./debian/usr
|
||||
|
||||
|
||||
|
||||
119
QUICKSTART.md
119
QUICKSTART.md
@@ -1 +1,118 @@
|
||||
The contents of this file have been incorporated into the main README.md document.
|
||||
repmgr quickstart guide
|
||||
=======================
|
||||
|
||||
This quickstart guide provides some annotated examples on basic
|
||||
`repmgr` setup. It assumes you are familiar with PostgreSQL replication
|
||||
concepts and setup, as well as Linux/UNIX system administration.
|
||||
|
||||
For the purposes of this guide, we'll assume the database user will be
|
||||
`repmgr_usr` and the database will be `repmgr_db`.
|
||||
|
||||
|
||||
Master setup
|
||||
------------
|
||||
|
||||
1. Configure PostgreSQL
|
||||
|
||||
- create user and database:
|
||||
|
||||
```
|
||||
CREATE ROLE repmgr_usr LOGIN SUPERUSER;
|
||||
CREATE DATABASE repmgr_db OWNER repmgr_usr;
|
||||
```
|
||||
|
||||
- configure `postgresql.conf` for replication (see README.md for sample
|
||||
settings)
|
||||
|
||||
- update `pg_hba.conf`, e.g.:
|
||||
|
||||
```
|
||||
host repmgr_db repmgr_usr 192.168.1.0/24 trust
|
||||
host replication repmgr_usr 192.168.1.0/24 trust
|
||||
```
|
||||
|
||||
Restart the PostgreSQL server after making these changes.
|
||||
|
||||
2. Create the `repmgr` configuration file:
|
||||
|
||||
$ cat /path/to/repmgr/node1/repmgr.conf
|
||||
cluster=test
|
||||
node=1
|
||||
node_name=node1
|
||||
conninfo='host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||
pg_bindir=/path/to/postgres/bin
|
||||
|
||||
(For an annotated `repmgr.conf` file, see `repmgr.conf.sample` in the
|
||||
repository's root directory).
|
||||
|
||||
3. Register the master node with `repmgr`:
|
||||
|
||||
$ repmgr -f /path/to/repmgr/node1/repmgr.conf --verbose master register
|
||||
[2015-03-03 17:45:53] [INFO] repmgr connecting to master database
|
||||
[2015-03-03 17:45:53] [INFO] repmgr connected to master, checking its state
|
||||
[2015-03-03 17:45:53] [INFO] master register: creating database objects inside the repmgr_test schema
|
||||
[2015-03-03 17:45:53] [NOTICE] Master node correctly registered for cluster test with id 1 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
|
||||
Standby setup
|
||||
-------------
|
||||
|
||||
1. Use `repmgr standby clone` to clone a standby from the master:
|
||||
|
||||
repmgr -D /path/to/standby/data -d repmgr_db -U repmgr_usr --verbose standby clone 192.168.1.2
|
||||
[2015-03-03 18:18:21] [NOTICE] No configuration file provided and default file './repmgr.conf' not found - continuing with default values
|
||||
[2015-03-03 18:18:21] [NOTICE] repmgr Destination directory ' /path/to/standby/data' provided
|
||||
[2015-03-03 18:18:21] [INFO] repmgr connecting to upstream node
|
||||
[2015-03-03 18:18:21] [INFO] repmgr connected to upstream node, checking its state
|
||||
[2015-03-03 18:18:21] [INFO] Successfully connected to upstream node. Current installation size is 27 MB
|
||||
[2015-03-03 18:18:21] [NOTICE] Starting backup...
|
||||
[2015-03-03 18:18:21] [INFO] creating directory " /path/to/standby/data"...
|
||||
[2015-03-03 18:18:21] [INFO] Executing: 'pg_basebackup -l "repmgr base backup" -h localhost -p 9595 -U repmgr_usr -D /path/to/standby/data '
|
||||
NOTICE: pg_stop_backup complete, all required WAL segments have been archived
|
||||
[2015-03-03 18:18:23] [NOTICE] repmgr standby clone (using pg_basebackup) complete
|
||||
[2015-03-03 18:18:23] [NOTICE] HINT: You can now start your postgresql server
|
||||
[2015-03-03 18:18:23] [NOTICE] for example : pg_ctl -D /path/to/standby/data start
|
||||
|
||||
Note that the `repmgr.conf` file is not required when cloning a standby.
|
||||
However we recommend providing a valid `repmgr.conf` if you wish to use
|
||||
replication slots, or want `repmgr` to log the clone event to the
|
||||
`repl_events` table.
|
||||
|
||||
This will clone the PostgreSQL database files from the master, including its
|
||||
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
||||
the `recovery.conf` file containing the correct parameters to start streaming
|
||||
from the primary node.
|
||||
|
||||
2. Start the PostgreSQL server
|
||||
|
||||
3. Create the `repmgr` configuration file:
|
||||
|
||||
$ cat /path/node2/repmgr/repmgr.conf
|
||||
cluster=test
|
||||
node=2
|
||||
node_name=node2
|
||||
conninfo='host=repmgr_node2 user=repmgr_usr dbname=repmgr_db'
|
||||
pg_bindir=/path/to/postgres/bin
|
||||
|
||||
4. Register the standby node with `repmgr`:
|
||||
|
||||
$ repmgr -f /path/to/repmgr/node2/repmgr.conf --verbose standby register
|
||||
[2015-03-03 18:24:34] [NOTICE] Opening configuration file: /path/to/repmgr/node2/repmgr.conf
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to standby database
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to master database
|
||||
[2015-03-03 18:24:34] [INFO] finding node list for cluster 'test'
|
||||
[2015-03-03 18:24:34] [INFO] checking role of cluster node '1'
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connected to master, checking its state
|
||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby
|
||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby complete
|
||||
[2015-03-03 18:24:34] [NOTICE] Standby node correctly registered for cluster test with id 2 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
|
||||
|
||||
This concludes the basic `repmgr` setup of master and standby. The records
|
||||
created in the `repl_nodes` table should look something like this:
|
||||
|
||||
repmgr_db=# SELECT * from repmgr_test.repl_nodes;
|
||||
id | type | upstream_node_id | cluster | name | conninfo | slot_name | priority | active
|
||||
----+---------+------------------+---------+-------+----------------------------------------------------+-----------+----------+--------
|
||||
1 | primary | | test | node1 | host=repmgr_node1 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
2 | standby | 1 | test | node2 | host=repmgr_node2 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
(2 rows)
|
||||
|
||||
61
RHEL/repmgr3-93.spec
Normal file
61
RHEL/repmgr3-93.spec
Normal file
@@ -0,0 +1,61 @@
|
||||
Summary: repmgr
|
||||
Name: repmgr
|
||||
Version: 3.0
|
||||
Release: 1
|
||||
License: GPLv3
|
||||
Group: System Environment/Daemons
|
||||
URL: http://repmgr.org
|
||||
Packager: Ian Barwick <ian@2ndquadrant.com>
|
||||
Vendor: 2ndQuadrant Limited
|
||||
Distribution: centos
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
||||
|
||||
%description
|
||||
repmgr is a utility suite which greatly simplifies
|
||||
the process of setting up and managing replication
|
||||
using streaming replication within a cluster of
|
||||
PostgreSQL servers.
|
||||
|
||||
%prep
|
||||
%setup
|
||||
|
||||
%build
|
||||
export PATH=$PATH:/usr/pgsql-9.3/bin/
|
||||
%{__make} USE_PGXS=1
|
||||
|
||||
%install
|
||||
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
|
||||
|
||||
export PATH=$PATH:/usr/pgsql-9.3/bin/
|
||||
%{__make} USE_PGXS=1 install DESTDIR=%{buildroot} INSTALL="install -p"
|
||||
%{__make} USE_PGXS=1 install_prog DESTDIR=%{buildroot} INSTALL="install -p"
|
||||
%{__make} USE_PGXS=1 install_rhel DESTDIR=%{buildroot} INSTALL="install -p"
|
||||
|
||||
|
||||
%clean
|
||||
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
|
||||
|
||||
|
||||
%files
|
||||
%defattr(-,root,root)
|
||||
/usr/bin/repmgr
|
||||
/usr/bin/repmgrd
|
||||
/usr/pgsql-9.3/bin/repmgr
|
||||
/usr/pgsql-9.3/bin/repmgrd
|
||||
/usr/pgsql-9.3/lib/repmgr_funcs.so
|
||||
/usr/pgsql-9.3/share/contrib/repmgr.sql
|
||||
/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql
|
||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql
|
||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql
|
||||
%attr(0755,root,root)/etc/init.d/repmgrd
|
||||
%attr(0644,root,root)/etc/sysconfig/repmgrd
|
||||
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
|
||||
|
||||
%changelog
|
||||
* Tue Mar 10 2015 Ian Barwick <ian@2ndquadrant.com>
|
||||
- build for repmgr 3.0
|
||||
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
|
||||
- fix witness creation to create db and user if needed
|
||||
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
|
||||
- initial build for RHEL6
|
||||
133
RHEL/repmgrd.init
Executable file
133
RHEL/repmgrd.init
Executable file
@@ -0,0 +1,133 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# chkconfig: - 75 16
|
||||
# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
||||
# processname: repmgrd
|
||||
# pidfile="/var/run/${NAME}.pid"
|
||||
|
||||
# Source function library.
|
||||
INITD=/etc/rc.d/init.d
|
||||
. $INITD/functions
|
||||
|
||||
# Get function listing for cross-distribution logic.
|
||||
TYPESET=`typeset -f|grep "declare"`
|
||||
|
||||
# Get network config.
|
||||
. /etc/sysconfig/network
|
||||
|
||||
DESC="PostgreSQL replication management and monitoring daemon"
|
||||
NAME=repmgrd
|
||||
|
||||
REPMGRD_ENABLED=no
|
||||
REPMGRD_OPTS=
|
||||
REPMGRD_USER=postgres
|
||||
REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
|
||||
REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
||||
REPMGRD_LOCK=/var/lock/subsys/${NAME}
|
||||
REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
|
||||
|
||||
# Read configuration variable file if it is present
|
||||
[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
|
||||
|
||||
# For SELinux we need to use 'runuser' not 'su'
|
||||
if [ -x /sbin/runuser ]
|
||||
then
|
||||
SU=runuser
|
||||
else
|
||||
SU=su
|
||||
fi
|
||||
|
||||
test -x $REPMGRD_BIN || exit 0
|
||||
|
||||
case "$REPMGRD_ENABLED" in
|
||||
[Yy]*)
|
||||
break
|
||||
;;
|
||||
*)
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
if [ -z "${REPMGRD_OPTS}" ]
|
||||
then
|
||||
echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
start()
|
||||
{
|
||||
REPMGRD_START=$"Starting ${NAME} service: "
|
||||
|
||||
# Make sure startup-time log file is valid
|
||||
if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
|
||||
then
|
||||
touch "${REPMGRD_LOG}" || exit 1
|
||||
chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
|
||||
chmod go-rwx "${REPMGRD_LOG}"
|
||||
[ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
|
||||
fi
|
||||
|
||||
echo -n "${REPMGRD_START}"
|
||||
$SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
|
||||
sleep 2
|
||||
pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
|
||||
if [ "x${pid}" != "x" ]
|
||||
then
|
||||
success "${REPMGRD_START}"
|
||||
touch "${REPMGRD_LOCK}"
|
||||
echo $pid > "${REPMGRD_PIDFILE}"
|
||||
echo
|
||||
else
|
||||
failure "${REPMGRD_START}"
|
||||
echo
|
||||
script_result=1
|
||||
fi
|
||||
}
|
||||
|
||||
stop()
|
||||
{
|
||||
echo -n $"Stopping ${NAME} service: "
|
||||
if [ -e "${REPMGRD_LOCK}" ]
|
||||
then
|
||||
killproc ${NAME}
|
||||
ret=$?
|
||||
if [ $ret -eq 0 ]
|
||||
then
|
||||
echo_success
|
||||
rm -f "${REPMGRD_PIDFILE}"
|
||||
rm -f "${REPMGRD_LOCK}"
|
||||
else
|
||||
echo_failure
|
||||
script_result=1
|
||||
fi
|
||||
else
|
||||
# not running; per LSB standards this is "ok"
|
||||
echo_success
|
||||
fi
|
||||
echo
|
||||
}
|
||||
|
||||
|
||||
# See how we were called.
|
||||
case "$1" in
|
||||
start)
|
||||
start
|
||||
;;
|
||||
stop)
|
||||
stop
|
||||
;;
|
||||
status)
|
||||
status -p $REPMGRD_PIDFILE $NAME
|
||||
script_result=$?
|
||||
;;
|
||||
restart)
|
||||
stop
|
||||
start
|
||||
;;
|
||||
*)
|
||||
echo $"Usage: $0 {start|stop|status|restart}"
|
||||
exit 2
|
||||
esac
|
||||
|
||||
exit $script_result
|
||||
21
RHEL/repmgrd.sysconfig
Normal file
21
RHEL/repmgrd.sysconfig
Normal file
@@ -0,0 +1,21 @@
|
||||
# default settings for repmgrd. This file is sourced by /bin/sh from
|
||||
# /etc/init.d/repmgrd
|
||||
|
||||
# disable repmgrd by default so it won't get started upon installation
|
||||
# valid values: yes/no
|
||||
REPMGRD_ENABLED=no
|
||||
|
||||
# Options for repmgrd (required)
|
||||
#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
|
||||
|
||||
# User to run repmgrd as
|
||||
#REPMGRD_USER=postgres
|
||||
|
||||
# repmgrd binary
|
||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
||||
|
||||
# pid file
|
||||
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
||||
|
||||
# log file
|
||||
#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log
|
||||
11
TODO
11
TODO
@@ -40,6 +40,13 @@ Planned feature improvements
|
||||
* make old master node ID available for event notification commands
|
||||
(See github issue #80).
|
||||
|
||||
* Have pg_basebackup use replication slots, if and when support for
|
||||
this is added; see:
|
||||
http://www.postgresql.org/message-id/555DD2B2.7020000@gmx.net
|
||||
|
||||
* use "primary/standby" terminology in place of "master/slave" for consistency
|
||||
with main PostgreSQL usage
|
||||
|
||||
* repmgr standby clone: possibility to use barman instead of performing a new base backup
|
||||
|
||||
* possibility to transform a failed master into a new standby with pg_rewind
|
||||
@@ -53,10 +60,6 @@ Planned feature improvements
|
||||
requested, activate the replication slot using pg_receivexlog to negate the
|
||||
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
|
||||
|
||||
* repmgr: enable "standby follow" to point a standby at another standby, not
|
||||
just the replication cluster master (see GitHub #130)
|
||||
|
||||
|
||||
Usability improvements
|
||||
======================
|
||||
|
||||
|
||||
80
config.c
80
config.c
@@ -28,10 +28,10 @@ static void parse_event_notifications_list(t_configuration_options *options, con
|
||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||
static void exit_with_errors(ErrorList *config_errors);
|
||||
|
||||
const static char *_progname = NULL;
|
||||
const static char *_progname = '\0';
|
||||
static char config_file_path[MAXPGPATH];
|
||||
static bool config_file_provided = false;
|
||||
bool config_file_found = false;
|
||||
static bool config_file_found = false;
|
||||
|
||||
|
||||
void
|
||||
@@ -149,7 +149,7 @@ load_config(const char *config_file, bool verbose, t_configuration_options *opti
|
||||
|
||||
if (verbose == true)
|
||||
{
|
||||
log_notice(_("looking for configuration file in %s\n"), sysconf_etc_path);
|
||||
log_notice(_("looking for configuration file in %s"), sysconf_etc_path);
|
||||
}
|
||||
|
||||
snprintf(config_file_path, MAXPGPATH, "%s/%s", sysconf_etc_path, CONFIG_FILE_NAME);
|
||||
@@ -198,13 +198,11 @@ parse_config(t_configuration_options *options)
|
||||
|
||||
/* For sanity-checking provided conninfo string */
|
||||
PQconninfoOption *conninfo_options;
|
||||
char *conninfo_errmsg = NULL;
|
||||
char *conninfo_errmsg = NULL;
|
||||
|
||||
/* Collate configuration file errors here for friendlier reporting */
|
||||
static ErrorList config_errors = { NULL, NULL };
|
||||
|
||||
bool node_found = false;
|
||||
|
||||
/* Initialize configuration options with sensible defaults
|
||||
* note: the default log level is set in log.c and does not need
|
||||
* to be initialised here
|
||||
@@ -224,7 +222,6 @@ parse_config(t_configuration_options *options)
|
||||
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
||||
memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options));
|
||||
memset(options->restore_command, 0, sizeof(options->restore_command));
|
||||
|
||||
/* default master_response_timeout is 60 seconds */
|
||||
options->master_response_timeout = 60;
|
||||
@@ -236,12 +233,7 @@ parse_config(t_configuration_options *options)
|
||||
options->monitor_interval_secs = 2;
|
||||
options->retry_promote_interval_secs = 300;
|
||||
|
||||
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
|
||||
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||
|
||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||
options->event_notifications.head = NULL;
|
||||
options->event_notifications.tail = NULL;
|
||||
|
||||
options->tablespace_mapping.head = NULL;
|
||||
options->tablespace_mapping.tail = NULL;
|
||||
@@ -252,7 +244,7 @@ parse_config(t_configuration_options *options)
|
||||
*/
|
||||
if (config_file_found == false)
|
||||
{
|
||||
log_verbose(LOG_NOTICE, _("no configuration file provided and no default file found - "
|
||||
log_notice(_("no configuration file provided and no default file found - "
|
||||
"continuing with default values\n"));
|
||||
return true;
|
||||
}
|
||||
@@ -298,12 +290,9 @@ parse_config(t_configuration_options *options)
|
||||
if (strcmp(name, "cluster") == 0)
|
||||
strncpy(options->cluster_name, value, MAXLEN);
|
||||
else if (strcmp(name, "node") == 0)
|
||||
{
|
||||
options->node = repmgr_atoi(value, "node", &config_errors, false);
|
||||
node_found = true;
|
||||
}
|
||||
options->node = repmgr_atoi(value, "node", &config_errors);
|
||||
else if (strcmp(name, "upstream_node") == 0)
|
||||
options->upstream_node = repmgr_atoi(value, "upstream_node", &config_errors, false);
|
||||
options->upstream_node = repmgr_atoi(value, "upstream_node", &config_errors);
|
||||
else if (strcmp(name, "conninfo") == 0)
|
||||
strncpy(options->conninfo, value, MAXLEN);
|
||||
else if (strcmp(name, "rsync_options") == 0)
|
||||
@@ -334,7 +323,7 @@ parse_config(t_configuration_options *options)
|
||||
}
|
||||
}
|
||||
else if (strcmp(name, "priority") == 0)
|
||||
options->priority = repmgr_atoi(value, "priority", &config_errors, true);
|
||||
options->priority = repmgr_atoi(value, "priority", &config_errors);
|
||||
else if (strcmp(name, "node_name") == 0)
|
||||
strncpy(options->node_name, value, MAXLEN);
|
||||
else if (strcmp(name, "promote_command") == 0)
|
||||
@@ -342,17 +331,16 @@ parse_config(t_configuration_options *options)
|
||||
else if (strcmp(name, "follow_command") == 0)
|
||||
strncpy(options->follow_command, value, MAXLEN);
|
||||
else if (strcmp(name, "master_response_timeout") == 0)
|
||||
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
||||
/*
|
||||
* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
||||
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors);
|
||||
/* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
||||
* we'll switch terminology in a future release (3.1?)
|
||||
*/
|
||||
else if (strcmp(name, "primary_response_timeout") == 0)
|
||||
options->master_response_timeout = repmgr_atoi(value, "primary_response_timeout", &config_errors, false);
|
||||
options->master_response_timeout = repmgr_atoi(value, "primary_response_timeout", &config_errors);
|
||||
else if (strcmp(name, "reconnect_attempts") == 0)
|
||||
options->reconnect_attempts = repmgr_atoi(value, "reconnect_attempts", &config_errors, false);
|
||||
options->reconnect_attempts = repmgr_atoi(value, "reconnect_attempts", &config_errors);
|
||||
else if (strcmp(name, "reconnect_interval") == 0)
|
||||
options->reconnect_interval = repmgr_atoi(value, "reconnect_interval", &config_errors, false);
|
||||
options->reconnect_interval = repmgr_atoi(value, "reconnect_interval", &config_errors);
|
||||
else if (strcmp(name, "pg_bindir") == 0)
|
||||
strncpy(options->pg_bindir, value, MAXLEN);
|
||||
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||
@@ -362,22 +350,18 @@ parse_config(t_configuration_options *options)
|
||||
else if (strcmp(name, "logfile") == 0)
|
||||
strncpy(options->logfile, value, MAXLEN);
|
||||
else if (strcmp(name, "monitor_interval_secs") == 0)
|
||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors);
|
||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
|
||||
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
|
||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors);
|
||||
else if (strcmp(name, "use_replication_slots") == 0)
|
||||
/* XXX we should have a dedicated boolean argument format */
|
||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors);
|
||||
else if (strcmp(name, "event_notification_command") == 0)
|
||||
strncpy(options->event_notification_command, value, MAXLEN);
|
||||
else if (strcmp(name, "event_notifications") == 0)
|
||||
parse_event_notifications_list(options, value);
|
||||
else if (strcmp(name, "tablespace_mapping") == 0)
|
||||
tablespace_list_append(options, value);
|
||||
else if (strcmp(name, "restore_command") == 0)
|
||||
strncpy(options->restore_command, value, MAXLEN);
|
||||
else
|
||||
{
|
||||
known_parameter = false;
|
||||
@@ -403,17 +387,29 @@ parse_config(t_configuration_options *options)
|
||||
|
||||
fclose(fp);
|
||||
|
||||
/* Check config settings */
|
||||
|
||||
if (node_found == false)
|
||||
/* The following checks are for the presence of the parameter */
|
||||
if (*options->cluster_name == '\0')
|
||||
{
|
||||
error_list_append(&config_errors, _("\"node\": parameter was not found"));
|
||||
}
|
||||
else if (options->node == 0)
|
||||
{
|
||||
error_list_append(&config_errors, _("\"node\": must be greater than zero"));
|
||||
error_list_append(&config_errors, _("\"cluster\": parameter was not found\n"));
|
||||
}
|
||||
|
||||
if (strlen(options->conninfo))
|
||||
if (options->node == -1)
|
||||
{
|
||||
error_list_append(&config_errors, _("\"node\": parameter was not found\n"));
|
||||
}
|
||||
|
||||
if (*options->node_name == '\0')
|
||||
{
|
||||
error_list_append(&config_errors, _("\"node_name\": parameter was not found\n"));
|
||||
}
|
||||
|
||||
if (*options->conninfo == '\0')
|
||||
{
|
||||
error_list_append(&config_errors, _("\"conninfo\": parameter was not found\n"));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* Sanity check the provided conninfo string
|
||||
@@ -795,7 +791,7 @@ error_list_append(ErrorList *error_list, char *error_message)
|
||||
* otherwise exit
|
||||
*/
|
||||
int
|
||||
repmgr_atoi(const char *value, const char *config_item, ErrorList *error_list, bool allow_negative)
|
||||
repmgr_atoi(const char *value, const char *config_item, ErrorList *error_list)
|
||||
{
|
||||
char *endptr;
|
||||
long longval = 0;
|
||||
@@ -826,8 +822,8 @@ repmgr_atoi(const char *value, const char *config_item, ErrorList *error_list, b
|
||||
}
|
||||
}
|
||||
|
||||
/* Disallow negative values for most parameters */
|
||||
if (allow_negative == false && longval < 0)
|
||||
/* Currently there are no values which could be negative */
|
||||
if (longval < 0)
|
||||
{
|
||||
snprintf(error_message_buf,
|
||||
MAXLEN,
|
||||
|
||||
11
config.h
11
config.h
@@ -72,22 +72,16 @@ typedef struct
|
||||
char pg_bindir[MAXLEN];
|
||||
char pg_ctl_options[MAXLEN];
|
||||
char pg_basebackup_options[MAXLEN];
|
||||
char restore_command[MAXLEN];
|
||||
char logfile[MAXLEN];
|
||||
int monitor_interval_secs;
|
||||
int retry_promote_interval_secs;
|
||||
int witness_repl_nodes_sync_interval_secs;
|
||||
int use_replication_slots;
|
||||
char event_notification_command[MAXLEN];
|
||||
EventNotificationList event_notifications;
|
||||
TablespaceList tablespace_mapping;
|
||||
} t_configuration_options;
|
||||
|
||||
/*
|
||||
* The following will initialize the structure with a minimal set of options;
|
||||
* actual defaults are set in parse_config() before parsing the configuration file
|
||||
*/
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
|
||||
typedef struct ErrorListCell
|
||||
{
|
||||
@@ -112,7 +106,6 @@ char *trim(char *s);
|
||||
void error_list_append(ErrorList *error_list, char *error_message);
|
||||
int repmgr_atoi(const char *s,
|
||||
const char *config_item,
|
||||
ErrorList *error_list,
|
||||
bool allow_negative);
|
||||
ErrorList *error_list);
|
||||
|
||||
#endif
|
||||
|
||||
487
dbutils.c
487
dbutils.c
@@ -26,15 +26,11 @@
|
||||
#include "strutil.h"
|
||||
#include "log.h"
|
||||
|
||||
#include "catalog/pg_control.h"
|
||||
|
||||
char repmgr_schema[MAXLEN] = "";
|
||||
char repmgr_schema_quoted[MAXLEN] = "";
|
||||
|
||||
static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);
|
||||
|
||||
PGconn *
|
||||
_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice, const bool verbose_only)
|
||||
establish_db_connection(const char *conninfo, const bool exit_on_error)
|
||||
{
|
||||
/* Make a connection to the database */
|
||||
PGconn *conn = NULL;
|
||||
@@ -50,24 +46,8 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
||||
/* Check to see that the backend connection was successfully made */
|
||||
if ((PQstatus(conn) != CONNECTION_OK))
|
||||
{
|
||||
bool emit_log = true;
|
||||
|
||||
if (verbose_only == true && verbose_logging == false)
|
||||
emit_log = false;
|
||||
|
||||
if (emit_log)
|
||||
{
|
||||
if (log_notice)
|
||||
{
|
||||
log_notice(_("connection to database failed: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_err(_("connection to database failed: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
}
|
||||
}
|
||||
log_err(_("connection to database failed: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
if (exit_on_error)
|
||||
{
|
||||
@@ -79,38 +59,6 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
||||
return conn;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Establish a database connection, optionally exit on error
|
||||
*/
|
||||
PGconn *
|
||||
establish_db_connection(const char *conninfo, const bool exit_on_error)
|
||||
{
|
||||
/* positional flags: exit_on_error as given, log_notice=false, verbose_only=false */
return _establish_db_connection(conninfo, exit_on_error, false, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to establish a database connection, never exit on error, only
|
||||
* output error messages if --verbose option used
|
||||
*/
|
||||
PGconn *
|
||||
establish_db_connection_quiet(const char *conninfo)
|
||||
{
|
||||
/* positional flags: exit_on_error=false, log_notice=false, verbose_only=true */
return _establish_db_connection(conninfo, false, false, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to establish a database connection, never exit on error,
|
||||
* output connection error messages as NOTICE (useful when connection
|
||||
* failure is expected)
|
||||
*/
|
||||
PGconn *
|
||||
test_db_connection(const char *conninfo)
|
||||
{
|
||||
/* positional flags: exit_on_error=false, log_notice=true, verbose_only=false */
return _establish_db_connection(conninfo, false, true, false);
|
||||
}
|
||||
|
||||
|
||||
PGconn *
|
||||
establish_db_connection_by_params(const char *keywords[], const char *values[],
|
||||
const bool exit_on_error)
|
||||
@@ -360,7 +308,7 @@ get_master_node_id(PGconn *conn, char *cluster)
|
||||
}
|
||||
else if (PQntuples(res) == 0)
|
||||
{
|
||||
log_verbose(LOG_WARNING, _("get_master_node_id(): no active primary found\n"));
|
||||
log_warning(_("get_master_node_id(): no active primary found\n"));
|
||||
retval = NODE_NOT_FOUND;
|
||||
}
|
||||
else
|
||||
@@ -448,7 +396,7 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
||||
parameter, datatype, op, value, datatype);
|
||||
|
||||
log_verbose(LOG_DEBUG, "guc_set_typed():\n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "guc_set_typed():n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -497,6 +445,7 @@ get_cluster_size(PGconn *conn, char *size)
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool
|
||||
get_pg_setting(PGconn *conn, const char *setting, char *output)
|
||||
{
|
||||
@@ -539,7 +488,7 @@ get_pg_setting(PGconn *conn, const char *setting, char *output)
|
||||
|
||||
if (success == true)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, _("get_pg_setting(): returned value is \"%s\"\n"), output);
|
||||
log_debug(_("get_pg_setting(): returned value is \"%s\"\n"), output);
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
@@ -548,48 +497,6 @@ get_pg_setting(PGconn *conn, const char *setting, char *output)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* get_conninfo_value()
|
||||
*
|
||||
* Extract the value represented by 'keyword' in 'conninfo' and copy
|
||||
* it to the 'output' buffer.
|
||||
*
|
||||
* Returns true on success, or false on failure (conninfo string could
|
||||
* not be parsed, or provided keyword not found).
|
||||
*/
|
||||
|
||||
bool
|
||||
get_conninfo_value(const char *conninfo, const char *keyword, char *output)
|
||||
{
|
||||
PQconninfoOption *conninfo_options;
|
||||
PQconninfoOption *conninfo_option;
|
||||
|
||||
conninfo_options = PQconninfoParse(conninfo, NULL);
|
||||
|
||||
if (conninfo_options == NULL)
|
||||
{
|
||||
log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (conninfo_option = conninfo_options; conninfo_option->keyword != NULL; conninfo_option++)
|
||||
{
|
||||
if (strcmp(conninfo_option->keyword, keyword) == 0)
|
||||
{
|
||||
if (conninfo_option->val != NULL && conninfo_option->val[0] != '\0')
|
||||
{
|
||||
strncpy(output, conninfo_option->val, MAXLEN);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PQconninfoFree(conninfo_options);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* get_upstream_connection()
|
||||
*
|
||||
@@ -615,7 +522,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
upstream_conninfo = upstream_conninfo_out;
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" SELECT un.conninfo, un.id "
|
||||
" SELECT un.conninfo, un.name, un.id "
|
||||
" FROM %s.repl_nodes un "
|
||||
"INNER JOIN %s.repl_nodes n "
|
||||
" ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
|
||||
@@ -632,7 +539,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("error when attempting to find upstream server\n%s\n"),
|
||||
log_err(_("unable to get conninfo for upstream server\n%s\n"),
|
||||
PQerrorMessage(standby_conn));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
@@ -640,36 +547,9 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
||||
|
||||
if (!PQntuples(res))
|
||||
{
|
||||
log_notice(_("no record found for upstream server"));
|
||||
PQclear(res);
|
||||
log_debug("no record found for upstream server\n");
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" SELECT un.conninfo, un.id "
|
||||
" FROM %s.repl_nodes un "
|
||||
" WHERE un.cluster = '%s' "
|
||||
" AND un.type='master' "
|
||||
" AND un.active IS TRUE",
|
||||
get_repmgr_schema_quoted(standby_conn),
|
||||
cluster);
|
||||
res = PQexec(standby_conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("error when attempting to find active master server\n%s\n"),
|
||||
PQerrorMessage(standby_conn));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!PQntuples(res))
|
||||
{
|
||||
PQclear(res);
|
||||
log_notice(_("no record found for active master server\n"));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
log_debug("record found for active master server\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
|
||||
@@ -718,13 +598,6 @@ get_master_connection(PGconn *standby_conn, char *cluster,
|
||||
int i,
|
||||
node_id;
|
||||
|
||||
/*
|
||||
* If the caller wanted to get a copy of the connection info string, sub
|
||||
* out the local stack pointer for the pointer passed by the caller.
|
||||
*/
|
||||
if (master_conninfo_out != NULL)
|
||||
remote_conninfo = master_conninfo_out;
|
||||
|
||||
if (master_id != NULL)
|
||||
{
|
||||
*master_id = NODE_NOT_FOUND;
|
||||
@@ -944,12 +817,10 @@ get_repmgr_schema_quoted(PGconn *conn)
|
||||
|
||||
|
||||
bool
|
||||
create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
|
||||
create_replication_slot(PGconn *conn, char *slot_name)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int query_res;
|
||||
PGresult *res;
|
||||
t_replication_slot slot_info;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
|
||||
/*
|
||||
* Check whether slot exists already; if it exists and is active, that
|
||||
@@ -957,43 +828,48 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
|
||||
* if not we can reuse it as-is
|
||||
*/
|
||||
|
||||
query_res = get_slot_record(conn, slot_name, &slot_info);
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT active, slot_type "
|
||||
" FROM pg_replication_slots "
|
||||
" WHERE slot_name = '%s' ",
|
||||
slot_name);
|
||||
|
||||
if (query_res)
|
||||
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
if (strcmp(slot_info.slot_type, "physical") != 0)
|
||||
log_err(_("unable to query pg_replication_slots: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (PQntuples(res))
|
||||
{
|
||||
if (strcmp(PQgetvalue(res, 0, 1), "physical") != 0)
|
||||
{
|
||||
log_err(_("Slot '%s' exists and is not a physical slot\n"),
|
||||
slot_name);
|
||||
return false;
|
||||
PQclear(res);
|
||||
}
|
||||
|
||||
if (slot_info.active == false)
|
||||
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||
{
|
||||
PQclear(res);
|
||||
log_debug("Replication slot '%s' exists but is inactive; reusing\n",
|
||||
slot_name);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
log_err(_("Slot '%s' already exists as an active slot\n"),
|
||||
slot_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* In 9.6 and later, reserve the LSN straight away */
|
||||
if (server_version_num >= 90600)
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)",
|
||||
slot_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
}
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT * FROM pg_create_physical_replication_slot('%s')",
|
||||
slot_name);
|
||||
|
||||
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
||||
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||
@@ -1012,46 +888,6 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult *res;
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT slot_name, slot_type, active "
|
||||
" FROM pg_replication_slots "
|
||||
" WHERE slot_name = '%s' ",
|
||||
slot_name);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_slot_record():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("unable to query pg_replication_slots: %s\n"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!PQntuples(res))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
strncpy(record->slot_name, PQgetvalue(res, 0, 0), MAXLEN);
|
||||
strncpy(record->slot_type, PQgetvalue(res, 0, 1), MAXLEN);
|
||||
record->active = (strcmp(PQgetvalue(res, 0, 2), "t") == 0)
|
||||
? true
|
||||
: false;
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool
|
||||
drop_replication_slot(PGconn *conn, char *slot_name)
|
||||
{
|
||||
@@ -1176,7 +1012,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
||||
|
||||
|
||||
/*
|
||||
* witness_copy_node_records()
|
||||
* copy_configuration()
|
||||
*
|
||||
* Copy records in master's `repl_nodes` table to witness database
|
||||
*
|
||||
@@ -1184,49 +1020,29 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
||||
* `repmgrd` after a failover event occurs
|
||||
*/
|
||||
bool
|
||||
witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
{
|
||||
char sqlquery[MAXLEN];
|
||||
PGresult *res;
|
||||
int i;
|
||||
|
||||
begin_transaction(witnessconn);
|
||||
|
||||
/* Defer constraints */
|
||||
sqlquery_snprintf(sqlquery, "SET CONSTRAINTS ALL DEFERRED;");
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(witnessconn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to defer constraints:\n%s\n"),
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Truncate existing records */
|
||||
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(witnessconn, sqlquery);
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Get current records from primary */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
||||
get_repmgr_schema_quoted(masterconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||
|
||||
res = PQexec(masterconn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -1234,23 +1050,20 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
|
||||
log_err("Unable to retrieve node records from master:\n%s\n",
|
||||
PQerrorMessage(masterconn));
|
||||
PQclear(res);
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Insert primary records into witness table */
|
||||
for (i = 0; i < PQntuples(res); i++)
|
||||
{
|
||||
bool node_record_created;
|
||||
|
||||
log_verbose(LOG_DEBUG,
|
||||
"witness_copy_node_records(): writing node record for node %s (id: %s)\n",
|
||||
PQgetvalue(res, i, 3),
|
||||
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
||||
PQgetvalue(res, i, 4),
|
||||
PQgetvalue(res, i, 0));
|
||||
|
||||
node_record_created = create_node_record(witnessconn,
|
||||
"witness_copy_node_records",
|
||||
"copy_configuration",
|
||||
atoi(PQgetvalue(res, i, 0)),
|
||||
PQgetvalue(res, i, 1),
|
||||
strlen(PQgetvalue(res, i, 2))
|
||||
@@ -1262,10 +1075,7 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
|
||||
atoi(PQgetvalue(res, i, 5)),
|
||||
strlen(PQgetvalue(res, i, 6))
|
||||
? PQgetvalue(res, i, 6)
|
||||
: NULL,
|
||||
(strcmp(PQgetvalue(res, i, 7), "t") == 0)
|
||||
? true
|
||||
: false
|
||||
: NULL
|
||||
);
|
||||
|
||||
if (node_record_created == false)
|
||||
@@ -1274,16 +1084,11 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
|
||||
|
||||
log_err("Unable to copy node record to witness database\n%s\n",
|
||||
PQerrorMessage(witnessconn));
|
||||
rollback_transaction(witnessconn);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
/* And finished */
|
||||
commit_transaction(witnessconn);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1296,7 +1101,7 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
|
||||
* XXX we should pass the record parameters as a struct.
|
||||
*/
|
||||
bool
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
char upstream_node_id[MAXLEN];
|
||||
@@ -1337,8 +1142,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes "
|
||||
" (id, type, upstream_node_id, cluster, "
|
||||
" name, conninfo, slot_name, priority, active) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
|
||||
" name, conninfo, slot_name, priority) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
node,
|
||||
type,
|
||||
@@ -1347,8 +1152,7 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
node_name,
|
||||
conninfo,
|
||||
slot_name_buf,
|
||||
priority,
|
||||
active == true ? "TRUE" : "FALSE");
|
||||
priority);
|
||||
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
||||
|
||||
@@ -1388,7 +1192,7 @@ delete_node_record(PGconn *conn, int node, char *action)
|
||||
|
||||
if (action != NULL)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
||||
}
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
@@ -1623,7 +1427,6 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Update node record following change of status
|
||||
* (e.g. inactive primary converted to standby)
|
||||
@@ -1632,7 +1435,7 @@ bool
|
||||
update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
" UPDATE %s.repl_nodes "
|
||||
@@ -1705,181 +1508,21 @@ update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info)
|
||||
PGresult *
|
||||
get_node_record(PGconn *conn, char *cluster, int node_id)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int result;
|
||||
|
||||
sqlquery_snprintf(
|
||||
sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active"
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
cluster,
|
||||
node_id);
|
||||
sprintf(sqlquery,
|
||||
"SELECT id, upstream_node_id, conninfo, type, slot_name, active "
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND id = %i",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
cluster,
|
||||
node_id);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery);
|
||||
|
||||
result = _get_node_record(conn, cluster, sqlquery, node_info);
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int result;
|
||||
|
||||
sqlquery_snprintf(
|
||||
sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active"
|
||||
" FROM %s.repl_nodes "
|
||||
" WHERE cluster = '%s' "
|
||||
" AND name = '%s'",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
cluster,
|
||||
node_name);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_node_record_by_name():\n%s\n", sqlquery);
|
||||
|
||||
result = _get_node_record(conn, cluster, sqlquery, node_info);
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %s\n", node_name);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
_get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info)
|
||||
{
|
||||
int ntuples;
|
||||
PGresult *res;
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
ntuples = PQntuples(res);
|
||||
|
||||
if (ntuples == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
node_info->node_id = atoi(PQgetvalue(res, 0, 0));
|
||||
node_info->type = parse_node_type(PQgetvalue(res, 0, 1));
|
||||
node_info->upstream_node_id = atoi(PQgetvalue(res, 0, 2));
|
||||
strncpy(node_info->name, PQgetvalue(res, 0, 3), MAXLEN);
|
||||
strncpy(node_info->conninfo_str, PQgetvalue(res, 0, 4), MAXLEN);
|
||||
strncpy(node_info->slot_name, PQgetvalue(res, 0, 5), MAXLEN);
|
||||
node_info->priority = atoi(PQgetvalue(res, 0, 6));
|
||||
node_info->active = (strcmp(PQgetvalue(res, 0, 7), "t") == 0)
|
||||
? true
|
||||
: false;
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return ntuples;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int
|
||||
get_node_replication_state(PGconn *conn, char *node_name, char *output)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
PGresult * res;
|
||||
|
||||
sqlquery_snprintf(
|
||||
sqlquery,
|
||||
" SELECT state "
|
||||
" FROM pg_catalog.pg_stat_replication"
|
||||
" WHERE application_name = '%s'",
|
||||
node_name
|
||||
);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
PQclear(res);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (PQntuples(res) == 0)
|
||||
{
|
||||
PQclear(res);
|
||||
return 0;
|
||||
}
|
||||
|
||||
strncpy(output, PQgetvalue(res, 0, 0), MAXLEN);
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
t_server_type
|
||||
parse_node_type(const char *type)
|
||||
{
|
||||
if (strcmp(type, "master") == 0)
|
||||
{
|
||||
return MASTER;
|
||||
}
|
||||
else if (strcmp(type, "standby") == 0)
|
||||
{
|
||||
return STANDBY;
|
||||
}
|
||||
else if (strcmp(type, "witness") == 0)
|
||||
{
|
||||
return WITNESS;
|
||||
}
|
||||
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_data_checksum_version(const char *data_directory)
|
||||
{
|
||||
ControlFileData control_file;
|
||||
int fd;
|
||||
char control_file_path[MAXPGPATH];
|
||||
|
||||
snprintf(control_file_path, MAXPGPATH, "%s/global/pg_control", data_directory);
|
||||
if ((fd = open(control_file_path, O_RDONLY | PG_BINARY, 0)) == -1)
|
||||
{
|
||||
log_err(_("Unable to open control file \"%s\" for reading: %s\n"),
|
||||
control_file_path, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (read(fd, &control_file, sizeof(ControlFileData)) != sizeof(ControlFileData))
|
||||
{
|
||||
log_err(_("could not read file \"%s\": %s\n"),
|
||||
control_file_path, strerror(errno));
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
return (int)control_file.data_checksum_version;
|
||||
return PQexec(conn, sqlquery);
|
||||
}
|
||||
|
||||
40
dbutils.h
40
dbutils.h
@@ -66,27 +66,8 @@ typedef struct s_node_info
|
||||
InvalidXLogRecPtr \
|
||||
}
|
||||
|
||||
/*
|
||||
* Struct to store replication slot information
|
||||
*/
|
||||
|
||||
typedef struct s_replication_slot
|
||||
{
|
||||
char slot_name[MAXLEN];
|
||||
char slot_type[MAXLEN];
|
||||
bool active;
|
||||
} t_replication_slot;
|
||||
|
||||
|
||||
|
||||
PGconn *_establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error,
|
||||
const bool log_notice,
|
||||
const bool verbose_only);
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||
PGconn *test_db_connection(const char *conninfo);
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||
const char *values[],
|
||||
const bool exit_on_error);
|
||||
@@ -105,7 +86,7 @@ int guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value);
|
||||
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value, const char *datatype);
|
||||
bool get_conninfo_value(const char *conninfo, const char *keyword, char *output);
|
||||
|
||||
PGconn *get_upstream_connection(PGconn *standby_conn, char *cluster,
|
||||
int node_id,
|
||||
int *upstream_node_id_ptr,
|
||||
@@ -117,23 +98,18 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
bool cancel_query(PGconn *conn, int timeout);
|
||||
char *get_repmgr_schema(void);
|
||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
|
||||
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
|
||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||
int get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
PGresult * get_node_record(PGconn *conn, char *cluster, int node_id);
|
||||
|
||||
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
||||
t_server_type parse_node_type(const char *type);
|
||||
int get_data_checksum_version(const char *data_directory);
|
||||
#endif
|
||||
|
||||
|
||||
6
debian/DEBIAN/control
vendored
6
debian/DEBIAN/control
vendored
@@ -1,9 +1,9 @@
|
||||
Package: repmgr-auto
|
||||
Version: 3.1.3
|
||||
Version: 2.0beta2
|
||||
Section: database
|
||||
Priority: optional
|
||||
Architecture: all
|
||||
Depends: rsync, postgresql-9.3 | postgresql-9.4 | postgresql-9.5
|
||||
Maintainer: Self built package <user@localhost>
|
||||
Depends: rsync, postgresql-9.0 | postgresql-9.1 | postgresql-9.2 | postgresql-9.3 | postgresql-9.4
|
||||
Maintainer: Jaime Casanova <jaime@2ndQuadrant.com>
|
||||
Description: PostgreSQL replication setup, magament and monitoring
|
||||
has two main executables
|
||||
|
||||
194
dirmod.c
194
dirmod.c
@@ -1,194 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* dirmod.c
|
||||
* directory handling functions
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "postgres_fe.h"
|
||||
|
||||
/* Don't modify declarations in system headers */
|
||||
|
||||
#include <unistd.h>
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
/*
|
||||
* pgfnames
|
||||
*
|
||||
* return a list of the names of objects in the argument directory. Caller
|
||||
* must call pgfnames_cleanup later to free the memory allocated by this
|
||||
* function.
|
||||
*/
|
||||
char **
|
||||
pgfnames(const char *path)
|
||||
{
|
||||
DIR *dir;
|
||||
struct dirent *file;
|
||||
char **filenames;
|
||||
int numnames = 0;
|
||||
int fnsize = 200; /* enough for many small dbs */
|
||||
|
||||
dir = opendir(path);
|
||||
if (dir == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
filenames = (char **) palloc(fnsize * sizeof(char *));
|
||||
|
||||
while (errno = 0, (file = readdir(dir)) != NULL)
|
||||
{
|
||||
if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0)
|
||||
{
|
||||
if (numnames + 1 >= fnsize)
|
||||
{
|
||||
fnsize *= 2;
|
||||
filenames = (char **) repalloc(filenames,
|
||||
fnsize * sizeof(char *));
|
||||
}
|
||||
filenames[numnames++] = pstrdup(file->d_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (errno)
|
||||
{
|
||||
fprintf(stderr, _("could not read directory \"%s\": %s\n"),
|
||||
path, strerror(errno));
|
||||
}
|
||||
|
||||
filenames[numnames] = NULL;
|
||||
|
||||
if (closedir(dir))
|
||||
{
|
||||
fprintf(stderr, _("could not close directory \"%s\": %s\n"),
|
||||
path, strerror(errno));
|
||||
}
|
||||
|
||||
return filenames;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* pgfnames_cleanup
|
||||
*
|
||||
* deallocate memory used for filenames
|
||||
*/
|
||||
void
|
||||
pgfnames_cleanup(char **filenames)
|
||||
{
|
||||
char **fn;
|
||||
|
||||
for (fn = filenames; *fn; fn++)
|
||||
pfree(*fn);
|
||||
|
||||
pfree(filenames);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* rmtree
|
||||
*
|
||||
* Delete a directory tree recursively.
|
||||
* Assumes path points to a valid directory.
|
||||
* Deletes everything under path.
|
||||
* If rmtopdir is true deletes the directory too.
|
||||
* Returns true if successful, false if there was any problem.
|
||||
* (The details of the problem are reported already, so caller
|
||||
* doesn't really have to say anything more, but most do.)
|
||||
*/
|
||||
bool
|
||||
rmtree(const char *path, bool rmtopdir)
|
||||
{
|
||||
bool result = true;
|
||||
char pathbuf[MAXPGPATH];
|
||||
char **filenames;
|
||||
char **filename;
|
||||
struct stat statbuf;
|
||||
|
||||
/*
|
||||
* we copy all the names out of the directory before we start modifying
|
||||
* it.
|
||||
*/
|
||||
filenames = pgfnames(path);
|
||||
|
||||
if (filenames == NULL)
|
||||
return false;
|
||||
|
||||
/* now we have the names we can start removing things */
|
||||
for (filename = filenames; *filename; filename++)
|
||||
{
|
||||
snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
|
||||
|
||||
/*
|
||||
* It's ok if the file is not there anymore; we were just about to
|
||||
* delete it anyway.
|
||||
*
|
||||
* This is not an academic possibility. One scenario where this
|
||||
* happens is when bgwriter has a pending unlink request for a file in
|
||||
* a database that's being dropped. In dropdb(), we call
|
||||
* ForgetDatabaseFsyncRequests() to flush out any such pending unlink
|
||||
* requests, but because that's asynchronous, it's not guaranteed that
|
||||
* the bgwriter receives the message in time.
|
||||
*/
|
||||
if (lstat(pathbuf, &statbuf) != 0)
|
||||
{
|
||||
if (errno != ENOENT)
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
/* call ourselves recursively for a directory */
|
||||
if (!rmtree(pathbuf, true))
|
||||
{
|
||||
/* we already reported the error */
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (unlink(pathbuf) != 0)
|
||||
{
|
||||
if (errno != ENOENT)
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rmtopdir)
|
||||
{
|
||||
if (rmdir(path) != 0)
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
|
||||
pgfnames_cleanup(filenames);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
23
dirmod.h
23
dirmod.h
@@ -1,23 +0,0 @@
|
||||
/*
|
||||
* dirmod.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _DIRMOD_H_
|
||||
#define _DIRMOD_H_
|
||||
|
||||
#endif
|
||||
@@ -29,6 +29,7 @@
|
||||
#define ERR_DB_CON 6
|
||||
#define ERR_DB_QUERY 7
|
||||
#define ERR_PROMOTED 8
|
||||
#define ERR_BAD_PASSWORD 9
|
||||
#define ERR_STR_OVERFLOW 10
|
||||
#define ERR_FAILOVER_FAIL 11
|
||||
#define ERR_BAD_SSH 12
|
||||
@@ -36,7 +37,5 @@
|
||||
#define ERR_BAD_BASEBACKUP 14
|
||||
#define ERR_INTERNAL 15
|
||||
#define ERR_MONITORING_FAIL 16
|
||||
#define ERR_BAD_BACKUP_LABEL 17
|
||||
#define ERR_SWITCHOVER_FAIL 18
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
7
log.c
7
log.c
@@ -40,8 +40,7 @@
|
||||
/* #define REPMGR_DEBUG */
|
||||
|
||||
static int detect_log_facility(const char *facility);
|
||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
|
||||
|
||||
int log_type = REPMGR_STDERR;
|
||||
int log_level = LOG_NOTICE;
|
||||
@@ -49,7 +48,7 @@ int last_log_level = LOG_NOTICE;
|
||||
int verbose_logging = false;
|
||||
int terse_logging = false;
|
||||
|
||||
extern void
|
||||
void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||
{
|
||||
va_list arglist;
|
||||
@@ -142,7 +141,7 @@ log_verbose(int level, const char *fmt, ...)
|
||||
|
||||
|
||||
bool
|
||||
logger_init(t_configuration_options *opts, const char *ident)
|
||||
logger_init(t_configuration_options * opts, const char *ident)
|
||||
{
|
||||
char *level = opts->loglevel;
|
||||
char *facility = opts->logfacility;
|
||||
|
||||
12
log.h
12
log.h
@@ -25,7 +25,7 @@
|
||||
#define REPMGR_SYSLOG 1
|
||||
#define REPMGR_STDERR 2
|
||||
|
||||
extern void
|
||||
void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
|
||||
@@ -123,14 +123,10 @@ bool logger_shutdown(void);
|
||||
void logger_set_verbose(void);
|
||||
void logger_set_terse(void);
|
||||
|
||||
void log_hint(const char *fmt, ...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2)));
|
||||
void log_verbose(int level, const char *fmt, ...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
void log_hint(const char *fmt, ...);
|
||||
void log_verbose(int level, const char *fmt, ...);
|
||||
|
||||
extern int log_type;
|
||||
extern int log_level;
|
||||
extern int verbose_logging;
|
||||
extern int terse_logging;
|
||||
|
||||
#endif /* _REPMGR_LOG_H_ */
|
||||
#endif
|
||||
|
||||
@@ -2,10 +2,6 @@
|
||||
# Replication Manager sample configuration file
|
||||
###################################################
|
||||
|
||||
# Some configuration items will be set with a default value; this
|
||||
# is noted for each item. Where no default value is shown, the
|
||||
# parameter will be treated as empty or false.
|
||||
|
||||
# Required configuration items
|
||||
# ============================
|
||||
#
|
||||
@@ -15,29 +11,21 @@
|
||||
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
||||
# to preserve case, we recommend using lower case and avoiding whitespace
|
||||
# to facilitate easier querying of the repmgr views and tables.
|
||||
#cluster=example_cluster
|
||||
cluster=example_cluster
|
||||
|
||||
# Node ID and name
|
||||
# (Note: we recommend to avoid naming nodes after their initial
|
||||
# replication function, as this will cause confusion when e.g.
|
||||
# replication funcion, as this will cause confusion when e.g.
|
||||
# "standby2" is promoted to primary)
|
||||
#node=2 # a unique integer
|
||||
#node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||
node=2 # a unique integer
|
||||
node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||
# the server's hostname or another identifier unambiguously
|
||||
# associated with the server to avoid confusion
|
||||
|
||||
# Database connection information as a conninfo string
|
||||
# This must be accessible to all servers in the cluster; for details see:
|
||||
#
|
||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
#
|
||||
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
#
|
||||
# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
|
||||
# conninfo string to determine the length of time which elapses before
|
||||
# a network connection attempt is abandoned; for details see:
|
||||
#
|
||||
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT
|
||||
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
|
||||
# Optional configuration items
|
||||
# ============================
|
||||
@@ -45,17 +33,18 @@
|
||||
# Replication settings
|
||||
# ---------------------
|
||||
|
||||
# When using cascading replication, a standby can connect to another
|
||||
# upstream standby node which is specified by setting 'upstream_node'.
|
||||
# In that case, the upstream node must exist before the new standby
|
||||
# can be registered. If 'upstream_node' is not set, then the standby
|
||||
# will connect directly to the primary node.
|
||||
#upstream_node=1
|
||||
# when using cascading replication and a standby is to be connected to an
|
||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
||||
# must exist before the new standby can be registered. If a standby is
|
||||
# to connect directly to a primary node, this parameter is not required.
|
||||
#
|
||||
# upstream_node=1
|
||||
|
||||
# use physical replication slots - PostgreSQL 9.4 and later only
|
||||
# physical replication slots - PostgreSQL 9.4 and later only
|
||||
# (default: 0)
|
||||
#use_replication_slots=0
|
||||
|
||||
#
|
||||
# use_replication_slots=0
|
||||
#
|
||||
# NOTE: 'max_replication_slots' should be configured for at least the
|
||||
# number of standbys which will connect to the primary.
|
||||
|
||||
@@ -64,15 +53,15 @@
|
||||
|
||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||
# (default: NOTICE)
|
||||
#loglevel=NOTICE
|
||||
loglevel=NOTICE
|
||||
|
||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
# (default: STDERR)
|
||||
#logfacility=STDERR
|
||||
logfacility=STDERR
|
||||
|
||||
# stderr can be redirected to an arbitrary file:
|
||||
#
|
||||
#logfile='/var/log/repmgr/repmgr.log'
|
||||
# logfile='/var/log/repmgr.log'
|
||||
|
||||
# event notifications can be passed to an arbitrary external program
|
||||
# together with the following parameters:
|
||||
@@ -86,12 +75,12 @@
|
||||
# the values provided for "%t" and "%d" will probably contain spaces,
|
||||
# so should be quoted in the provided command configuration, e.g.:
|
||||
#
|
||||
#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
# event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
|
||||
# By default, all notifications will be passed; the notification types
|
||||
# can be filtered to explicitly named ones:
|
||||
#
|
||||
#event_notifications=master_register,standby_register,witness_create
|
||||
# event_notifications=master_register,standby_register,witness_create
|
||||
|
||||
|
||||
# Environment/command settings
|
||||
@@ -99,17 +88,17 @@
|
||||
|
||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
||||
# (if not provided, defaults to system $PATH)
|
||||
#pg_bindir=/usr/bin/
|
||||
# pg_bindir=/usr/bin/
|
||||
|
||||
# external command options
|
||||
|
||||
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
#ssh_options=-o "StrictHostKeyChecking no"
|
||||
# rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
# ssh_options=-o "StrictHostKeyChecking no"
|
||||
|
||||
# external command arguments. Values shown are examples.
|
||||
# external command arguments
|
||||
|
||||
#pg_ctl_options='-s'
|
||||
#pg_basebackup_options='--xlog-method=s'
|
||||
# pg_ctl_options='-s'
|
||||
# pg_basebackup_options='--xlog-method=s'
|
||||
|
||||
|
||||
# Standby clone settings
|
||||
@@ -121,52 +110,35 @@
|
||||
#
|
||||
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
|
||||
|
||||
# You can specify a restore_command to be used in the recovery.conf that
|
||||
# will be placed in the cloned standby
|
||||
#
|
||||
# restore_command = cp /path/to/archived/wals/%f %p
|
||||
|
||||
# Failover settings (repmgrd)
|
||||
# ---------------------------
|
||||
#
|
||||
# These settings are only applied when repmgrd is running. Values shown
|
||||
# are defaults.
|
||||
# These settings are only applied when repmgrd is running.
|
||||
|
||||
# Number of seconds to wait for a response from the primary server before
|
||||
# deciding it has failed.
|
||||
# deciding it has failed
|
||||
|
||||
#master_response_timeout=60
|
||||
master_response_timeout=60
|
||||
|
||||
# Number of attempts at what interval (in seconds) to try and
|
||||
# connect to a server to establish its status (e.g. master
|
||||
# during failover)
|
||||
#reconnect_attempts=6
|
||||
#reconnect_interval=10
|
||||
# Number of times to try and reconnect to the primary before starting
|
||||
# the failover procedure
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=10
|
||||
|
||||
# Autofailover options
|
||||
#failover=manual # one of 'automatic', 'manual' (default: manual)
|
||||
# defines the action to take in the event of upstream failure
|
||||
#
|
||||
# 'automatic': repmgrd will automatically attempt to promote the
|
||||
# node or follow the new upstream node
|
||||
# 'manual': repmgrd will take no action and the mode will require
|
||||
# manual attention to reattach it to replication
|
||||
|
||||
#priority=100 # indicate a preferred priorty for promoting nodes
|
||||
# a value of zero or less prevents the node being promoted to primary
|
||||
# (default: 100)
|
||||
|
||||
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
failover=automatic # one of 'automatic', 'manual'
|
||||
priority=100 # a value of zero or less prevents the node being promoted to primary
|
||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
|
||||
# monitoring interval in seconds; default is 2
|
||||
#monitor_interval_secs=2
|
||||
#
|
||||
# monitor_interval_secs=2
|
||||
|
||||
# change wait time for primary; before we bail out and exit when the primary
|
||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||
# default value is 300)
|
||||
#retry_promote_interval_secs=300
|
||||
|
||||
# Number of seconds after which the witness server resyncs the repl_nodes table
|
||||
#witness_repl_nodes_sync_interval_secs=15
|
||||
#
|
||||
# retry_promote_interval_secs=300
|
||||
|
||||
51
repmgr.h
51
repmgr.h
@@ -28,15 +28,18 @@
|
||||
#include "dbutils.h"
|
||||
#include "errcode.h"
|
||||
#include "config.h"
|
||||
#include "dirmod.h"
|
||||
|
||||
#define MIN_SUPPORTED_VERSION "9.3"
|
||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||
|
||||
#include "config.h"
|
||||
#define MAXFILENAME 1024
|
||||
#define ERRBUFF_SIZE 512
|
||||
|
||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||
#define DEFAULT_DEST_DIR "."
|
||||
#define DEFAULT_MASTER_PORT "5432"
|
||||
#define DEFAULT_DBNAME "postgres"
|
||||
#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
#define FAILOVER_NODES_MAX_CHECK 50
|
||||
@@ -47,15 +50,6 @@
|
||||
#define NO_UPSTREAM_NODE -1
|
||||
#define UNKNOWN_NODE_ID -1
|
||||
|
||||
#define OPT_HELP 1
|
||||
#define OPT_CHECK_UPSTREAM_CONFIG 2
|
||||
#define OPT_RECOVERY_MIN_APPLY_DELAY 3
|
||||
#define OPT_IGNORE_EXTERNAL_CONFIG_FILES 4
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 5
|
||||
#define OPT_PG_REWIND 6
|
||||
#define OPT_PWPROMPT 7
|
||||
#define OPT_CSV 8
|
||||
#define OPT_INITDB_NO_PWPROMPT 9
|
||||
|
||||
|
||||
/* Run time options type */
|
||||
@@ -65,8 +59,8 @@ typedef struct
|
||||
char dbname[MAXLEN];
|
||||
char host[MAXLEN];
|
||||
char username[MAXLEN];
|
||||
char dest_dir[MAXPGPATH];
|
||||
char config_file[MAXPGPATH];
|
||||
char dest_dir[MAXFILENAME];
|
||||
char config_file[MAXFILENAME];
|
||||
char remote_user[MAXLEN];
|
||||
char superuser[MAXLEN];
|
||||
char wal_keep_segments[MAXLEN];
|
||||
@@ -75,50 +69,25 @@ typedef struct
|
||||
bool force;
|
||||
bool wait_for_master;
|
||||
bool ignore_rsync_warn;
|
||||
bool witness_pwprompt;
|
||||
bool initdb_no_pwprompt;
|
||||
bool rsync_only;
|
||||
bool fast_checkpoint;
|
||||
bool ignore_external_config_files;
|
||||
bool csv_mode;
|
||||
char masterport[MAXLEN];
|
||||
/*
|
||||
* configuration file parameters which can be overridden on the
|
||||
* command line
|
||||
*/
|
||||
char localport[MAXLEN];
|
||||
char loglevel[MAXLEN];
|
||||
|
||||
/* parameter used by STANDBY SWITCHOVER */
|
||||
char remote_config_file[MAXLEN];
|
||||
char pg_rewind[MAXPGPATH];
|
||||
char pg_ctl_mode[MAXLEN];
|
||||
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
||||
char config_archive_dir[MAXLEN];
|
||||
/* parameter used by CLUSTER CLEANUP */
|
||||
int keep_history;
|
||||
|
||||
char pg_bindir[MAXLEN];
|
||||
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
|
||||
/* deprecated command line options */
|
||||
char localport[MAXLEN];
|
||||
} t_runtime_options;
|
||||
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", ""}
|
||||
|
||||
struct BackupLabel
|
||||
{
|
||||
XLogRecPtr start_wal_location;
|
||||
char start_wal_file[MAXLEN];
|
||||
XLogRecPtr checkpoint_location;
|
||||
char backup_from[MAXLEN];
|
||||
char backup_method[MAXLEN];
|
||||
char start_time[MAXLEN];
|
||||
char label[MAXLEN];
|
||||
XLogRecPtr min_failover_slot_lsn;
|
||||
};
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "", "", "", 0, "", "" }
|
||||
|
||||
extern char repmgr_schema[MAXLEN];
|
||||
extern bool config_file_found;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -59,12 +59,3 @@ WHERE (standby_node, last_monitor_time) IN (SELECT standby_node, MAX(last_monito
|
||||
ALTER VIEW repl_status OWNER TO repmgr;
|
||||
|
||||
CREATE INDEX idx_repl_status_sort ON repl_monitor(last_monitor_time, standby_node);
|
||||
|
||||
/*
|
||||
* This view shows the list of nodes with the information of which one is the upstream
|
||||
* in each case (when appliable)
|
||||
*/
|
||||
CREATE VIEW repl_show_nodes AS
|
||||
SELECT rn.id, rn.conninfo, rn.type, rn.name, rn.cluster,
|
||||
rn.priority, rn.active, sq.name AS upstream_node_name
|
||||
FROM repl_nodes as rn LEFT JOIN repl_nodes AS sq ON sq.id=rn.upstream_node_id;
|
||||
|
||||
@@ -63,15 +63,6 @@ UPDATE repl_nodes SET type = 'master' WHERE id = $master_id;
|
||||
|
||||
-- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);
|
||||
|
||||
/* There's also an event table which we need to create */
|
||||
CREATE TABLE repl_events (
|
||||
node_id INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
details TEXT NULL
|
||||
);
|
||||
|
||||
/* When you're sure of your changes, commit them */
|
||||
|
||||
-- COMMIT;
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Update a repmgr 3.0 installation to repmgr 3.1
|
||||
* ----------------------------------------------
|
||||
*
|
||||
* The new repmgr package should be installed first. Then
|
||||
* carry out these steps:
|
||||
*
|
||||
* 1. (If repmgrd is used) stop any running repmgrd instances
|
||||
* 2. On the master node, execute the SQL statements listed below
|
||||
* 3. (If repmgrd is used) restart repmgrd
|
||||
*/
|
||||
|
||||
/*
|
||||
* If your repmgr installation is not included in your repmgr
|
||||
* user's search path, please set the search path to the name
|
||||
* of the repmgr schema to ensure objects are installed in
|
||||
* the correct location.
|
||||
*
|
||||
* The repmgr schema is "repmgr_" + the cluster name defined in
|
||||
* 'repmgr.conf'.
|
||||
*/
|
||||
|
||||
-- SET search_path TO 'name_of_repmgr_schema';
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- New view "repl_show_nodes" which also displays the server's
|
||||
-- upstream node
|
||||
|
||||
CREATE VIEW repl_show_nodes AS
|
||||
SELECT rn.id, rn.conninfo, rn.type, rn.name, rn.cluster,
|
||||
rn.priority, rn.active, sq.name AS upstream_node_name
|
||||
FROM repl_nodes as rn LEFT JOIN repl_nodes AS sq ON sq.id=rn.upstream_node_id;
|
||||
|
||||
COMMIT;
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Update a repmgr 3.1.1 installation to repmgr 3.1.2
|
||||
* --------------------------------------------------
|
||||
*
|
||||
* This update is only required if repmgrd is being used in conjunction
|
||||
* with a witness server.
|
||||
*
|
||||
* The new repmgr package should be installed first. Then
|
||||
* carry out these steps:
|
||||
*
|
||||
* 1. (If repmgrd is used) stop any running repmgrd instances
|
||||
* 2. On the master node, execute the SQL statement listed below
|
||||
* 3. (If repmgrd is used) restart repmgrd
|
||||
*/
|
||||
|
||||
/*
|
||||
* If your repmgr installation is not included in your repmgr
|
||||
* user's search path, please set the search path to the name
|
||||
* of the repmgr schema to ensure objects are installed in
|
||||
* the correct location.
|
||||
*
|
||||
* The repmgr schema is "repmgr_" + the cluster name defined in
|
||||
* 'repmgr.conf'.
|
||||
*/
|
||||
|
||||
-- SET search_path TO 'name_of_repmgr_schema';
|
||||
|
||||
BEGIN;
|
||||
|
||||
ALTER TABLE repl_nodes DROP CONSTRAINT repl_nodes_upstream_node_id_fkey,
|
||||
ADD CONSTRAINT repl_nodes_upstream_node_id_fkey FOREIGN KEY (upstream_node_id) REFERENCES repl_nodes(id) DEFERRABLE;
|
||||
COMMIT;
|
||||
@@ -83,12 +83,7 @@ _PG_init(void)
|
||||
* resources in repmgr_shmem_startup().
|
||||
*/
|
||||
RequestAddinShmemSpace(repmgr_memsize());
|
||||
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
RequestNamedLWLockTranche("repmgr", 1);
|
||||
#else
|
||||
RequestAddinLWLocks(1);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Install hooks.
|
||||
@@ -133,11 +128,7 @@ repmgr_shmem_startup(void)
|
||||
if (!found)
|
||||
{
|
||||
/* First time through ... */
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock;
|
||||
#else
|
||||
shared_state->lock = LWLockAssign();
|
||||
#endif
|
||||
snprintf(shared_state->location,
|
||||
sizeof(shared_state->location), "%X/%X", 0, 0);
|
||||
}
|
||||
|
||||
@@ -24,17 +24,12 @@
|
||||
#include <stdlib.h>
|
||||
#include "errcode.h"
|
||||
|
||||
|
||||
#define QUERY_STR_LEN 8192
|
||||
#define MAXLEN 1024
|
||||
#define MAXLINELENGTH 4096
|
||||
#define MAXVERSIONSTR 16
|
||||
#define MAXCONNINFO 1024
|
||||
|
||||
/* Why? http://stackoverflow.com/a/5459929/398670 */
|
||||
#define STR(x) CppAsString(x)
|
||||
|
||||
#define MAXLEN_STR STR(MAXLEN)
|
||||
|
||||
extern int
|
||||
xsnprintf(char *str, size_t size, const char *format,...)
|
||||
|
||||
Reference in New Issue
Block a user