mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
366 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
372f4f7d3d | ||
|
|
c4505248b0 | ||
|
|
5774d86ac7 | ||
|
|
29e7733152 | ||
|
|
b64385bee8 | ||
|
|
419ce28d3d | ||
|
|
90cc6a5cb4 | ||
|
|
1b175ddfcf | ||
|
|
eabe618738 | ||
|
|
3e621f43d1 | ||
|
|
15a531fed8 | ||
|
|
96255b988a | ||
|
|
8de0deddf9 | ||
|
|
bd19a2c868 | ||
|
|
2cadb3424d | ||
|
|
bfe4585b91 | ||
|
|
230773d626 | ||
|
|
81b7b3bae7 | ||
|
|
8cfc26d3ea | ||
|
|
5b1a4d0ef1 | ||
|
|
22423aa51a | ||
|
|
98df2a5891 | ||
|
|
6f61c8285b | ||
|
|
0875b2aafa | ||
|
|
3e2c9ed410 | ||
|
|
5b4f832f3b | ||
|
|
66844d057a | ||
|
|
f096cca84f | ||
|
|
0fbb83262f | ||
|
|
a0a3ef58b0 | ||
|
|
b007fc8b39 | ||
|
|
ae19c9bd5d | ||
|
|
1b4a8917ca | ||
|
|
ce66a7c2d2 | ||
|
|
9a3196b671 | ||
|
|
46a3082055 | ||
|
|
ebabc68f8a | ||
|
|
c757985640 | ||
|
|
172a3d90cf | ||
|
|
86d24759a0 | ||
|
|
7bd54b5a70 | ||
|
|
3e04c8e720 | ||
|
|
7f98bb7aec | ||
|
|
9e2736be4c | ||
|
|
4b3966d6a7 | ||
|
|
5a1036cea2 | ||
|
|
e21448831d | ||
|
|
155f5075cb | ||
|
|
9cfd6680b3 | ||
|
|
874616f149 | ||
|
|
61ce18ebbe | ||
|
|
922dfd88e5 | ||
|
|
b41235b896 | ||
|
|
0307c51d4b | ||
|
|
6d608aea7f | ||
|
|
5d26e27b48 | ||
|
|
2fa2dfff95 | ||
|
|
9e5b3e0a2d | ||
|
|
e0b82393b0 | ||
|
|
5c64f09889 | ||
|
|
af7dee05a4 | ||
|
|
bdb8ee1a6f | ||
|
|
3799d089a1 | ||
|
|
d06bd0ddea | ||
|
|
7fed433df1 | ||
|
|
9517624297 | ||
|
|
25ea635689 | ||
|
|
0d971d9009 | ||
|
|
5bba37cebd | ||
|
|
886a9fd036 | ||
|
|
94d0d119f6 | ||
|
|
96c8cd4148 | ||
|
|
619f95d85c | ||
|
|
97ae6dbf57 | ||
|
|
2929ed9be0 | ||
|
|
33037dd7fb | ||
|
|
36db199882 | ||
|
|
728b71c700 | ||
|
|
97c9525479 | ||
|
|
bf957ac173 | ||
|
|
e358c20b84 | ||
|
|
aaf219a694 | ||
|
|
c69e4e93f2 | ||
|
|
8a4f4bbd83 | ||
|
|
36a9e17bd3 | ||
|
|
95ac7e889b | ||
|
|
0a5457efb2 | ||
|
|
aa67a4b7e9 | ||
|
|
f8a0e45f5b | ||
|
|
6b8f96b590 | ||
|
|
e01807ea20 | ||
|
|
080bb81aeb | ||
|
|
d8fe1ebf47 | ||
|
|
9b7cb5b0c0 | ||
|
|
2c69119eff | ||
|
|
cc1e285d90 | ||
|
|
4ee84f4f05 | ||
|
|
ad83f8f12e | ||
|
|
ce254ccde3 | ||
|
|
c833dd65f9 | ||
|
|
7e615c5e0d | ||
|
|
a0a2f87d32 | ||
|
|
dce16d794c | ||
|
|
60b14ab107 | ||
|
|
65afc42afa | ||
|
|
0bba5ad792 | ||
|
|
f3d5a4a7b0 | ||
|
|
6597a03559 | ||
|
|
78dad4fc52 | ||
|
|
b152cccd69 | ||
|
|
6e5d4e0235 | ||
|
|
5ade2a1f2d | ||
|
|
cd2f74a840 | ||
|
|
fb28ee6f1c | ||
|
|
c02d226d2f | ||
|
|
fe1bd5fb91 | ||
|
|
bc63099dcf | ||
|
|
ef5ef9c13f | ||
|
|
02646165be | ||
|
|
d08bd352c1 | ||
|
|
53b990c65d | ||
|
|
1979fe63ca | ||
|
|
b0ce1fc801 | ||
|
|
28c2652580 | ||
|
|
e3b8a8fc3a | ||
|
|
74d925475e | ||
|
|
bd96e0ca72 | ||
|
|
918ee3811f | ||
|
|
606d0afabc | ||
|
|
e16c3b2c9a | ||
|
|
4e6c250830 | ||
|
|
29110a6e11 | ||
|
|
abf92883a8 | ||
|
|
bdf1696b58 | ||
|
|
7ad001e0c5 | ||
|
|
fb8296644d | ||
|
|
e603498f43 | ||
|
|
d7365535ab | ||
|
|
a55587d75a | ||
|
|
5a9b6eea1f | ||
|
|
a572efe929 | ||
|
|
c6b0f77923 | ||
|
|
66b399b886 | ||
|
|
2339adba6c | ||
|
|
ff63954c68 | ||
|
|
1a9fbd623f | ||
|
|
491309f4ba | ||
|
|
072c2d70ff | ||
|
|
1f9b19f3ff | ||
|
|
04fe820aff | ||
|
|
0f8759d316 | ||
|
|
2b6415b339 | ||
|
|
defb1e819b | ||
|
|
7ae4434f6e | ||
|
|
e3b734f177 | ||
|
|
187a6b6d23 | ||
|
|
238fc589a1 | ||
|
|
63b9254be4 | ||
|
|
44196de2f4 | ||
|
|
46888de77f | ||
|
|
6b1f55ff1b | ||
|
|
3d3f082617 | ||
|
|
db5db06244 | ||
|
|
dd7193715c | ||
|
|
f8f81f4bf1 | ||
|
|
27a4780cc3 | ||
|
|
b79bce5821 | ||
|
|
a69176fc1b | ||
|
|
65dcce55e8 | ||
|
|
ff30df3d96 | ||
|
|
74e4b6aa3c | ||
|
|
22dd164cbb | ||
|
|
63c416bb76 | ||
|
|
e2c2f97307 | ||
|
|
41e9a370af | ||
|
|
2eb242553a | ||
|
|
e3ecd3cdc5 | ||
|
|
32611f5f04 | ||
|
|
ba254d2f06 | ||
|
|
69f842bbc2 | ||
|
|
99e1c33668 | ||
|
|
6791bd9ad3 | ||
|
|
3e6659dda0 | ||
|
|
ce498e4d1e | ||
|
|
0762b28679 | ||
|
|
5dff3017c8 | ||
|
|
b453c6a533 | ||
|
|
09c7ba95ac | ||
|
|
e213f3b99d | ||
|
|
eabb3cb1ab | ||
|
|
e886e72f14 | ||
|
|
1803a16c7e | ||
|
|
4f36b2c085 | ||
|
|
19aba38327 | ||
|
|
2d82ade170 | ||
|
|
21730899da | ||
|
|
94bc5bdf80 | ||
|
|
1369fe5c79 | ||
|
|
e71dd4945e | ||
|
|
49d1abf130 | ||
|
|
01a33198fd | ||
|
|
1d28a2d08e | ||
|
|
d4578e023e | ||
|
|
9ceef937eb | ||
|
|
f047ec9526 | ||
|
|
a3f0e89a05 | ||
|
|
3d9d0d98af | ||
|
|
49debcdf92 | ||
|
|
7a760c32ff | ||
|
|
b4b5e6cd30 | ||
|
|
2ece014952 | ||
|
|
01360c3d39 | ||
|
|
031a726f04 | ||
|
|
5c67d47881 | ||
|
|
f40b3ac48a | ||
|
|
109269f7fb | ||
|
|
36d94c88ac | ||
|
|
23ef305afb | ||
|
|
99dae5cdcb | ||
|
|
b552710767 | ||
|
|
7e4c26b8a0 | ||
|
|
f8639a7878 | ||
|
|
f2309bd0a9 | ||
|
|
061e72d7cd | ||
|
|
0a19bf1e23 | ||
|
|
84a4766f13 | ||
|
|
3be8bf8e4c | ||
|
|
1e6f1a88b0 | ||
|
|
4a8912c2b4 | ||
|
|
3279e9e47e | ||
|
|
5c4e77f8e2 | ||
|
|
b09f987341 | ||
|
|
fe758eda9f | ||
|
|
c413cff461 | ||
|
|
609453a848 | ||
|
|
a82d37e48a | ||
|
|
4b6c097a3e | ||
|
|
3dfa33d01d | ||
|
|
a20afe28ec | ||
|
|
692204e381 | ||
|
|
2ae27521a3 | ||
|
|
d141d2a8aa | ||
|
|
fc6f5ddaa7 | ||
|
|
4b7fb70de3 | ||
|
|
5db6489ae5 | ||
|
|
96f6695a52 | ||
|
|
f006a79af9 | ||
|
|
3a8aa86e0c | ||
|
|
9cbac64ad4 | ||
|
|
242fc8416c | ||
|
|
355d8b8f01 | ||
|
|
8df7966540 | ||
|
|
1a790549ea | ||
|
|
b80d25ad33 | ||
|
|
a1a61e5ec3 | ||
|
|
1221b73293 | ||
|
|
93d5d7905f | ||
|
|
41a5274b44 | ||
|
|
d9f83cf620 | ||
|
|
905a2d0a5e | ||
|
|
526803c5e0 | ||
|
|
2ff24fecf2 | ||
|
|
a5ead16d6f | ||
|
|
5fb84b9627 | ||
|
|
437485bf6a | ||
|
|
822867ec24 | ||
|
|
5b88a980b8 | ||
|
|
e958b8f2d7 | ||
|
|
74a963a10e | ||
|
|
dd064cb47c | ||
|
|
17de82493a | ||
|
|
f2fa60f5cf | ||
|
|
acf2744ed7 | ||
|
|
7bafd490c1 | ||
|
|
7c15176646 | ||
|
|
97cbd7d557 | ||
|
|
5798241205 | ||
|
|
5b5b4cd4cd | ||
|
|
9dd78f34db | ||
|
|
a5b5e3d384 | ||
|
|
241622694d | ||
|
|
687872e979 | ||
|
|
3033f2dfaf | ||
|
|
718024454e | ||
|
|
e3379c0fbf | ||
|
|
ba4413ce91 | ||
|
|
e85e0732ef | ||
|
|
97fb3dbb14 | ||
|
|
8e09e7b57c | ||
|
|
4f37515113 | ||
|
|
413cc6eb54 | ||
|
|
f23c43b986 | ||
|
|
38de150436 | ||
|
|
a80da61203 | ||
|
|
4fbe8ca70e | ||
|
|
1aa12a122f | ||
|
|
c54d5f9a9c | ||
|
|
db49206fc5 | ||
|
|
017f66e15e | ||
|
|
e06949db87 | ||
|
|
2f6ce44cab | ||
|
|
1f21040fb3 | ||
|
|
4e9c58c7db | ||
|
|
3e983b258c | ||
|
|
9093a9d1ce | ||
|
|
4305f76fa9 | ||
|
|
7700086703 | ||
|
|
8b69b1e16f | ||
|
|
f94626bf7b | ||
|
|
4c64d52afb | ||
|
|
6f80cd5441 | ||
|
|
ee69730f8b | ||
|
|
9306726e06 | ||
|
|
44fa98f828 | ||
|
|
28fd9d5aeb | ||
|
|
24b2ee09b3 | ||
|
|
8faf41dd94 | ||
|
|
763881b1ed | ||
|
|
94f520e1de | ||
|
|
f72bb2f093 | ||
|
|
927667905e | ||
|
|
684d66f48e | ||
|
|
406c325ccb | ||
|
|
c23ee3830b | ||
|
|
dc0dfe9b56 | ||
|
|
313d787ebf | ||
|
|
253992c5ec | ||
|
|
47b7c4ce06 | ||
|
|
99ed9a065e | ||
|
|
a87d859e56 | ||
|
|
7350a8bf57 | ||
|
|
75baed233b | ||
|
|
5b9ac4585b | ||
|
|
2cbee90f35 | ||
|
|
fa7d8df534 | ||
|
|
0cbd5d3933 | ||
|
|
a0e4c99ab4 | ||
|
|
98c5215871 | ||
|
|
e40b9db0a6 | ||
|
|
54e62c3d65 | ||
|
|
bfd482bebc | ||
|
|
6a0fc43086 | ||
|
|
8f47111072 | ||
|
|
0b5b3aaa4b | ||
|
|
d8bba0de03 | ||
|
|
a11bb60fd5 | ||
|
|
4f810aee2c | ||
|
|
73d352b2a2 | ||
|
|
143aa57bb8 | ||
|
|
5b15fcff5c | ||
|
|
4469de533e | ||
|
|
30045f3bec | ||
|
|
d93d42fadb | ||
|
|
7c89a4d762 | ||
|
|
b3c68dead8 | ||
|
|
b9ab9010c0 | ||
|
|
2a6c835a5a | ||
|
|
2d48d5aee4 | ||
|
|
653e11c2a7 | ||
|
|
91c29fe2a2 | ||
|
|
573f1d3b2e | ||
|
|
0a6ff7faec | ||
|
|
98b1f8d28a | ||
|
|
9eba986833 | ||
|
|
164cf9d08f | ||
|
|
d8b8bf0e2a |
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2010-2014, 2ndQuadrant Limited
|
||||
Copyright (c) 2010-2015, 2ndQuadrant Limited
|
||||
All rights reserved.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
=====================================================
|
||||
PostgreSQL Automatic Fail-Over - User Documentation
|
||||
=====================================================
|
||||
====================================================
|
||||
PostgreSQL Automatic Failover - User Documentation
|
||||
====================================================
|
||||
|
||||
Automatic Failover
|
||||
==================
|
||||
|
||||
repmgr allows setups for automatic failover when it detects the failure of the master node.
|
||||
repmgr allows for automatic failover when it detects the failure of the master node.
|
||||
Following is a quick setup for this.
|
||||
|
||||
Installation
|
||||
@@ -14,34 +14,33 @@ Installation
|
||||
For convenience, we define:
|
||||
|
||||
**node1**
|
||||
is the hostname fully qualified of the Master server, IP 192.168.1.10
|
||||
is the fully qualified domain name of the Master server, IP 192.168.1.10
|
||||
**node2**
|
||||
is the hostname fully qualified of the Standby server, IP 192.168.1.11
|
||||
is the fully qualified domain name of the Standby server, IP 192.168.1.11
|
||||
**witness**
|
||||
is the hostname fully qualified of the server used for witness, IP 192.168.1.12
|
||||
is the fully qualified domain name of the server used as a witness, IP 192.168.1.12
|
||||
|
||||
**Note:** It is not recommanded to use name defining status of a server like «masterserver»,
|
||||
this is a name leading to confusion once a failover take place and the Master is
|
||||
**Note:** We don't recommend using names with the status of a server like «masterserver»,
|
||||
because it would be confusing once a failover takes place and the Master is
|
||||
now on the «standbyserver».
|
||||
|
||||
Summary
|
||||
-------
|
||||
|
||||
2 PostgreSQL servers are involved in the replication. Automatic fail-over need
|
||||
to vote to decide what server it should promote, thus an odd number is required
|
||||
and a witness-repmgrd is installed in a third server where it uses a PostgreSQL
|
||||
2 PostgreSQL servers are involved in the replication. Automatic failover needs
|
||||
a vote to decide what server it should promote, so an odd number is required.
|
||||
A witness-repmgrd is installed in a third server where it uses a PostgreSQL
|
||||
cluster to communicate with other repmgrd daemons.
|
||||
|
||||
1. Install PostgreSQL in all the servers involved (including the server used for
|
||||
witness)
|
||||
1. Install PostgreSQL in all the servers involved (including the witness server)
|
||||
|
||||
2. Install repmgr in all the servers involved (including the server used for witness)
|
||||
2. Install repmgr in all the servers involved (including the witness server)
|
||||
|
||||
3. Configure the Master PostgreSQL
|
||||
|
||||
4. Clone the Master to the Standby using "repmgr standby clone" command
|
||||
|
||||
5. Configure repmgr in all the servers involved (including the server used for witness)
|
||||
5. Configure repmgr in all the servers involved (including the witness server)
|
||||
|
||||
6. Register Master and Standby nodes
|
||||
|
||||
@@ -66,14 +65,14 @@ Install repmgr following the steps in the README file.
|
||||
Configure PostgreSQL
|
||||
--------------------
|
||||
|
||||
Log in node1.
|
||||
Log in to node1.
|
||||
|
||||
Edit the file postgresql.conf and modify the parameters::
|
||||
|
||||
listen_addresses='*'
|
||||
wal_level = 'hot_standby'
|
||||
archive_mode = on
|
||||
archive_command = 'cd .' # we can also use exit 0, anything that
|
||||
archive_command = 'cd .' # we can also use exit 0, anything that
|
||||
# just does nothing
|
||||
max_wal_senders = 10
|
||||
wal_keep_segments = 5000 # 80 GB required on pg_xlog
|
||||
@@ -117,9 +116,9 @@ Create the ssh-key for the postgres user and copy it to other servers::
|
||||
Clone Master
|
||||
------------
|
||||
|
||||
Log in node2.
|
||||
Log in to node2.
|
||||
|
||||
Clone the node1 (the current Master)::
|
||||
Clone node1 (the current Master)::
|
||||
|
||||
su - postgres
|
||||
repmgr -d repmgr -U repmgr -h node1 standby clone
|
||||
@@ -133,7 +132,7 @@ And check everything is fine in the server log.
|
||||
Configure repmgr
|
||||
----------------
|
||||
|
||||
Log in each server and configure repmgr by editing the file
|
||||
Log in to each server and configure repmgr by editing the file
|
||||
/etc/repmgr/repmgr.conf::
|
||||
|
||||
cluster=my_cluster
|
||||
@@ -154,13 +153,13 @@ Log in each server and configure repmgr by editing the file
|
||||
**node_name**
|
||||
is an identifier for every node.
|
||||
**conninfo**
|
||||
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration it is needed to add a 'port=5499' to the conninfo.
|
||||
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration you need to add a 'port=5499' to the conninfo.
|
||||
**master_response_timeout**
|
||||
is the maximum amount of time we are going to wait before deciding the master has died and start failover procedure.
|
||||
is the maximum amount of time we are going to wait before deciding the master has died and start the failover procedure.
|
||||
**reconnect_attempts**
|
||||
is the number of times we will try to reconnect to master after a failure has been detected and before start failover procedure.
|
||||
is the number of times we will try to reconnect to master after a failure has been detected and before starting the failover procedure.
|
||||
**reconnect_interval**
|
||||
is the amount of time between retries to reconnect to master after a failure has been detected and before start failover procedure.
|
||||
is the amount of time between retries to reconnect to master after a failure has been detected and before starting the failover procedure.
|
||||
**failover**
|
||||
configure behavior: *manual* or *automatic*.
|
||||
**promote_command**
|
||||
@@ -171,14 +170,14 @@ Log in each server and configure repmgr by editing the file
|
||||
Register Master and Standby
|
||||
---------------------------
|
||||
|
||||
Log in node1.
|
||||
Log in to node1.
|
||||
|
||||
Register the node as Master::
|
||||
|
||||
su - postgres
|
||||
repmgr -f /etc/repmgr/repmgr.conf master register
|
||||
|
||||
Log in node2. Register it as a standby::
|
||||
Log in to node2. Register it as a standby::
|
||||
|
||||
su - postgres
|
||||
repmgr -f /etc/repmgr/repmgr.conf standby register
|
||||
@@ -186,38 +185,45 @@ Log in node2. Register it as a standby::
|
||||
Initialize witness server
|
||||
-------------------------
|
||||
|
||||
Log in witness.
|
||||
Log in to witness.
|
||||
|
||||
Initialize the witness server::
|
||||
|
||||
su - postgres
|
||||
repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create
|
||||
|
||||
It needs information to connect to the master to copy the configuration of the cluster, also it needs to know where it should initialize it's own $PGDATA.
|
||||
As part of the procees it also ask for the superuser password so it can connect when needed.
|
||||
The witness server needs the following information from the command
|
||||
line:
|
||||
|
||||
* Connection details for the current master, to copy the cluster
|
||||
configuration.
|
||||
* A location for initializing its own $PGDATA.
|
||||
|
||||
repmgr will also ask for the superuser password on the witness database so
|
||||
it can reconnect when needed (the command line option --initdb-no-pwprompt
|
||||
will set up a password-less superuser).
|
||||
|
||||
Start the repmgrd daemons
|
||||
-------------------------
|
||||
|
||||
Log in node2 and witness.
|
||||
Log in to node2 and witness::
|
||||
|
||||
su - postgres
|
||||
repmgrd -f /etc/repmgr/repmgr.conf > /var/log/postgresql/repmgr.log 2>&1
|
||||
su - postgres
|
||||
repmgrd -f /etc/repmgr/repmgr.conf --daemonize -> /var/log/postgresql/repmgr.log 2>&1
|
||||
|
||||
**Note:** The Master does not need a repmgrd daemon.
|
||||
|
||||
|
||||
Suspend Automatic behavior
|
||||
==========================
|
||||
|
||||
Edit the repmgr.conf of the node to remove from automatic processing and change::
|
||||
|
||||
failover=manual
|
||||
failover=manual
|
||||
|
||||
Then, signal repmgrd daemon::
|
||||
|
||||
su - postgres
|
||||
kill -HUP `pidof repmgrd`
|
||||
su - postgres
|
||||
kill -HUP $(pidof repmgrd)
|
||||
|
||||
Usage
|
||||
=====
|
||||
119
FAQ.md
Normal file
119
FAQ.md
Normal file
@@ -0,0 +1,119 @@
|
||||
FAQ - Frequently Asked Questions about repmgr
|
||||
=============================================
|
||||
|
||||
This FAQ applies to `repmgr` 3.0 and later.
|
||||
|
||||
General
|
||||
-------
|
||||
|
||||
- What's the difference between the repmgr versions?
|
||||
|
||||
repmgr 3.x builds on the improved replication facilities added
|
||||
in PostgreSQL 9.3, as well as improved automated failover support
|
||||
via `repmgrd`, and is not compatible with PostgreSQL 9.2 and earlier.
|
||||
|
||||
repmgr 2.x supports PostgreSQL 9.0 onwards. While it is compatible
|
||||
with PostgreSQL 9.3 and later, we recommend repmgr v3.
|
||||
|
||||
- What's the advantage of using replication slots?
|
||||
|
||||
Replication slots, introduced in PostgreSQL 9.4, ensure that the
|
||||
master server will retain WAL files until they have been consumed
|
||||
by all standby servers. This makes WAL file management much easier,
|
||||
and if used `repmgr` will no longer insist on a fixed number (default: 5000)
|
||||
of WAL files being preserved.
|
||||
|
||||
(However this does mean that if a standby is no longer connected to the
|
||||
master, the master will retain WAL files indefinitely).
|
||||
|
||||
- How many replication slots should I define in `max_replication_slots`?
|
||||
|
||||
Normally at least the same number as the number of standbys which will connect
|
||||
to the node. Note that changes to `max_replication_slots` require a server
|
||||
restart to take effect, and as there is no particular penalty for unused
|
||||
replication slots, setting a higher figure will make adding new nodes
|
||||
easier.
|
||||
|
||||
|
||||
`repmgr`
|
||||
--------
|
||||
|
||||
- When should I use the --rsync-only option?
|
||||
|
||||
By default, `repmgr` uses `pg_basebackup` to clone a standby from
|
||||
a master. However, `pg_basebackup` copies the entire data directory, which
|
||||
can take some time depending on installation size. If you have an
|
||||
existing but "stale" standby, `repmgr` can use `rsync` instead,
|
||||
which means only changed or added files need to be copied.
|
||||
|
||||
- Can I register an existing master/standby?
|
||||
|
||||
Yes, this is no problem.
|
||||
|
||||
- How can a failed master be re-added as a standby?
|
||||
|
||||
This is a two-stage process. First, the failed master's data directory
|
||||
must be re-synced with the current master; secondly the failed master
|
||||
needs to be re-registered as a standby. The section "Converting a failed
|
||||
master to a standby" in the `README.md` file contains more detailed
|
||||
information on this process.
|
||||
|
||||
- Is there an easy way to check my master server is correctly configured
|
||||
for use with `repmgr`?
|
||||
|
||||
Yes - execute `repmgr` with the `--check-upstream-config` option, and it
|
||||
will let you know which items in `postgresql.conf` need to be modified.
|
||||
|
||||
- Even though I specified custom `rsync` options, `repmgr` appends
|
||||
the `--checksum` - why?
|
||||
|
||||
When syncing a stale data directory from an active server, it's
|
||||
essential that `rsync` compares the content of files rather than
|
||||
just timestamp and size, to ensure that all changed files are
|
||||
copied and prevent corruption.
|
||||
|
||||
- When cloning a standby, how can I prevent `repmgr` from copying
|
||||
`postgresql.conf` and `pg_hba.conf` from the PostgreSQL configuration
|
||||
directory in `/etc`?
|
||||
|
||||
Use the command line option `--ignore-external-config-files`
|
||||
|
||||
- How can I prevent `repmgr` from copying local configuration files
|
||||
in the data directory?
|
||||
|
||||
If you're updating an existing but stale data directory which
|
||||
contains e.g. configuration files you don't want to be overwritten
|
||||
with the same file from the master, specify the files in the
|
||||
`rsync_options` configuration option, e.g.
|
||||
|
||||
rsync_options=--exclude=postgresql.local.conf
|
||||
|
||||
This option is only available when using the `--rsync-only` option.
|
||||
|
||||
`repmgrd`
|
||||
---------
|
||||
|
||||
- Do I need a witness server?
|
||||
|
||||
Not necessarily. However if you have an uneven number of nodes spread
|
||||
over more than one network segment, a witness server will enable
|
||||
better handling of a 'split brain' situation by providing a "casting
|
||||
vote" on the preferred network segment.
|
||||
|
||||
- How can I prevent a node from ever being promoted to master?
|
||||
|
||||
In `repmgr.conf`, set its priority to a value of 0 or less.
|
||||
|
||||
- Does `repmgrd` support delayed standbys?
|
||||
|
||||
`repmgrd` can monitor delayed standbys - those set up with
|
||||
`recovery_min_apply_delay` set to a non-zero value in `recovery.conf` -
|
||||
but as it's not currently possible to directly examine the value
|
||||
applied to the standby, `repmgrd` may not be able to properly evaluate
|
||||
the node as a promotion candidate.
|
||||
|
||||
We recommend that delayed standbys are explicitly excluded from promotion
|
||||
by setting `priority` to 0 in `repmgr.conf`.
|
||||
|
||||
Note that after registering a delayed standby, `repmgrd` will only start
|
||||
once the metadata added in the master node has been replicated.
|
||||
26
HISTORY
26
HISTORY
@@ -1,7 +1,15 @@
|
||||
2.0.3 2015-04-16
|
||||
Add -S/--superuser option for witness database creation (Ian)
|
||||
Add -c/--fast-checkpoint option for cloning (Christoph)
|
||||
Add option "--initdb-no-pwprompt" (Ian)
|
||||
3.0
|
||||
Require PostgreSQL 9.3 or later (Ian)
|
||||
Use `pg_basebackup` by default (instead of `rsync`) to clone standby servers (Ian)
|
||||
Use `pg_ctl promote` to promote a standby to primary
|
||||
Enable tablespace remapping using `pg_basebackup` (in PostgreSQL 9.3 with `rsync`) (Ian)
|
||||
Support cascaded standbys (Ian)
|
||||
"pg_bindir" no longer required as a configuration parameter (Ian)
|
||||
Enable replication slots to be used (PostgreSQL 9.4 and later) (Ian)
|
||||
Command line option "--check-upstream-config" (Ian)
|
||||
Add event logging table and option to execute an external program when an event occurs (Ian)
|
||||
General usability and logging message improvements (Ian)
|
||||
Code consolidation and cleanup (Ian)
|
||||
|
||||
2.0.2 2015-02-17
|
||||
Add "--checksum" in rsync when using "--force" (Jaime)
|
||||
@@ -37,7 +45,7 @@
|
||||
Add a ssh_options parameter (Jay Taylor)
|
||||
|
||||
2.0beta1 2012-07-27
|
||||
Make CLONE command try to make an exact copy including $PGDATA location (Cedric)
|
||||
Make CLONE command try to make an exact copy including $PGDATA location (Cedric)
|
||||
Add detection of master failure (Jaime)
|
||||
Add the notion of a witness server (Jaime)
|
||||
Add autofailover capabilities (Jaime)
|
||||
@@ -45,15 +53,15 @@
|
||||
Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime)
|
||||
Add tunables to specify number of retries to reconnect to master and the time between them (Jaime)
|
||||
|
||||
1.2.0 2012-07-27
|
||||
Test ssh connection before trying to rsync (Cédric)
|
||||
1.2.0 2012-07-27
|
||||
Test ssh connection before trying to rsync (Cédric)
|
||||
Add CLUSTER SHOW command (Carlo)
|
||||
Add CLUSTER CLEANUP command (Jaime)
|
||||
Add function write_primary_conninfo (Marco)
|
||||
Teach repmgr how to get tablespace's location in different pg version (Jaime)
|
||||
Improve version message (Carlo)
|
||||
Improve version message (Carlo)
|
||||
|
||||
1.1.1 2012-04-18
|
||||
1.1.1 2012-04-18
|
||||
Add --ignore-rsync-warning (Cédric)
|
||||
Add strnlen for compatibility with OS X (Greg)
|
||||
Improve performance of the repl_status view (Jaime)
|
||||
|
||||
2
Makefile
2
Makefile
@@ -1,6 +1,6 @@
|
||||
#
|
||||
# Makefile
|
||||
# Copyright (c) 2ndQuadrant, 2010-2014
|
||||
# Copyright (c) 2ndQuadrant, 2010-2015
|
||||
|
||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||
|
||||
123
PACKAGES.md
Normal file
123
PACKAGES.md
Normal file
@@ -0,0 +1,123 @@
|
||||
Packaging
|
||||
=========
|
||||
|
||||
Notes on RedHat Linux, Fedora, and CentOS Builds
|
||||
------------------------------------------------
|
||||
|
||||
The RPM packages of PostgreSQL put ``pg_config`` into the ``postgresql-devel``
|
||||
package, not the main server one. And if you have a RPM install of PostgreSQL
|
||||
9.0, the entire PostgreSQL binary directory will not be in your PATH by default
|
||||
either. Individual utilities are made available via the ``alternatives``
|
||||
mechanism, but not all commands will be wrapped that way. The files installed
|
||||
by repmgr will certainly not be in the default PATH for the postgres user
|
||||
on such a system. They will instead be in /usr/pgsql-9.0/bin/ on this
|
||||
type of system.
|
||||
|
||||
When building repmgr against a RPM packaged build, you may discover that some
|
||||
development packages are needed as well. The following build errors can
|
||||
occur::
|
||||
|
||||
/usr/bin/ld: cannot find -lxslt
|
||||
/usr/bin/ld: cannot find -lpam
|
||||
|
||||
Install the following packages to correct those::
|
||||
|
||||
yum install libxslt-devel
|
||||
yum install pam-devel
|
||||
|
||||
If building repmgr as a regular user, then doing the install into the system
|
||||
directories using sudo, the syntax is hard. ``pg_config`` won't be in root's
|
||||
path either. The following recipe should work::
|
||||
|
||||
sudo PATH="/usr/pgsql-9.0/bin:$PATH" make USE_PGXS=1 install
|
||||
|
||||
Issues with 32 and 64 bit RPMs
|
||||
------------------------------
|
||||
|
||||
If when building, you receive a series of errors of this form::
|
||||
|
||||
/usr/bin/ld: skipping incompatible /usr/pgsql-9.0/lib/libpq.so when searching for -lpq
|
||||
|
||||
This is likely because you have both the 32 and 64 bit versions of the
|
||||
``postgresql90-devel`` package installed. You can check that like this::
|
||||
|
||||
rpm -qa --queryformat '%{NAME}\t%{ARCH}\n' | grep postgresql90-devel
|
||||
|
||||
And if two packages appear, one for i386 and one for x86_64, that's not supposed
|
||||
to be allowed.
|
||||
|
||||
This can happen when using the PGDG repo to install that package;
|
||||
here is an example session demonstrating the problem case appearing::
|
||||
|
||||
# yum install postgresql-devel
|
||||
..
|
||||
Setting up Install Process
|
||||
Resolving Dependencies
|
||||
--> Running transaction check
|
||||
---> Package postgresql90-devel.i386 0:9.0.2-2PGDG.rhel5 set to be updated
|
||||
---> Package postgresql90-devel.x86_64 0:9.0.2-2PGDG.rhel5 set to be updated
|
||||
--> Finished Dependency Resolution
|
||||
|
||||
Dependencies Resolved
|
||||
|
||||
=========================================================================
|
||||
Package Arch Version Repository Size
|
||||
=========================================================================
|
||||
Installing:
|
||||
postgresql90-devel i386 9.0.2-2PGDG.rhel5 pgdg90 1.5 M
|
||||
postgresql90-devel x86_64 9.0.2-2PGDG.rhel5 pgdg90 1.6 M
|
||||
|
||||
Note how both the i386 and x86_64 platform architectures are selected for
|
||||
installation. Your main PostgreSQL package will only be compatible with one of
|
||||
those, and if the repmgr build finds the wrong postgresql90-devel these
|
||||
"skipping incompatible" messages appear.
|
||||
|
||||
In this case, you can temporarily remove both packages, then just install the
|
||||
correct one for your architecture. Example::
|
||||
|
||||
rpm -e postgresql90-devel --allmatches
|
||||
yum install postgresql90-devel-9.0.2-2PGDG.rhel5.x86_64
|
||||
|
||||
Just deleting the package for the wrong platform instead might not leave behind
|
||||
the correct files, due to the way in which these accidentally happen to interact.
|
||||
If you already tried to build repmgr before doing this, you'll need to do::
|
||||
|
||||
make USE_PGXS=1 clean
|
||||
|
||||
to get rid of leftover files from the wrong architecture.
|
||||
|
||||
Notes on Ubuntu, Debian or other Debian-based Builds
|
||||
----------------------------------------------------
|
||||
|
||||
The Debian packages of PostgreSQL put ``pg_config`` into the development package
|
||||
called ``postgresql-server-dev-$version``.
|
||||
|
||||
When building repmgr against a Debian package build, you may discover that some
|
||||
development packages are needed as well. You will need the following development
|
||||
packages installed::
|
||||
|
||||
sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev
|
||||
|
||||
If you're using Debian packages for PostgreSQL and are building repmgr with the
|
||||
USE_PGXS option you also need to install the corresponding development package::
|
||||
|
||||
sudo apt-get install postgresql-server-dev-9.0
|
||||
|
||||
If you build and install repmgr manually it will not be on the system path. The
|
||||
binaries will be installed in /usr/lib/postgresql/$version/bin/ which is not on
|
||||
the default path. The reason behind this is that Ubuntu/Debian systems manage
|
||||
multiple installed versions of PostgreSQL on the same system through a wrapper
|
||||
called pg_wrapper and repmgr is not (yet) known to this wrapper.
|
||||
|
||||
You can solve this in many different ways; the most Debian-like is to make an
|
||||
alternate for repmgr and repmgrd::
|
||||
|
||||
sudo update-alternatives --install /usr/bin/repmgr repmgr /usr/lib/postgresql/9.0/bin/repmgr 10
|
||||
sudo update-alternatives --install /usr/bin/repmgrd repmgrd /usr/lib/postgresql/9.0/bin/repmgrd 10
|
||||
|
||||
You can also make a deb package of repmgr using::
|
||||
|
||||
make USE_PGXS=1 deb
|
||||
|
||||
This will build a Debian package one level up from where you build, normally the
|
||||
same directory that you have your repmgr/ directory in.
|
||||
288
QUICKSTART.md
288
QUICKSTART.md
@@ -1,135 +1,12 @@
|
||||
repmgr: Quickstart guide
|
||||
========================
|
||||
repmgr quickstart guide
|
||||
=======================
|
||||
|
||||
`repmgr` is an open-source tool suite for managing replication and failover
|
||||
among multiple PostgreSQL server nodes. It enhances PostgreSQL's built-in
|
||||
hot-standby capabilities with a set of administration tools for monitoring
|
||||
replication, setting up standby servers and performing failover/switchover
|
||||
operations.
|
||||
|
||||
This quickstart guide assumes you are familiar with PostgreSQL replication
|
||||
setup and Linux/UNIX system administration. For a more detailed tutorial
|
||||
covering setup on a variety of different systems, see the README.rst file.
|
||||
|
||||
Conceptual Overview
|
||||
-------------------
|
||||
|
||||
`repmgr` provides two binaries:
|
||||
|
||||
- `repmgr`: a command-line client to manage replication and `repmgr` configuration
|
||||
- `repmgrd`: an optional daemon process which runs on standby nodes to monitor
|
||||
replication and node status
|
||||
|
||||
Each PostgreSQL node requires a `repmgr.conf` configuration file; additionally
|
||||
it must be "registered" using the `repmgr` command-line client. `repmgr` stores
|
||||
information about managed nodes in a custom schema on the node's current master
|
||||
database.
|
||||
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
`repmgr` works with PostgreSQL 9.0 and later. All server nodes must be running the
|
||||
same PostgreSQL major version, and preferably should be running the same minor
|
||||
version.
|
||||
|
||||
`repmgr` will work on any Linux or UNIX-like environment capable of running
|
||||
PostgreSQL. `rsync` must also be installed.
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
`repmgr` must be installed on each PostgreSQL server node.
|
||||
|
||||
* Packages
|
||||
- RPM packages for RedHat-based distributions are available from PGDG
|
||||
- Debian/Ubuntu provide .deb packages.
|
||||
|
||||
It is also possible to build .deb packages directly from the `repmgr` source;
|
||||
see README.rst for further details.
|
||||
|
||||
* Source installation
|
||||
- `repmgr` source code is hosted at github (https://github.com/2ndQuadrant/repmgr);
|
||||
tar.gz files can be downloaded from https://github.com/2ndQuadrant/repmgr/releases .
|
||||
|
||||
`repmgr` can be built easily using PGXS:
|
||||
|
||||
sudo make USE_PGXS=1 install
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
### Server configuration
|
||||
|
||||
Password-less SSH logins must be enabled for the database system user (typically `postgres`)
|
||||
between all server nodes to enable `repmgr` to copy required files.
|
||||
|
||||
### PostgreSQL configuration
|
||||
|
||||
The master PostgreSQL node needs to be configured for replication with the
|
||||
following settings:
|
||||
|
||||
wal_level = 'hot_standby' # minimal, archive, hot_standby, or logical
|
||||
archive_mode = on # allows archiving to be done
|
||||
archive_command = 'cd .' # command to use to archive a logfile segment
|
||||
max_wal_senders = 10 # max number of walsender processes
|
||||
wal_keep_segments = 5000 # in logfile segments, 16MB each; 0 disables
|
||||
hot_standby = on # "on" allows queries during recovery
|
||||
|
||||
Note that `repmgr` expects a default of 5000 wal_keep_segments, although this
|
||||
value can be overridden when executing the `repmgr` client.
|
||||
|
||||
Additionally, `repmgr` requires a dedicated PostgreSQL superuser account
|
||||
and a database in which to store monitoring and replication data. The `repmgr`
|
||||
user account will also be used for replication connections from the standby,
|
||||
so a separate replication user with the `REPLICATION` privilege is not required.
|
||||
The database can in principle be any database, including the default `postgres`
|
||||
one, however it's probably advisable to create a dedicated database for `repmgr`
|
||||
usage.
|
||||
|
||||
|
||||
### repmgr configuration
|
||||
|
||||
Each PostgreSQL node requires a `repmgr.conf` configuration file containing
|
||||
identification and database connection information:
|
||||
|
||||
cluster=test
|
||||
node=1
|
||||
node_name=node1
|
||||
conninfo='host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||
pg_bindir=/path/to/postgres/bin
|
||||
|
||||
* `cluster`: common name for the replication cluster; this must be the same on all nodes
|
||||
* `node`: a unique, arbitrary integer identifier
|
||||
* `node_name`: a unique, human-readable name
|
||||
* `conninfo`: a standard conninfo string enabling repmgr to connect to the
|
||||
control database; user and name must be the same on all nodes, while other
|
||||
parameters such as port may differ. The `host` parameter *must* be a hostname
|
||||
resolvable by all nodes on the cluster.
|
||||
* `pg_bindir`: (optional) location of PostgreSQL binaries, if not in the default $PATH
|
||||
|
||||
Note that the configuration file should *not* be stored inside the PostgreSQL
|
||||
data directory. The configuration file can be specified with the
|
||||
`-f, --config-file=PATH` option and can have any arbitrary name. If no
|
||||
configuration file is specified, `repmgr` will search for `repmgr.conf`
|
||||
in the current working directory.
|
||||
|
||||
Each node configuration needs to be registered with `repmgr`, either using the
|
||||
`repmgr` command line tool, or the `repmgrd` daemon; for details see below. Details
|
||||
about each node are inserted into the `repmgr` database (for details see below).
|
||||
|
||||
|
||||
Replication setup and monitoring
|
||||
--------------------------------
|
||||
This quickstart guide provides some annotated examples on basic
|
||||
`repmgr` setup. It assumes you are familiar with PostgreSQL replication
|
||||
concepts and setup, and with Linux/UNIX system administration.
|
||||
|
||||
For the purposes of this guide, we'll assume the database user will be
|
||||
`repmgr_usr` and the database will be `repmgr_db`, and that the following
|
||||
environment variables are set on each node:
|
||||
|
||||
- $HOME: the PostgreSQL system user's home directory
|
||||
- $PGDATA: the PostgreSQL data directory
|
||||
`repmgr_usr` and the database will be `repmgr_db`.
|
||||
|
||||
|
||||
Master setup
|
||||
@@ -154,9 +31,10 @@ Master setup
|
||||
```
|
||||
|
||||
Restart the PostgreSQL server after making these changes.
|
||||
|
||||
2. Create the `repmgr` configuration file:
|
||||
|
||||
$ cat $HOME/repmgr/repmgr.conf
|
||||
$ cat /path/to/repmgr/node1/repmgr.conf
|
||||
cluster=test
|
||||
node=1
|
||||
node_name=node1
|
||||
@@ -168,36 +46,32 @@ Master setup
|
||||
|
||||
3. Register the master node with `repmgr`:
|
||||
|
||||
$ repmgr -f $HOME/repmgr/repmgr.conf --verbose master register
|
||||
[2014-07-04 10:43:42] [INFO] repmgr mgr connecting to master database
|
||||
[2014-07-04 10:43:42] [INFO] repmgr connected to master, checking its state
|
||||
[2014-07-04 10:43:42] [INFO] master register: creating database objects inside the repmgr_test schema
|
||||
[2014-07-04 10:43:43] [NOTICE] Master node correctly registered for cluster test with id 1 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
$ repmgr -f /path/to/repmgr/node1/repmgr.conf --verbose master register
|
||||
[2015-03-03 17:45:53] [INFO] repmgr connecting to master database
|
||||
[2015-03-03 17:45:53] [INFO] repmgr connected to master, checking its state
|
||||
[2015-03-03 17:45:53] [INFO] master register: creating database objects inside the repmgr_test schema
|
||||
[2015-03-03 17:45:53] [NOTICE] Master node correctly registered for cluster test with id 1 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
|
||||
Standby setup
|
||||
-------------
|
||||
|
||||
Slave/standby setup
|
||||
-------------------
|
||||
1. Use `repmgr standby clone` to clone a standby from the master:
|
||||
|
||||
1. Use `repmgr` to clone the master:
|
||||
|
||||
$ repmgr -D $PGDATA -d repmgr_db -U repmgr_usr -R postgres --verbose standby clone 192.168.1.2
|
||||
Opening configuration file: ./repmgr.conf
|
||||
[2014-07-04 10:49:00] [ERROR] Did not find the configuration file './repmgr.conf', continuing
|
||||
[2014-07-04 10:49:00] [INFO] repmgr connecting to master database
|
||||
[2014-07-04 10:49:00] [INFO] repmgr connected to master, checking its state
|
||||
[2014-07-04 10:49:00] [INFO] Successfully connected to primary. Current installation size is 1807 MB
|
||||
[2014-07-04 10:49:00] [NOTICE] Starting backup...
|
||||
[2014-07-04 10:49:00] [INFO] creating directory "/path/to/data/"...
|
||||
(...)
|
||||
[2014-07-04 10:53:19] [NOTICE] Finishing backup...
|
||||
repmgr -D /path/to/standby/data -d repmgr_db -U repmgr_usr --verbose standby clone 192.168.1.2
|
||||
[2015-03-03 18:18:21] [NOTICE] No configuration file provided and default file './repmgr.conf' not found - continuing with default values
|
||||
[2015-03-03 18:18:21] [NOTICE] repmgr Destination directory ' /path/to/standby/data' provided
|
||||
[2015-03-03 18:18:21] [INFO] repmgr connecting to upstream node
|
||||
[2015-03-03 18:18:21] [INFO] repmgr connected to upstream node, checking its state
|
||||
[2015-03-03 18:18:21] [INFO] Successfully connected to upstream node. Current installation size is 27 MB
|
||||
[2015-03-03 18:18:21] [NOTICE] Starting backup...
|
||||
[2015-03-03 18:18:21] [INFO] creating directory " /path/to/standby/data"...
|
||||
[2015-03-03 18:18:21] [INFO] Executing: 'pg_basebackup -l "repmgr base backup" -h localhost -p 9595 -U repmgr_usr -D /path/to/standby/data '
|
||||
NOTICE: pg_stop_backup complete, all required WAL segments have been archived
|
||||
[2014-07-04 10:53:21] [INFO] repmgr requires primary to keep WAL files 0000000100000000000000AD until at least 0000000100000000000000AD
|
||||
[2014-07-04 10:53:21] [NOTICE] repmgr standby clone complete
|
||||
[2014-07-04 10:53:21] [NOTICE] HINT: You can now start your postgresql server
|
||||
[2014-07-04 10:53:21] [NOTICE] for example : /etc/init.d/postgresql start
|
||||
[2015-03-03 18:18:23] [NOTICE] repmgr standby clone (using pg_basebackup) complete
|
||||
[2015-03-03 18:18:23] [NOTICE] HINT: You can now start your postgresql server
|
||||
[2015-03-03 18:18:23] [NOTICE] for example : pg_ctl -D /path/to/standby/data start
|
||||
|
||||
-R is the database system user on the master node. At this point it does not matter
|
||||
if the `repmgr.conf` file is not found.
|
||||
Note that at this point it does not matter if the `repmgr.conf` file is not found.
|
||||
|
||||
This will clone the PostgreSQL database files from the master, including its
|
||||
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
||||
@@ -208,97 +82,33 @@ Slave/standby setup
|
||||
|
||||
3. Create the `repmgr` configuration file:
|
||||
|
||||
$ cat $HOME/repmgr/repmgr.conf
|
||||
$ cat /path/node2/repmgr/repmgr.conf
|
||||
cluster=test
|
||||
node=2
|
||||
node_name=node2
|
||||
conninfo='host=repmgr_node2 user=repmgr_usr dbname=repmgr_db'
|
||||
pg_bindir=/path/to/postgres/bin
|
||||
|
||||
4. Register the master node with `repmgr`:
|
||||
4. Register the standby node with `repmgr`:
|
||||
|
||||
$ repmgr -f $HOME/repmgr/repmgr.conf --verbose standby register
|
||||
Opening configuration file: /path/to/repmgr/repmgr.conf
|
||||
[2014-07-04 11:48:13] [INFO] repmgr connecting to standby database
|
||||
[2014-07-04 11:48:13] [INFO] repmgr connected to standby, checking its state
|
||||
[2014-07-04 11:48:13] [INFO] repmgr connecting to master database
|
||||
[2014-07-04 11:48:13] [INFO] finding node list for cluster 'test'
|
||||
[2014-07-04 11:48:13] [INFO] checking role of cluster node 'host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||
[2014-07-04 11:48:13] [INFO] repmgr connected to master, checking its state
|
||||
[2014-07-04 11:48:13] [INFO] repmgr registering the standby
|
||||
[2014-07-04 11:48:13] [INFO] repmgr registering the standby complete
|
||||
[2014-07-04 11:48:13] [NOTICE] Standby node correctly registered for cluster test with id 2 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
|
||||
Monitoring
|
||||
----------
|
||||
|
||||
`repmgrd` is a management and monitoring daemon which runs on standby nodes
|
||||
and which can automate remote actions. It can be started simply with e.g.:
|
||||
|
||||
repmgrd -f $HOME/repmgr/repmgr.conf --verbose > $HOME/repmgr/repmgr.log 2>&1
|
||||
|
||||
or alternatively:
|
||||
|
||||
repmgrd -f $HOME/repmgr/repmgr.conf --verbose --monitoring-history > $HOME/repmgr/repmgrd.log 2>&1
|
||||
|
||||
which will track advance or lag of the replication in every standby in the
|
||||
`repl_monitor` table.
|
||||
|
||||
Example log output:
|
||||
|
||||
[2014-07-04 11:55:17] [INFO] repmgrd Connecting to database 'host=localhost user=repmgr_usr dbname=repmgr_db'
|
||||
[2014-07-04 11:55:17] [INFO] repmgrd Connected to database, checking its state
|
||||
[2014-07-04 11:55:17] [INFO] repmgrd Connecting to primary for cluster 'test'
|
||||
[2014-07-04 11:55:17] [INFO] finding node list for cluster 'test'
|
||||
[2014-07-04 11:55:17] [INFO] checking role of cluster node 'host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||
[2014-07-04 11:55:17] [INFO] repmgrd Checking cluster configuration with schema 'repmgr_test'
|
||||
[2014-07-04 11:55:17] [INFO] repmgrd Checking node 2 in cluster 'test'
|
||||
[2014-07-04 11:55:17] [INFO] Reloading configuration file and updating repmgr tables
|
||||
[2014-07-04 11:55:17] [INFO] repmgrd Starting continuous standby node monitoring
|
||||
$ repmgr -f /path/to/repmgr/node2/repmgr.conf --verbose standby register
|
||||
[2015-03-03 18:24:34] [NOTICE] Opening configuration file: /path/to/repmgr/node2/repmgr.conf
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to standby database
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to master database
|
||||
[2015-03-03 18:24:34] [INFO] finding node list for cluster 'test'
|
||||
[2015-03-03 18:24:34] [INFO] checking role of cluster node '1'
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connected to master, checking its state
|
||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby
|
||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby complete
|
||||
[2015-03-03 18:24:34] [NOTICE] Standby node correctly registered for cluster test with id 2 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
|
||||
|
||||
Failover
|
||||
--------
|
||||
|
||||
To promote a standby to master, on the standby execute e.g.:
|
||||
|
||||
repmgr -f $HOME/repmgr/repmgr.conf --verbose standby promote
|
||||
|
||||
`repmgr` will attempt to connect to the current master to verify that it
|
||||
is not available (if it is, `repmgr` will not promote the standby).
|
||||
|
||||
Other standby servers need to be told to follow the new master with:
|
||||
|
||||
repmgr -f $HOME/repmgr/repmgr.conf --verbose standby follow
|
||||
|
||||
See file `autofailover_quick_setup.rst` for details on setting up
|
||||
automated failover.
|
||||
|
||||
|
||||
repmgr database schema
|
||||
----------------------
|
||||
|
||||
`repmgr` creates a small schema for its own use in the database specified in
|
||||
each node's conninfo configuration parameter. This database can in principle
|
||||
be any database. The schema name is the global `cluster` name prefixed
|
||||
with `repmgr_`, so for the example setup above the schema name is
|
||||
`repmgr_test`.
|
||||
|
||||
The schema contains two tables:
|
||||
|
||||
* `repl_nodes`
|
||||
stores information about all registered servers in the cluster
|
||||
* `repl_monitor`
|
||||
stores monitoring information about each node
|
||||
|
||||
and one view, `repl_status`, which summarizes the latest monitoring information
|
||||
for each node.
|
||||
|
||||
|
||||
Further reading
|
||||
---------------
|
||||
|
||||
* http://blog.2ndquadrant.com/announcing-repmgr-2-0/
|
||||
* http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
||||
* http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
|
||||
This concludes the basic `repmgr` setup of master and standby. The records
|
||||
created in the `repl_nodes` table should look something like this:
|
||||
|
||||
repmgr_db=# SELECT * from repmgr_test.repl_nodes;
|
||||
id | type | upstream_node_id | cluster | name | conninfo | slot_name | priority | active
|
||||
----+---------+------------------+---------+-------+-------------------------------------------------+-----------+----------+--------
|
||||
1 | primary | | test | node1 | host=localhost user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
2 | standby | 1 | test | node2 | host=localhost user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
(2 rows)
|
||||
|
||||
619
README.md
Normal file
619
README.md
Normal file
@@ -0,0 +1,619 @@
|
||||
repmgr: Replication Manager for PostgreSQL
|
||||
==========================================
|
||||
|
||||
`repmgr` is an open-source tool to manage replication and failover
|
||||
between multiple PostgreSQL servers. It enhances PostgreSQL's built-in
|
||||
hot-standby capabilities with tools to set up standby servers, monitor
|
||||
replication, and perform administrative tasks such as failover or manual
|
||||
switchover operations.
|
||||
|
||||
This document covers `repmgr 3`, which supports PostgreSQL 9.4 and 9.3.
|
||||
This version can use `pg_basebackup` to clone standby servers, supports
|
||||
replication slots and cascading replication, doesn't require a restart
|
||||
after promotion, and has many usability improvements.
|
||||
|
||||
Please continue to use `repmgr 2` with earlier PostgreSQL 9.x versions.
|
||||
For a list of changes since `repmgr 2` and instructions on upgrading to
|
||||
`repmgr 3`, see the "Upgrading from repmgr 2" section below.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
The `repmgr` command-line tool is used to perform administrative tasks,
|
||||
and the `repmgrd` daemon is used to optionally monitor replication and
|
||||
manage automatic failover.
|
||||
|
||||
To get started, each PostgreSQL node in your cluster must have a
|
||||
`repmgr.conf` file. The current master node must be registered using
|
||||
`repmgr master register`. Existing standby servers can be registered
|
||||
using `repmgr standby register`. A new standby server can be created
|
||||
using `repmgr standby clone` followed by `repmgr standby register`.
|
||||
|
||||
See the `QUICKSTART.md` file for examples of how to use these commands.
|
||||
|
||||
Once the cluster is in operation, run `repmgr cluster show` to see the
|
||||
status of the registered primary and standby nodes. Any standby can be
|
||||
manually promoted using `repmgr standby promote`. Other standby nodes
|
||||
can be told to follow the new master using `repmgr standby follow`. We
|
||||
show examples of these commands below.
|
||||
|
||||
Next, for detailed monitoring, you must run `repmgrd` (with the same
|
||||
configuration file) on all your nodes. Replication status information is
|
||||
stored in a custom schema along with information about registered nodes.
|
||||
You also need `repmgrd` to configure automatic failover in your cluster.
|
||||
|
||||
See the `FAILOVER.rst` file for an explanation of how to set up
|
||||
automatic failover.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
`repmgr` is developed and tested on Linux and OS X, but it should work
|
||||
on any UNIX-like system which PostgreSQL itself supports.
|
||||
|
||||
All nodes must be running the same major version of PostgreSQL, and we
|
||||
recommend that they also run the same minor version. This version of
|
||||
`repmgr` (v3) supports PostgreSQL 9.3 and 9.4.
|
||||
|
||||
Earlier versions of `repmgr` needed password-less SSH access between
|
||||
nodes in order to clone standby servers using `rsync`. `repmgr 3` can
|
||||
use `pg_basebackup` instead in most circumstances; ssh is not required.
|
||||
|
||||
You will need to use rsync only if your PostgreSQL configuration files
|
||||
are outside your data directory (as on Debian) and you wish these to
|
||||
be copied by `repmgr`. See the `SSH-RSYNC.md` file for details on
|
||||
configuring password-less SSH between your nodes.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
`repmgr` must be installed on each PostgreSQL server node.
|
||||
|
||||
* Packages
|
||||
- PGDG publishes RPM packages for RedHat-based distributions
|
||||
- Debian/Ubuntu provide .deb packages.
|
||||
- See `PACKAGES.md` for details on building .deb and .rpm packages
|
||||
from the `repmgr` source code.
|
||||
|
||||
* Source installation
|
||||
- `git clone https://github.com/2ndQuadrant/repmgr`
|
||||
- Or download tar.gz files from
|
||||
https://github.com/2ndQuadrant/repmgr/releases
|
||||
- To install from source, run `sudo make USE_PGXS=1 install`
|
||||
|
||||
After installation, you should be able to run `repmgr --version` and
|
||||
`repmgrd --version`. These binaries should be installed in the same
|
||||
directory as other PostgreSQL binaries, such as `psql`.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
### Server configuration
|
||||
|
||||
By default, `repmgr` uses PostgreSQL's built-in replication protocol to
|
||||
clone a primary and create a standby server. If your configuration files
|
||||
live outside your data directory, however, you will still need to set up
|
||||
password-less SSH so that rsync can be used. See the `SSH-RSYNC.md` file
|
||||
for details.
|
||||
|
||||
### PostgreSQL configuration
|
||||
|
||||
The primary server needs to be configured for replication with the
|
||||
following settings in `postgresql.conf`:
|
||||
|
||||
# Allow read-only queries on standby servers. The number of WAL
|
||||
# senders should be larger than the number of standby servers.
|
||||
|
||||
hot_standby = on
|
||||
wal_level = 'hot_standby'
|
||||
max_wal_senders = 10
|
||||
|
||||
# How much WAL to retain on the primary to allow a temporarily
|
||||
# disconnected standby to catch up again. The larger this is, the
|
||||
# longer the standby can be disconnected. This is needed only in
|
||||
# 9.3; in 9.4, replication slots can be used instead (see below).
|
||||
|
||||
wal_keep_segments = 5000
|
||||
|
||||
# Enable archiving, but leave it unconfigured (so that it can be
|
||||
# configured without a restart later). Recommended, not required.
|
||||
|
||||
archive_mode = on
|
||||
archive_command = 'cd .'
|
||||
|
||||
# You can also set additional replication parameters here, such as
|
||||
# hot_standby_feedback or synchronous_standby_names.
|
||||
|
||||
PostgreSQL 9.4 makes it possible to use replication slots, which means
|
||||
the value of wal_keep_segments need no longer be set. With 9.3, `repmgr`
|
||||
expects it to be set to at least 5000 (= 80GB of WAL) by default, though
|
||||
this can be overridden with the `-w N` argument.
|
||||
|
||||
A dedicated PostgreSQL superuser account and a database in which to
|
||||
store monitoring and replication data are required. Create them by
|
||||
running the following commands:
|
||||
|
||||
createuser -s repmgr
|
||||
createdb repmgr -O repmgr
|
||||
|
||||
We recommend using the name `repmgr` for both, but you can use whatever
|
||||
name you like (and you need to set the names you chose in the `conninfo`
|
||||
string in `repmgr.conf`; see below). `repmgr` will create the schema and
|
||||
objects it needs when it connects to the server.
|
||||
|
||||
### repmgr configuration
|
||||
|
||||
Create a `repmgr.conf` file on each server. Here's a minimal sample:
|
||||
|
||||
cluster=test
|
||||
node=1
|
||||
node_name=node1
|
||||
conninfo='host=repmgr_node1 user=repmgr dbname=repmgr'
|
||||
|
||||
The `cluster` name must be the same on all nodes. The `node` (an
|
||||
integer) and `node_name` must be unique to each node.
|
||||
|
||||
The `conninfo` string must point to repmgr's database *on this node*.
|
||||
The host must be an IP or a name that all the nodes in the cluster can
|
||||
resolve (not `localhost`!). All nodes must use the same username and
|
||||
database name, but other parameters, such as the port, can vary between
|
||||
nodes.
|
||||
|
||||
Your `repmgr.conf` should not be stored inside the PostgreSQL data
|
||||
directory. We recommend `/etc/repmgr/repmgr.conf`, but you can place it
|
||||
anywhere and use the `-f /path/to/repmgr.conf` option to tell `repmgr`
|
||||
where it is. If not specified, `repmgr` will search for `repmgr.conf` in
|
||||
the current working directory.
|
||||
|
||||
If your PostgreSQL binaries (`pg_ctl`, `pg_basebackup`) are not in your
|
||||
`PATH`, you can specify an alternate location in `repmgr.conf`:
|
||||
|
||||
pg_bindir=/path/to/postgres/bin
|
||||
|
||||
See `repmgr.conf.sample` for an example configuration file with all
|
||||
available configuration settings annotated.
|
||||
|
||||
### Starting up
|
||||
|
||||
The master node must be registered first using `repmgr master register`,
|
||||
and standby servers must be registered using `repmgr standby register`;
|
||||
this inserts details about each node into the control database. Use
|
||||
`repmgr cluster show` to see the result.
|
||||
|
||||
See the `QUICKSTART.md` file for examples of how to use these commands.
|
||||
|
||||
Failover
|
||||
--------
|
||||
|
||||
To promote a standby to master, on the standby execute e.g.:
|
||||
|
||||
repmgr -f /etc/repmgr/repmgr.conf --verbose standby promote
|
||||
|
||||
`repmgr` will attempt to connect to the current master to verify that it
|
||||
is not available (if it is, `repmgr` will not promote the standby).
|
||||
|
||||
Other standby servers need to be told to follow the new master with e.g.:
|
||||
|
||||
repmgr -f /etc/repmgr/repmgr.conf --verbose standby follow
|
||||
|
||||
See file `FAILOVER.rst` for details on setting up automated failover.
|
||||
|
||||
|
||||
Converting a failed master to a standby
|
||||
---------------------------------------
|
||||
|
||||
Often it's desirable to bring a failed master back into replication
|
||||
as a standby. First, ensure that the master's PostgreSQL server is
|
||||
no longer running; then use `repmgr standby clone` to re-sync its
|
||||
data directory with the current master, e.g.:
|
||||
|
||||
repmgr -f /etc/repmgr/repmgr.conf \
|
||||
--force --rsync-only \
|
||||
-h node2 -d repmgr -U repmgr --verbose \
|
||||
standby clone
|
||||
|
||||
Here it's essential to use the command line options `--force`, to
|
||||
ensure `repmgr` will re-use the existing data directory, and
|
||||
`--rsync-only`, which causes `repmgr` to use `rsync` rather than
|
||||
`pg_basebackup`, as the latter can only be used to clone a fresh
|
||||
standby.
|
||||
|
||||
The node can then be restarted.
|
||||
|
||||
The node will then need to be re-registered with `repmgr`; again
|
||||
the `--force` option is required to update the existing record:
|
||||
|
||||
repmgr -f /etc/repmgr/repmgr.conf \
|
||||
--force \
|
||||
standby register
|
||||
|
||||
|
||||
|
||||
Replication management with repmgrd
|
||||
-----------------------------------
|
||||
|
||||
`repmgrd` is a management and monitoring daemon which runs on standby nodes
|
||||
and which can automate actions such as failover and updating standbys to
|
||||
follow the new master. `repmgrd` can be started simply with e.g.:
|
||||
|
||||
repmgrd -f /etc/repmgr/repmgr.conf --verbose > $HOME/repmgr/repmgr.log 2>&1
|
||||
|
||||
or alternatively:
|
||||
|
||||
repmgrd -f /etc/repmgr/repmgr.conf --verbose --monitoring-history > $HOME/repmgr/repmgrd.log 2>&1
|
||||
|
||||
which will track replication advance or lag on all registered standbys.
|
||||
|
||||
For permanent operation, we recommend using the options `-d/--daemonize` to
|
||||
detach the `repmgrd` process, and `-p/--pid-file` to write the process PID
|
||||
to a file.
|
||||
|
||||
Example log output (at default log level):
|
||||
|
||||
[2015-03-11 13:15:40] [INFO] checking cluster configuration with schema 'repmgr_test'
|
||||
[2015-03-11 13:15:40] [INFO] checking node 2 in cluster 'test'
|
||||
[2015-03-11 13:15:40] [INFO] reloading configuration file and updating repmgr tables
|
||||
[2015-03-11 13:15:40] [INFO] starting continuous standby node monitoring
|
||||
|
||||
|
||||
Witness server
|
||||
--------------
|
||||
|
||||
In a situation caused e.g. by a network interruption between two
|
||||
data centres, it's important to avoid a "split-brain" situation where
|
||||
both sides of the network assume they are the active segment and the
|
||||
side without an active master unilaterally promotes one of its standbys.
|
||||
|
||||
To prevent this situation happening, it's essential to ensure that one
|
||||
network segment has a "voting majority", so other segments will know
|
||||
they're in the minority and not attempt to promote a new master. Where
|
||||
an odd number of servers exists, this is not an issue. However, if each
|
||||
network has an even number of nodes, it's necessary to provide some way
|
||||
of ensuring a majority, which is where the witness server becomes useful.
|
||||
|
||||
This is not a fully-fledged standby node and is not integrated into
|
||||
replication, but it effectively represents the "casting vote" when
|
||||
deciding which network segment has a majority. A witness server can
|
||||
be set up using `repmgr witness create` (see below for details) and
|
||||
can run on a dedicated server or an existing node. Note that it only
|
||||
makes sense to create a witness server in conjunction with running
|
||||
`repmgrd`; the witness server will require its own `repmgrd` instance.
|
||||
|
||||
|
||||
Monitoring
|
||||
----------
|
||||
|
||||
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
||||
constantly write node status information to the `repl_monitor` table, which can
|
||||
be queried easily using the view `repl_status`:
|
||||
|
||||
repmgr=# SELECT * FROM repmgr_test.repl_status;
|
||||
-[ RECORD 1 ]-------------+-----------------------------
|
||||
primary_node | 1
|
||||
standby_node | 2
|
||||
standby_name | node2
|
||||
node_type | standby
|
||||
active | t
|
||||
last_monitor_time | 2015-03-11 14:02:34.51713+09
|
||||
last_wal_primary_location | 0/3012AF0
|
||||
last_wal_standby_location | 0/3012AF0
|
||||
replication_lag | 0 bytes
|
||||
replication_time_lag | 00:00:03.463085
|
||||
apply_lag | 0 bytes
|
||||
communication_time_lag | 00:00:00.955385
|
||||
|
||||
|
||||
Event logging and notifications
|
||||
-------------------------------
|
||||
|
||||
To help understand what significant events (e.g. failure of a node) happened
|
||||
when and for what reason, `repmgr` logs such events into the `repl_events`
|
||||
table, e.g.:
|
||||
|
||||
repmgr_db=# SELECT * from repmgr_test.repl_events ;
|
||||
node_id | event | successful | event_timestamp | details
|
||||
---------+------------------+------------+-------------------------------+-----------------------------------------------------------------------------------
|
||||
1 | master_register | t | 2015-03-16 17:36:21.711796+09 |
|
||||
2 | standby_clone | t | 2015-03-16 17:36:31.286934+09 | Cloned from host 'localhost', port 5500; backup method: pg_basebackup; --force: N
|
||||
2 | standby_register | t | 2015-03-16 17:36:32.391567+09 |
|
||||
(3 rows)
|
||||
|
||||
|
||||
Additionally `repmgr` can execute an external program each time an event is
|
||||
logged. This program is defined with the configuration variable
|
||||
`event_notification_command`; the command string can contain the following
|
||||
placeholders, which will be replaced with the same content which is
|
||||
written to the `repl_events` table:
|
||||
|
||||
%n - node id
|
||||
%e - event type
|
||||
%s - success (1 or 0)
|
||||
%t - timestamp
|
||||
%d - description
|
||||
|
||||
Example:
|
||||
|
||||
event_notification_command=/path/to/some-script %n %e %s "%t" "%d"
|
||||
|
||||
By default the program defined with `event_notification_command` will be
|
||||
executed for every event; to restrict execution to certain events, list
|
||||
these in the parameter `event_notifications`
|
||||
|
||||
event_notifications=master_register,standby_register
|
||||
|
||||
The following event types currently exist:
|
||||
|
||||
master_register
|
||||
standby_register
|
||||
standby_clone
|
||||
standby_promote
|
||||
witness_create
|
||||
repmgrd_start
|
||||
repmgrd_failover_promote
|
||||
repmgrd_failover_follow
|
||||
|
||||
|
||||
Cascading replication
|
||||
---------------------
|
||||
|
||||
Cascading replication - where a standby can connect to an upstream node and not
|
||||
the master server itself - was introduced in PostgreSQL 9.2. `repmgr` and
|
||||
`repmgrd` support cascading replication by keeping track of the relationship
|
||||
between standby servers - each node record is stored with the node id of its
|
||||
upstream ("parent") server (except of course the master server).
|
||||
|
||||
In a failover situation where the master node fails and a top-level standby
|
||||
is promoted, a standby connected to another standby will not be affected
|
||||
and will continue working as normal (even if the upstream standby it's connected
|
||||
to becomes the master node). If however the node's direct upstream fails,
|
||||
the "cascaded standby" will attempt to reconnect to that node's parent.
|
||||
|
||||
To configure standby servers for cascading replication, add the parameter
|
||||
`upstream_node` to `repmgr.conf` and set it to the id of the node it should
|
||||
connect to, e.g.:
|
||||
|
||||
cluster=test
|
||||
node=2
|
||||
node_name=node2
|
||||
upstream_node=1
|
||||
|
||||
Replication slots
|
||||
-----------------
|
||||
|
||||
Replication slots were introduced with PostgreSQL 9.4 and enable standbys to
|
||||
notify the master of their WAL consumption, ensuring that the master will
|
||||
not remove any WAL files until they have been received by all standbys.
|
||||
This mitigates the requirement to manage WAL file retention using
|
||||
`wal_keep_segments` etc., with the caveat that if a standby fails, no WAL
|
||||
files will be removed until the standby's replication slot is deleted.
|
||||
|
||||
To enable replication slots, set the boolean parameter `use_replication_slots`
|
||||
in `repmgr.conf`:
|
||||
|
||||
use_replication_slots=1
|
||||
|
||||
`repmgr` will automatically generate an appropriate slot name, which is
|
||||
stored in the `repl_nodes` table.
|
||||
|
||||
Note that `repmgr` will fail with an error if this option is specified when
|
||||
working with PostgreSQL 9.3.
|
||||
|
||||
Further reading:
|
||||
* http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
||||
* http://blog.2ndquadrant.com/postgresql-9-4-slots/
|
||||
|
||||
Upgrading from repmgr 2
|
||||
-----------------------
|
||||
|
||||
`repmgr 3` is largely compatible with `repmgr 2`; the only step required
|
||||
to upgrade is to update the `repl_nodes` table to the definition needed
|
||||
by `repmgr 3`. See the file `sql/repmgr2_repmgr3.sql` for details on how
|
||||
to do this.
|
||||
|
||||
`repmgrd` must *not* be running while `repl_nodes` is being updated.
|
||||
|
||||
Existing `repmgr.conf` files can be retained as-is.
|
||||
|
||||
---------------------------------------
|
||||
|
||||
Reference
|
||||
---------
|
||||
|
||||
### repmgr command reference
|
||||
|
||||
Not all of these commands need the ``repmgr.conf`` file, but they need to be able to
|
||||
connect to the remote and local databases.
|
||||
|
||||
You can specify the remote database by using the -h parameter or
|
||||
as a last parameter in standby clone and standby follow. If you need to specify
|
||||
a port different than the default 5432 you can specify a -p parameter.
|
||||
Standby is always considered as localhost and a second -p parameter will indicate
|
||||
its port if it is different from the default one.
|
||||
|
||||
* `master register`
|
||||
|
||||
Registers a master in a cluster. This command needs to be executed before any
|
||||
standby nodes are registered.
|
||||
|
||||
* `standby register`
|
||||
|
||||
Registers a standby with `repmgr`. This command needs to be executed to enable
|
||||
promote/follow operations and to allow `repmgrd` to work with the node.
|
||||
An existing standby can be registered using this command.
|
||||
|
||||
* `standby clone [node to be cloned]`
|
||||
|
||||
Clones a new standby node from the data directory of the master (or
|
||||
an upstream cascading standby) using `pg_basebackup` or `rsync`.
|
||||
Additionally it will create the `recovery.conf` file required to
|
||||
start the server as a standby. This command does not require
|
||||
`repmgr.conf` to be provided, but does require connection details
|
||||
of the master or upstream server as command line parameters.
|
||||
|
||||
Provide the `-D/--data-dir` option to specify the destination data
|
||||
directory; if not, the same directory path as on the source server
|
||||
will be used. By default, `pg_basebackup` will be used to copy data
|
||||
from the master or upstream node but this can only be used for
|
||||
bootstrapping new installations. To update an existing but 'stale'
|
||||
data directory (for example belonging to a failed master), `rsync`
|
||||
must be used by specifying `--rsync-only`. In this case,
|
||||
password-less SSH connections between servers are required.
|
||||
|
||||
* `standby promote`
|
||||
|
||||
Promotes a standby to a master if the current master has failed. This
|
||||
command requires a valid `repmgr.conf` file for the standby, either
|
||||
specified explicitly with `-f/--config-file` or located in the current
|
||||
working directory; no additional arguments are required.
|
||||
|
||||
If the standby promotion succeeds, the server will not need to be
|
||||
restarted. However any other standbys will need to follow the new server,
|
||||
by using `standby follow` (see below); if `repmgrd` is active, it will
|
||||
handle this.
|
||||
|
||||
This command will not function if the current master is still running.
|
||||
|
||||
* `witness create`
|
||||
|
||||
Creates a witness server as a separate PostgreSQL instance. This instance
|
||||
can be on a separate server or a server running an existing node. The
|
||||
witness server contains a copy of the repmgr metadata tables but will not
|
||||
be set up as a standby; instead it will update its metadata copy each
|
||||
time a failover occurs.
|
||||
|
||||
Note that it only makes sense to create a witness server if `repmgrd`
|
||||
is in use; see section "witness server" above.
|
||||
|
||||
By default the witness server will use port 5499 to facilitate easier setup
|
||||
on a server running an existing node.
|
||||
|
||||
* `standby follow`
|
||||
|
||||
Attaches the standby to a new master. This command requires a valid
|
||||
`repmgr.conf` file for the standby, either specified explicitly with
|
||||
`-f/--config-file` or located in the current working directory; no
|
||||
additional arguments are required.
|
||||
|
||||
This command will force a restart of the standby server. It can only be used
|
||||
to attach a standby to a new master node.
|
||||
|
||||
* `cluster show`
|
||||
|
||||
Displays information about each node in the replication cluster. This
|
||||
command polls each registered server and shows its role (master / standby /
|
||||
witness) or "FAILED" if the node doesn't respond. It polls each server
|
||||
directly and can be run on any node in the cluster; this is also useful
|
||||
when analyzing connectivity from a particular node.
|
||||
|
||||
This command requires a valid `repmgr.conf` file for the node on which it is
|
||||
executed, either specified explicitly with `-f/--config-file` or located in
|
||||
the current working directory; no additional arguments are required.
|
||||
|
||||
Example:
|
||||
|
||||
repmgr -f /path/to/repmgr.conf cluster show
|
||||
Role | Connection String
|
||||
* master | host=node1 dbname=repmgr user=repmgr
|
||||
standby | host=node2 dbname=repmgr user=repmgr
|
||||
standby | host=node3 dbname=repmgr user=repmgr
|
||||
|
||||
|
||||
* `cluster cleanup`
|
||||
|
||||
Purges monitoring history from the `repl_monitor` table to prevent excessive
|
||||
table growth. Use the `-k/--keep-history` option to specify the number of days of
|
||||
monitoring history to retain. This command can be used manually or as a
|
||||
cronjob.
|
||||
|
||||
This command requires a valid `repmgr.conf` file for the node on which it is
|
||||
executed, either specified explicitly with `-f/--config-file` or located in
|
||||
the current working directory; no additional arguments are required.
|
||||
|
||||
### repmgr configuration file
|
||||
|
||||
See `repmgr.conf.sample` for an example configuration file with available
|
||||
configuration settings annotated.
|
||||
|
||||
### repmgr database schema
|
||||
|
||||
`repmgr` creates a small schema for its own use in the database specified in
|
||||
each node's `conninfo` configuration parameter. This database can in principle
|
||||
be any database. The schema name is the global `cluster` name prefixed
|
||||
with `repmgr_`, so for the example setup above the schema name is
|
||||
`repmgr_test`.
|
||||
|
||||
The schema contains two tables:
|
||||
|
||||
* `repl_nodes`
|
||||
stores information about all registered servers in the cluster
|
||||
* `repl_monitor`
|
||||
stores monitoring information about each node (generated by `repmgrd` with
|
||||
`-m/--monitoring-history` option enabled)
|
||||
|
||||
and one view:
|
||||
* `repl_status`
|
||||
summarizes the latest monitoring information for each node (generated by `repmgrd` with
|
||||
`-m/--monitoring-history` option enabled)
|
||||
|
||||
### Error codes
|
||||
|
||||
`repmgr` or `repmgrd` will return one of the following error codes on program
|
||||
exit:
|
||||
|
||||
* SUCCESS (0) Program ran successfully.
|
||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||
* ERR_DB_QUERY (7) Error while executing a database query
|
||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||
* ERR_BAD_PASSWORD (9) Password used to connect to a database was rejected
|
||||
* ERR_STR_OVERFLOW (10) String overflow error
|
||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
||||
|
||||
|
||||
Support and Assistance
|
||||
----------------------
|
||||
|
||||
2ndQuadrant provides 24x7 production support for repmgr, including
|
||||
configuration assistance, installation verification and training for
|
||||
running a robust replication cluster. For further details see:
|
||||
|
||||
* http://2ndquadrant.com/en/support/
|
||||
|
||||
There is a mailing list/forum to discuss contributions or issues
|
||||
http://groups.google.com/group/repmgr
|
||||
|
||||
The IRC channel #repmgr is registered with freenode.
|
||||
|
||||
Further information is available at http://www.repmgr.org/
|
||||
|
||||
We'd love to hear from you about how you use repmgr. Case studies and
|
||||
news are always welcome. Send us an email at info@2ndQuadrant.com, or
|
||||
send a postcard to
|
||||
|
||||
repmgr
|
||||
c/o 2ndQuadrant
|
||||
7200 The Quorum
|
||||
Oxford Business Park North
|
||||
Oxford
|
||||
OX4 2JZ
|
||||
United Kingdom
|
||||
|
||||
Thanks from the repmgr core team.
|
||||
|
||||
* Ian Barwick
|
||||
* Jaime Casanova
|
||||
* Abhijit Menon-Sen
|
||||
* Simon Riggs
|
||||
* Cedric Villemain
|
||||
|
||||
Further reading
|
||||
---------------
|
||||
|
||||
* http://blog.2ndquadrant.com/announcing-repmgr-2-0/
|
||||
* http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
||||
* http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
|
||||
1144
README.rst
1144
README.rst
File diff suppressed because it is too large
Load Diff
@@ -1,18 +1,21 @@
|
||||
Summary: repmgr
|
||||
Name: repmgr
|
||||
Version: 2.0
|
||||
Release: 2
|
||||
Version: 3.0
|
||||
Release: 1
|
||||
License: GPLv3
|
||||
Group: System Environment/Daemons
|
||||
URL: http://repmgr.org
|
||||
Packager: Nathan Van Overloop <nathan.van.overloop@nexperteam.be>
|
||||
URL: http://repmgr.org
|
||||
Packager: Ian Barwick <ian@2ndquadrant.com>
|
||||
Vendor: 2ndQuadrant Limited
|
||||
Distribution: centos
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
||||
|
||||
%description
|
||||
repmgr for centos6
|
||||
repmgr is a utility suite which greatly simplifies
|
||||
the process of setting up and managing replication
|
||||
using streaming replication within a cluster of
|
||||
PostgreSQL servers.
|
||||
|
||||
%prep
|
||||
%setup
|
||||
@@ -32,7 +35,7 @@ export PATH=$PATH:/usr/pgsql-9.3/bin/
|
||||
|
||||
%clean
|
||||
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
|
||||
|
||||
|
||||
|
||||
%files
|
||||
%defattr(-,root,root)
|
||||
@@ -45,13 +48,14 @@ export PATH=$PATH:/usr/pgsql-9.3/bin/
|
||||
/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql
|
||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql
|
||||
/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql
|
||||
%attr(0755,root,root)/etc/init.d/repmgrd
|
||||
%attr(0755,root,root)/etc/init.d/repmgrd
|
||||
%attr(0644,root,root)/etc/sysconfig/repmgrd
|
||||
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
|
||||
|
||||
%changelog
|
||||
* Tue Mar 10 2015 Ian Barwick <ian@2ndquadrant.com>
|
||||
- build for repmgr 3.0
|
||||
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
|
||||
- fix witness creation to create db and user if needed
|
||||
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
|
||||
- initial build for RHEL6
|
||||
|
||||
@@ -1,114 +1,89 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
#
|
||||
# chkconfig: - 75 16
|
||||
# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
||||
# processname: repmgrd
|
||||
# pidfile="/var/run/${NAME}.pid"
|
||||
# repmgrd Start up the repmgrd daemon
|
||||
# repmgrd (replication manager daemon)
|
||||
#
|
||||
# chkconfig: - 75 16
|
||||
# description: repmgrd is the replication manager daemon \
|
||||
# The repmgrd replication management and monitoring daemon for PostgreSQL.
|
||||
|
||||
### BEGIN INIT INFO
|
||||
# Provides: repmgrd
|
||||
# Required-Start: $local_fs $remote_fs $network $syslog postgresql
|
||||
# Required-Stop: $local_fs $remote_fs $network $syslog postgresql
|
||||
# Should-Start: $syslog postgresql-9.3
|
||||
# Should-Stop: $syslog postgresql-9.3
|
||||
# Short-Description: start and stop repmgrd
|
||||
# Description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
|
||||
# this is used to monitor a postgresql cluster.
|
||||
### END INIT INFO
|
||||
|
||||
# Source function library.
|
||||
INITD=/etc/rc.d/init.d
|
||||
. $INITD/functions
|
||||
. /etc/init.d/functions
|
||||
|
||||
# Get function listing for cross-distribution logic.
|
||||
TYPESET=`typeset -f|grep "declare"`
|
||||
|
||||
# Get network config.
|
||||
# Source networking configuration.
|
||||
. /etc/sysconfig/network
|
||||
|
||||
DESC="PostgreSQL replication management and monitoring daemon"
|
||||
NAME=repmgrd
|
||||
|
||||
REPMGRD_ENABLED=no
|
||||
prog=repmgrd
|
||||
REPMGRD_ENABLED=yes
|
||||
REPMGRD_OPTS=
|
||||
REPMGRD_USER=postgres
|
||||
REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
|
||||
REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
||||
REPMGRD_LOCK=/var/lock/subsys/${NAME}
|
||||
REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
|
||||
DAEMONIZE="-d"
|
||||
|
||||
# Read configuration variable file if it is present
|
||||
[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
|
||||
# pull in sysconfig settings
|
||||
[ -f /etc/sysconfig/repmgrd ] && . /etc/sysconfig/repmgrd
|
||||
|
||||
# For SELinux we need to use 'runuser' not 'su'
|
||||
if [ -x /sbin/runuser ]
|
||||
then
|
||||
SU=runuser
|
||||
else
|
||||
SU=su
|
||||
fi
|
||||
|
||||
test -x $REPMGRD_BIN || exit 0
|
||||
LOCKFILE=/var/lock/subsys/$prog
|
||||
RETVAL=0
|
||||
|
||||
case "$REPMGRD_ENABLED" in
|
||||
[Yy]*)
|
||||
break
|
||||
#nothing to do here
|
||||
;;
|
||||
*)
|
||||
exit 0
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
if [ -z "${REPMGRD_OPTS}" ]
|
||||
if [ -z "$REPMGRD_OPTS" ]
|
||||
then
|
||||
echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
|
||||
exit 0
|
||||
echo "Not starting $prog, REPMGRD_OPTS not set in /etc/sysconfig/$prog"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
start()
|
||||
{
|
||||
REPMGRD_START=$"Starting ${NAME} service: "
|
||||
start() {
|
||||
[ "$EUID" != "0" ] && exit 4
|
||||
[ "$NETWORKING" = "no" ] && exit 1
|
||||
|
||||
# Make sure startup-time log file is valid
|
||||
if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
|
||||
then
|
||||
touch "${REPMGRD_LOG}" || exit 1
|
||||
chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
|
||||
chmod go-rwx "${REPMGRD_LOG}"
|
||||
[ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
|
||||
fi
|
||||
|
||||
echo -n "${REPMGRD_START}"
|
||||
$SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
|
||||
sleep 2
|
||||
pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
|
||||
if [ "x${pid}" != "x" ]
|
||||
then
|
||||
success "${REPMGRD_START}"
|
||||
touch "${REPMGRD_LOCK}"
|
||||
echo $pid > "${REPMGRD_PIDFILE}"
|
||||
# Start daemons.
|
||||
echo -n $"Starting $prog: "
|
||||
daemon --user $REPMGRD_USER $prog $DAEMONIZE $REPMGRD_OPTS
|
||||
RETVAL=$?
|
||||
echo
|
||||
else
|
||||
failure "${REPMGRD_START}"
|
||||
echo
|
||||
script_result=1
|
||||
fi
|
||||
[ $RETVAL -eq 0 ] && touch $LOCKFILE
|
||||
return $RETVAL
|
||||
}
|
||||
|
||||
stop()
|
||||
{
|
||||
echo -n $"Stopping ${NAME} service: "
|
||||
if [ -e "${REPMGRD_LOCK}" ]
|
||||
then
|
||||
killproc ${NAME}
|
||||
ret=$?
|
||||
if [ $ret -eq 0 ]
|
||||
then
|
||||
echo_success
|
||||
rm -f "${REPMGRD_PIDFILE}"
|
||||
rm -f "${REPMGRD_LOCK}"
|
||||
stop() {
|
||||
[ "$EUID" != "0" ] && exit 4
|
||||
echo -n $"Shutting down $prog: "
|
||||
killproc $prog
|
||||
RETVAL=$?
|
||||
echo
|
||||
[ $RETVAL -eq 0 ] && rm -f $LOCKFILE
|
||||
return $RETVAL
|
||||
}
|
||||
status() {
|
||||
if [ -f "$LOCKFILE" ]; then
|
||||
echo "$prog is running"
|
||||
else
|
||||
echo_failure
|
||||
script_result=1
|
||||
RETVAL=3
|
||||
echo "$prog is stopped"
|
||||
fi
|
||||
else
|
||||
# not running; per LSB standards this is "ok"
|
||||
echo_success
|
||||
fi
|
||||
echo
|
||||
return $RETVAL
|
||||
}
|
||||
|
||||
|
||||
# See how we were called.
|
||||
case "$1" in
|
||||
start)
|
||||
@@ -118,16 +93,22 @@ case "$1" in
|
||||
stop
|
||||
;;
|
||||
status)
|
||||
status -p $REPMGRD_PIDFILE $NAME
|
||||
script_result=$?
|
||||
status $prog
|
||||
;;
|
||||
restart)
|
||||
restart|force-reload)
|
||||
stop
|
||||
start
|
||||
start
|
||||
;;
|
||||
try-restart|condrestart)
|
||||
if status $prog > /dev/null; then
|
||||
stop
|
||||
start
|
||||
fi
|
||||
;;
|
||||
reload)
|
||||
exit 3
|
||||
;;
|
||||
*)
|
||||
echo $"Usage: $0 {start|stop|status|restart}"
|
||||
echo $"Usage: $0 {start|stop|status|restart|try-restart|force-reload}"
|
||||
exit 2
|
||||
esac
|
||||
|
||||
exit $script_result
|
||||
|
||||
@@ -1,21 +1,4 @@
|
||||
# default settings for repmgrd. This file is sourced by /bin/sh from
|
||||
# /etc/init.d/repmgrd
|
||||
#default sysconfig file for repmgrd
|
||||
#custom overrides can be placed here
|
||||
|
||||
# disable repmgrd by default so it won't get started upon installation
|
||||
# valid values: yes/no
|
||||
REPMGRD_ENABLED=no
|
||||
|
||||
# Options for repmgrd (required)
|
||||
#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
|
||||
|
||||
# User to run repmgrd as
|
||||
#REPMGRD_USER=postgres
|
||||
|
||||
# repmgrd binary
|
||||
#REPMGRD_BIN=/usr/bin/repmgr
|
||||
|
||||
# pid file
|
||||
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
||||
|
||||
# log file
|
||||
#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log
|
||||
REPMGRD_OPTS="-f /etc/repmgr/repmgr.conf"
|
||||
|
||||
35
SSH-RSYNC.md
Normal file
35
SSH-RSYNC.md
Normal file
@@ -0,0 +1,35 @@
|
||||
Set up trusted copy between postgres accounts
|
||||
---------------------------------------------
|
||||
|
||||
If you need to use rsync to clone standby servers, the postgres account
|
||||
on your master and standby servers must be each able to access the other
|
||||
using SSH without a password.
|
||||
|
||||
First generate a ssh key, using an empty passphrase, and copy the resulting
|
||||
keys and a matching authorization file to a privileged user on the other system::
|
||||
|
||||
[postgres@node1]$ ssh-keygen -t rsa
|
||||
Generating public/private rsa key pair.
|
||||
Enter file in which to save the key (/var/lib/pgsql/.ssh/id_rsa):
|
||||
Enter passphrase (empty for no passphrase):
|
||||
Enter same passphrase again:
|
||||
Your identification has been saved in /var/lib/pgsql/.ssh/id_rsa.
|
||||
Your public key has been saved in /var/lib/pgsql/.ssh/id_rsa.pub.
|
||||
The key fingerprint is:
|
||||
aa:bb:cc:dd:ee:ff:aa:11:22:33:44:55:66:77:88:99 postgres@db1.domain.com
|
||||
[postgres@node1]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
|
||||
[postgres@node1]$ chmod go-rwx ~/.ssh/*
|
||||
[postgres@node1]$ cd ~/.ssh
|
||||
[postgres@node1]$ scp id_rsa.pub id_rsa authorized_keys user@node2:
|
||||
|
||||
Log in as a user on the other system, and install the files into the postgres
|
||||
user's account::
|
||||
|
||||
[user@node2 ~]$ sudo chown postgres.postgres authorized_keys id_rsa.pub id_rsa
|
||||
[user@node2 ~]$ sudo mkdir -p ~postgres/.ssh
|
||||
[user@node2 ~]$ sudo chown postgres.postgres ~postgres/.ssh
|
||||
[user@node2 ~]$ sudo mv authorized_keys id_rsa.pub id_rsa ~postgres/.ssh
|
||||
[user@node2 ~]$ sudo chmod -R go-rwx ~postgres/.ssh
|
||||
|
||||
Now test that ssh in both directions works. You may have to accept some new
|
||||
known hosts in the process.
|
||||
11
TODO
11
TODO
@@ -8,11 +8,18 @@ Known issues in repmgr
|
||||
Planned feature improvements
|
||||
============================
|
||||
|
||||
* Timeline increases when promoting a standby
|
||||
|
||||
* A better check of which standby has received the most data
|
||||
|
||||
* Make the fact that a standby may be delayed a factor in the voting
|
||||
algorithm
|
||||
|
||||
* include support for delayed standbys
|
||||
|
||||
* Create the repmgr user/database on "master register".
|
||||
|
||||
* Use pg_basebackup for the data directory, and ALSO rsync for the
|
||||
configuration files.
|
||||
|
||||
* Use pg_basebackup -X s
|
||||
NOTE: this can be used by including `-X s` in the configuration parameter
|
||||
`pg_basebackup_options`
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* check_dir.c - Directories management functions
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -98,7 +98,7 @@ create_dir(char *dir)
|
||||
if (mkdir_p(dir, 0700) == 0)
|
||||
return true;
|
||||
|
||||
log_err(_("Could not create directory \"%s\": %s\n"),
|
||||
log_err(_("unable to create directory \"%s\": %s\n"),
|
||||
dir, strerror(errno));
|
||||
|
||||
return false;
|
||||
@@ -255,7 +255,7 @@ create_pg_dir(char *dir, bool force)
|
||||
|
||||
if (!create_dir(dir))
|
||||
{
|
||||
log_err(_("couldn't create directory \"%s\"...\n"),
|
||||
log_err(_("unable to create directory \"%s\"...\n"),
|
||||
dir);
|
||||
return false;
|
||||
}
|
||||
@@ -267,7 +267,7 @@ create_pg_dir(char *dir, bool force)
|
||||
|
||||
if (!set_dir_permissions(dir))
|
||||
{
|
||||
log_err(_("could not change permissions of directory \"%s\": %s\n"),
|
||||
log_err(_("unable to change permissions of directory \"%s\": %s\n"),
|
||||
dir, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* check_dir.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
||||
516
config.c
516
config.c
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* config.c - Functions to parse the config file
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -17,59 +17,138 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/stat.h> /* for stat() */
|
||||
|
||||
#include "config.h"
|
||||
#include "log.h"
|
||||
#include "strutil.h"
|
||||
#include "repmgr.h"
|
||||
|
||||
void
|
||||
parse_config(const char *config_file, t_configuration_options * options)
|
||||
static void parse_event_notifications_list(t_configuration_options *options, const char *arg);
|
||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||
|
||||
|
||||
/*
|
||||
* parse_config()
|
||||
*
|
||||
* Set default options and overwrite with values from provided configuration
|
||||
* file.
|
||||
*
|
||||
* Returns true if a configuration file could be parsed, otherwise false.
|
||||
*
|
||||
* Any configuration options changed in this function must also be changed in
|
||||
* reload_config()
|
||||
*/
|
||||
bool
|
||||
parse_config(const char *config_file, t_configuration_options *options)
|
||||
{
|
||||
char *s,
|
||||
buff[MAXLINELENGTH];
|
||||
char config_file_buf[MAXLEN];
|
||||
char name[MAXLEN];
|
||||
char value[MAXLEN];
|
||||
bool config_file_provided = false;
|
||||
FILE *fp;
|
||||
|
||||
FILE *fp = fopen(config_file, "r");
|
||||
/* Sanity checks */
|
||||
|
||||
/* Initialize */
|
||||
/*
|
||||
* If a configuration file was provided, check it exists, otherwise
|
||||
* emit an error
|
||||
*/
|
||||
if (config_file[0])
|
||||
{
|
||||
struct stat config;
|
||||
|
||||
strncpy(config_file_buf, config_file, MAXLEN);
|
||||
canonicalize_path(config_file_buf);
|
||||
|
||||
if(stat(config_file_buf, &config) != 0)
|
||||
{
|
||||
log_err(_("provided configuration file '%s' not found: %s\n"),
|
||||
config_file,
|
||||
strerror(errno)
|
||||
);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
config_file_provided = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If no configuration file was provided, set to a default file
|
||||
* which `parse_config()` will attempt to read if it exists
|
||||
*/
|
||||
else
|
||||
{
|
||||
strncpy(config_file_buf, DEFAULT_CONFIG_FILE, MAXLEN);
|
||||
}
|
||||
|
||||
|
||||
fp = fopen(config_file_buf, "r");
|
||||
|
||||
/*
|
||||
* Since some commands don't require a config file at all, not having one
|
||||
* isn't necessarily a problem.
|
||||
*
|
||||
* If the user explicitly provided a configuration file and we can't
|
||||
* read it we'll raise an error.
|
||||
*
|
||||
* If no configuration file was provided, we'll try and read the default
|
||||
* file if it exists and is readable, but won't worry if it's not.
|
||||
*/
|
||||
if (fp == NULL)
|
||||
{
|
||||
if(config_file_provided)
|
||||
{
|
||||
log_err(_("unable to open provided configuration file '%s'; terminating\n"), config_file_buf);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_notice(_("no configuration file provided and default file '%s' not found - "
|
||||
"continuing with default values\n"),
|
||||
DEFAULT_CONFIG_FILE);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Initialize configuration options with sensible defaults */
|
||||
memset(options->cluster_name, 0, sizeof(options->cluster_name));
|
||||
options->node = -1;
|
||||
options->upstream_node = NO_UPSTREAM_NODE;
|
||||
memset(options->conninfo, 0, sizeof(options->conninfo));
|
||||
options->failover = MANUAL_FAILOVER;
|
||||
options->priority = 0;
|
||||
options->priority = DEFAULT_PRIORITY;
|
||||
memset(options->node_name, 0, sizeof(options->node_name));
|
||||
memset(options->promote_command, 0, sizeof(options->promote_command));
|
||||
memset(options->follow_command, 0, sizeof(options->follow_command));
|
||||
memset(options->rsync_options, 0, sizeof(options->rsync_options));
|
||||
memset(options->ssh_options, 0, sizeof(options->ssh_options));
|
||||
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||
memset(options->pgctl_options, 0, sizeof(options->pgctl_options));
|
||||
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
||||
memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options));
|
||||
|
||||
/* if nothing has been provided defaults to 60 */
|
||||
/* default master_response_timeout is 60 seconds */
|
||||
options->master_response_timeout = 60;
|
||||
|
||||
/* it defaults to 6 retries with a time between retries of 10s */
|
||||
/* default to 6 reconnection attempts at intervals of 10 seconds */
|
||||
options->reconnect_attempts = 6;
|
||||
options->reconnect_intvl = 10;
|
||||
|
||||
options->monitor_interval_secs = 2;
|
||||
options->retry_promote_interval_secs = 300;
|
||||
|
||||
/*
|
||||
* Since some commands don't require a config file at all, not having one
|
||||
* isn't necessarily a problem.
|
||||
*/
|
||||
if (fp == NULL)
|
||||
{
|
||||
log_err(_("Did not find the configuration file '%s', continuing\n"),
|
||||
config_file);
|
||||
return;
|
||||
}
|
||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||
|
||||
options->tablespace_mapping.head = NULL;
|
||||
options->tablespace_mapping.tail = NULL;
|
||||
|
||||
|
||||
|
||||
/* Read next line */
|
||||
while ((s = fgets(buff, sizeof buff, fp)) != NULL)
|
||||
{
|
||||
bool known_parameter = true;
|
||||
|
||||
/* Skip blank lines and comments */
|
||||
if (buff[0] == '\n' || buff[0] == '#')
|
||||
continue;
|
||||
@@ -82,6 +161,8 @@ parse_config(const char *config_file, t_configuration_options * options)
|
||||
strncpy(options->cluster_name, value, MAXLEN);
|
||||
else if (strcmp(name, "node") == 0)
|
||||
options->node = atoi(value);
|
||||
else if (strcmp(name, "upstream_node") == 0)
|
||||
options->upstream_node = atoi(value);
|
||||
else if (strcmp(name, "conninfo") == 0)
|
||||
strncpy(options->conninfo, value, MAXLEN);
|
||||
else if (strcmp(name, "rsync_options") == 0)
|
||||
@@ -99,13 +180,17 @@ parse_config(const char *config_file, t_configuration_options * options)
|
||||
strncpy(failoverstr, value, MAXLEN);
|
||||
|
||||
if (strcmp(failoverstr, "manual") == 0)
|
||||
{
|
||||
options->failover = MANUAL_FAILOVER;
|
||||
}
|
||||
else if (strcmp(failoverstr, "automatic") == 0)
|
||||
{
|
||||
options->failover = AUTOMATIC_FAILOVER;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warning(_("value for failover option is incorrect, it should be automatic or manual. Defaulting to manual.\n"));
|
||||
options->failover = MANUAL_FAILOVER;
|
||||
log_err(_("value for 'failover' must be 'automatic' or 'manual'\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
else if (strcmp(name, "priority") == 0)
|
||||
@@ -125,56 +210,90 @@ parse_config(const char *config_file, t_configuration_options * options)
|
||||
else if (strcmp(name, "pg_bindir") == 0)
|
||||
strncpy(options->pg_bindir, value, MAXLEN);
|
||||
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||
strncpy(options->pgctl_options, value, MAXLEN);
|
||||
strncpy(options->pg_ctl_options, value, MAXLEN);
|
||||
else if (strcmp(name, "pg_basebackup_options") == 0)
|
||||
strncpy(options->pg_basebackup_options, value, MAXLEN);
|
||||
else if (strcmp(name, "logfile") == 0)
|
||||
strncpy(options->logfile, value, MAXLEN);
|
||||
else if (strcmp(name, "monitor_interval_secs") == 0)
|
||||
options->monitor_interval_secs = atoi(value);
|
||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||
options->retry_promote_interval_secs = atoi(value);
|
||||
else if (strcmp(name, "use_replication_slots") == 0)
|
||||
options->use_replication_slots = atoi(value);
|
||||
else if (strcmp(name, "event_notification_command") == 0)
|
||||
strncpy(options->event_notification_command, value, MAXLEN);
|
||||
else if (strcmp(name, "event_notifications") == 0)
|
||||
parse_event_notifications_list(options, value);
|
||||
else if (strcmp(name, "tablespace_mapping") == 0)
|
||||
tablespace_list_append(options, value);
|
||||
else
|
||||
log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value);
|
||||
{
|
||||
known_parameter = false;
|
||||
log_warning(_("%s/%s: unknown name/value pair provided; ignoring\n"), name, value);
|
||||
}
|
||||
|
||||
/*
|
||||
* Raise an error if a known parameter is provided with an empty value.
|
||||
* Currently there's no reason why empty parameters are needed; if
|
||||
* we want to accept those, we'd need to add stricter default checking,
|
||||
* as currently e.g. an empty `node` value will be converted to '0'.
|
||||
*/
|
||||
if(known_parameter == true && !strlen(value)) {
|
||||
log_err(_("no value provided for parameter '%s'\n"), name);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
/* Close file */
|
||||
fclose(fp);
|
||||
|
||||
/* Check config settings */
|
||||
|
||||
/* The following checks are for the presence of the parameter */
|
||||
if (*options->cluster_name == '\0')
|
||||
{
|
||||
log_err(_("Cluster name is missing. Check the configuration file.\n"));
|
||||
log_err(_("required parameter 'cluster' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (options->node == -1)
|
||||
{
|
||||
log_err(_("Node information is missing. Check the configuration file.\n"));
|
||||
log_err(_("required parameter 'node' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*options->node_name == '\0')
|
||||
{
|
||||
log_err(_("required parameter 'node_name' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*options->conninfo == '\0')
|
||||
{
|
||||
log_err(_("required parameter 'conninfo' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* The following checks are for valid parameter values */
|
||||
if (options->master_response_timeout <= 0)
|
||||
{
|
||||
log_err(_("Master response timeout must be greater than zero. Check the configuration file.\n"));
|
||||
log_err(_("'master_response_timeout' must be greater than zero\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (options->reconnect_attempts < 0)
|
||||
{
|
||||
log_err(_("Reconnect attempts must be zero or greater. Check the configuration file.\n"));
|
||||
log_err(_("'reconnect_attempts' must be zero or greater\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (options->reconnect_intvl <= 0)
|
||||
if (options->reconnect_intvl < 0)
|
||||
{
|
||||
log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n"));
|
||||
log_err(_("'reconnect_interval' must be zero or greater\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*options->pg_bindir == '\0')
|
||||
{
|
||||
log_err(_("pg_bindir config value not found. Check the configuration file.\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -242,85 +361,137 @@ bool
|
||||
reload_config(char *config_file, t_configuration_options * orig_options)
|
||||
{
|
||||
PGconn *conn;
|
||||
|
||||
t_configuration_options new_options;
|
||||
bool config_changed = false;
|
||||
|
||||
/*
|
||||
* Re-read the configuration file: repmgr.conf
|
||||
*/
|
||||
log_info(_("Reloading configuration file and updating repmgr tables\n"));
|
||||
log_info(_("reloading configuration file and updating repmgr tables\n"));
|
||||
|
||||
parse_config(config_file, &new_options);
|
||||
if (new_options.node == -1)
|
||||
{
|
||||
log_warning(_("Cannot load new configuration, will keep current one.\n"));
|
||||
log_warning(_("unable to parse new configuration, retaining current configuration\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
|
||||
{
|
||||
log_warning(_("Cannot change cluster name, will keep current configuration.\n"));
|
||||
log_warning(_("unable to change cluster name, retaining current configuration\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (new_options.node != orig_options->node)
|
||||
{
|
||||
log_warning(_("Cannot change node number, will keep current configuration.\n"));
|
||||
log_warning(_("unable to change node ID, retaining current configuration\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (strcmp(new_options.node_name, orig_options->node_name) != 0)
|
||||
{
|
||||
log_warning(_("Cannot change standby name, will keep current configuration.\n"));
|
||||
log_warning(_("unable to change standby name, keeping current configuration\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
|
||||
{
|
||||
log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n"));
|
||||
log_warning(_("new value for 'failover' must be 'automatic' or 'manual'\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (new_options.master_response_timeout <= 0)
|
||||
{
|
||||
log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n"));
|
||||
log_warning(_("new value for 'master_response_timeout' must be greater than zero\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (new_options.reconnect_attempts < 0)
|
||||
{
|
||||
log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
|
||||
log_warning(_("new value for 'reconnect_attempts' must be zero or greater\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (new_options.reconnect_intvl < 0)
|
||||
{
|
||||
log_warning(_("New value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
|
||||
log_warning(_("new value for 'reconnect_interval' must be zero or greater\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Test conninfo string */
|
||||
conn = establish_db_connection(new_options.conninfo, false);
|
||||
if (!conn || (PQstatus(conn) != CONNECTION_OK))
|
||||
if(strcmp(orig_options->conninfo, new_options.conninfo) != 0)
|
||||
{
|
||||
log_warning(_("conninfo string is not valid, will keep current configuration.\n"));
|
||||
return false;
|
||||
/* Test conninfo string */
|
||||
conn = establish_db_connection(new_options.conninfo, false);
|
||||
if (!conn || (PQstatus(conn) != CONNECTION_OK))
|
||||
{
|
||||
log_warning(_("'conninfo' string is not valid, retaining current configuration\n"));
|
||||
return false;
|
||||
}
|
||||
PQfinish(conn);
|
||||
}
|
||||
PQfinish(conn);
|
||||
|
||||
/* Configuration seems ok, will load new values */
|
||||
strcpy(orig_options->cluster_name, new_options.cluster_name);
|
||||
orig_options->node = new_options.node;
|
||||
strcpy(orig_options->conninfo, new_options.conninfo);
|
||||
orig_options->failover = new_options.failover;
|
||||
orig_options->priority = new_options.priority;
|
||||
strcpy(orig_options->node_name, new_options.node_name);
|
||||
strcpy(orig_options->promote_command, new_options.promote_command);
|
||||
strcpy(orig_options->follow_command, new_options.follow_command);
|
||||
strcpy(orig_options->rsync_options, new_options.rsync_options);
|
||||
strcpy(orig_options->ssh_options, new_options.ssh_options);
|
||||
orig_options->master_response_timeout = new_options.master_response_timeout;
|
||||
orig_options->reconnect_attempts = new_options.reconnect_attempts;
|
||||
orig_options->reconnect_intvl = new_options.reconnect_intvl;
|
||||
/*
|
||||
* No configuration problems detected - copy any changed values
|
||||
*
|
||||
* NB: keep these in the same order as in config.h to make it easier
|
||||
* to manage them
|
||||
*/
|
||||
|
||||
/* cluster_name */
|
||||
if(strcmp(orig_options->cluster_name, new_options.cluster_name) != 0)
|
||||
{
|
||||
strcpy(orig_options->cluster_name, new_options.cluster_name);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* conninfo */
|
||||
if(strcmp(orig_options->conninfo, new_options.conninfo) != 0)
|
||||
{
|
||||
strcpy(orig_options->conninfo, new_options.conninfo);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* node */
|
||||
if(orig_options->node != new_options.node)
|
||||
{
|
||||
orig_options->node = new_options.node;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* failover */
|
||||
if(orig_options->failover != new_options.failover)
|
||||
{
|
||||
orig_options->failover = new_options.failover;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* priority */
|
||||
if(orig_options->priority != new_options.priority)
|
||||
{
|
||||
orig_options->priority = new_options.priority;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* node_name */
|
||||
if(strcmp(orig_options->node_name, new_options.node_name) != 0)
|
||||
{
|
||||
strcpy(orig_options->node_name, new_options.node_name);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* promote_command */
|
||||
if(strcmp(orig_options->promote_command, new_options.promote_command) != 0)
|
||||
{
|
||||
strcpy(orig_options->promote_command, new_options.promote_command);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* follow_command */
|
||||
if(strcmp(orig_options->follow_command, new_options.follow_command) != 0)
|
||||
{
|
||||
strcpy(orig_options->follow_command, new_options.follow_command);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX These ones can change with a simple SIGHUP?
|
||||
@@ -332,5 +503,218 @@ reload_config(char *config_file, t_configuration_options * orig_options)
|
||||
* orig_options.loglevel, orig_options.logfacility);
|
||||
*/
|
||||
|
||||
return true;
|
||||
/* rsync_options */
|
||||
if(strcmp(orig_options->rsync_options, new_options.rsync_options) != 0)
|
||||
{
|
||||
strcpy(orig_options->rsync_options, new_options.rsync_options);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* ssh_options */
|
||||
if(strcmp(orig_options->ssh_options, new_options.ssh_options) != 0)
|
||||
{
|
||||
strcpy(orig_options->ssh_options, new_options.ssh_options);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* master_response_timeout */
|
||||
if(orig_options->master_response_timeout != new_options.master_response_timeout)
|
||||
{
|
||||
orig_options->master_response_timeout = new_options.master_response_timeout;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* reconnect_attempts */
|
||||
if(orig_options->reconnect_attempts != new_options.reconnect_attempts)
|
||||
{
|
||||
orig_options->reconnect_attempts = new_options.reconnect_attempts;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* reconnect_intvl */
|
||||
if(orig_options->reconnect_intvl != new_options.reconnect_intvl)
|
||||
{
|
||||
orig_options->reconnect_intvl = new_options.reconnect_intvl;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* pg_ctl_options */
|
||||
if(strcmp(orig_options->pg_ctl_options, new_options.pg_ctl_options) != 0)
|
||||
{
|
||||
strcpy(orig_options->pg_ctl_options, new_options.pg_ctl_options);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* pg_basebackup_options */
|
||||
if(strcmp(orig_options->pg_basebackup_options, new_options.pg_basebackup_options) != 0)
|
||||
{
|
||||
strcpy(orig_options->pg_basebackup_options, new_options.pg_basebackup_options);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* monitor_interval_secs */
|
||||
if(orig_options->monitor_interval_secs != new_options.monitor_interval_secs)
|
||||
{
|
||||
orig_options->monitor_interval_secs = new_options.monitor_interval_secs;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* retry_promote_interval_secs */
|
||||
if(orig_options->retry_promote_interval_secs != new_options.retry_promote_interval_secs)
|
||||
{
|
||||
orig_options->retry_promote_interval_secs = new_options.retry_promote_interval_secs;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* use_replication_slots */
|
||||
if(orig_options->use_replication_slots != new_options.use_replication_slots)
|
||||
{
|
||||
orig_options->use_replication_slots = new_options.use_replication_slots;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
if(config_changed == true)
|
||||
{
|
||||
log_debug(_("reload_config(): configuration has changed\n"));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_debug(_("reload_config(): configuration has not changed\n"));
|
||||
}
|
||||
|
||||
return config_changed;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Split argument into old_dir and new_dir and append to tablespace mapping
|
||||
* list.
|
||||
*
|
||||
* Adapted from pg_basebackup.c
|
||||
*/
|
||||
static void
|
||||
tablespace_list_append(t_configuration_options *options, const char *arg)
|
||||
{
|
||||
TablespaceListCell *cell;
|
||||
char *dst;
|
||||
char *dst_ptr;
|
||||
const char *arg_ptr;
|
||||
|
||||
cell = (TablespaceListCell *) pg_malloc0(sizeof(TablespaceListCell));
|
||||
if(cell == NULL)
|
||||
{
|
||||
log_err(_("unable to allocate memory; terminating\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
dst_ptr = dst = cell->old_dir;
|
||||
for (arg_ptr = arg; *arg_ptr; arg_ptr++)
|
||||
{
|
||||
if (dst_ptr - dst >= MAXPGPATH)
|
||||
{
|
||||
log_err(_("directory name too long\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=')
|
||||
; /* skip backslash escaping = */
|
||||
else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\'))
|
||||
{
|
||||
if (*cell->new_dir)
|
||||
{
|
||||
log_err(_("multiple \"=\" signs in tablespace mapping\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst = dst_ptr = cell->new_dir;
|
||||
}
|
||||
}
|
||||
else
|
||||
*dst_ptr++ = *arg_ptr;
|
||||
}
|
||||
|
||||
if (!*cell->old_dir || !*cell->new_dir)
|
||||
{
|
||||
log_err(_("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"\n"),
|
||||
arg);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
canonicalize_path(cell->old_dir);
|
||||
canonicalize_path(cell->new_dir);
|
||||
|
||||
if (options->tablespace_mapping.tail)
|
||||
options->tablespace_mapping.tail->next = cell;
|
||||
else
|
||||
options->tablespace_mapping.head = cell;
|
||||
|
||||
options->tablespace_mapping.tail = cell;
|
||||
}
|
||||
|
||||
/*
|
||||
* parse_event_notifications_list()
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
static void
|
||||
parse_event_notifications_list(t_configuration_options *options, const char *arg)
|
||||
{
|
||||
const char *arg_ptr;
|
||||
char event_type_buf[MAXLEN] = "";
|
||||
char *dst_ptr = event_type_buf;
|
||||
|
||||
|
||||
for (arg_ptr = arg; arg_ptr <= (arg + strlen(arg)); arg_ptr++)
|
||||
{
|
||||
/* ignore whitespace */
|
||||
if(*arg_ptr == ' ' || *arg_ptr == '\t')
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* comma (or end-of-string) should mark the end of an event type -
|
||||
* just as long as there was something preceding it
|
||||
*/
|
||||
if((*arg_ptr == ',' || *arg_ptr == '\0') && event_type_buf[0] != '\0')
|
||||
{
|
||||
EventNotificationListCell *cell;
|
||||
|
||||
cell = (EventNotificationListCell *) pg_malloc0(sizeof(EventNotificationListCell));
|
||||
|
||||
if(cell == NULL)
|
||||
{
|
||||
log_err(_("unable to allocate memory; terminating\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
strncpy(cell->event_type, event_type_buf, MAXLEN);
|
||||
|
||||
if (options->event_notifications.tail)
|
||||
{
|
||||
options->event_notifications.tail->next = cell;
|
||||
}
|
||||
else
|
||||
{
|
||||
options->event_notifications.head = cell;
|
||||
}
|
||||
|
||||
options->event_notifications.tail = cell;
|
||||
|
||||
memset(event_type_buf, 0, MAXLEN);
|
||||
dst_ptr = event_type_buf;
|
||||
}
|
||||
/* ignore duplicated commas */
|
||||
else if(*arg_ptr == ',')
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
*dst_ptr++ = *arg_ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
46
config.h
46
config.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* config.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -20,13 +20,41 @@
|
||||
#ifndef _REPMGR_CONFIG_H_
|
||||
#define _REPMGR_CONFIG_H_
|
||||
|
||||
#include "repmgr.h"
|
||||
#include "postgres_fe.h"
|
||||
|
||||
#include "strutil.h"
|
||||
|
||||
|
||||
/* One entry in a singly linked list of event type names. */
typedef struct EventNotificationListCell
|
||||
{
|
||||
struct EventNotificationListCell *next;
|
||||
char event_type[MAXLEN];
|
||||
} EventNotificationListCell;
|
||||
|
||||
/* Head and tail pointers of a list of EventNotificationListCell entries. */
typedef struct EventNotificationList
|
||||
{
|
||||
EventNotificationListCell *head;
|
||||
EventNotificationListCell *tail;
|
||||
} EventNotificationList;
|
||||
|
||||
/*
 * One entry in a singly linked list of tablespace mappings: a pair of
 * directory names, each stored in a buffer of MAXPGPATH bytes.
 */
typedef struct TablespaceListCell
|
||||
{
|
||||
struct TablespaceListCell *next;
|
||||
char old_dir[MAXPGPATH];
|
||||
char new_dir[MAXPGPATH];
|
||||
} TablespaceListCell;
|
||||
|
||||
/* Head and tail pointers of a list of TablespaceListCell entries. */
typedef struct TablespaceList
|
||||
{
|
||||
TablespaceListCell *head;
|
||||
TablespaceListCell *tail;
|
||||
} TablespaceList;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char cluster_name[MAXLEN];
|
||||
int node;
|
||||
int upstream_node;
|
||||
char conninfo[MAXLEN];
|
||||
int failover;
|
||||
int priority;
|
||||
@@ -41,17 +69,23 @@ typedef struct
|
||||
int reconnect_attempts;
|
||||
int reconnect_intvl;
|
||||
char pg_bindir[MAXLEN];
|
||||
char pgctl_options[MAXLEN];
|
||||
char pg_ctl_options[MAXLEN];
|
||||
char pg_basebackup_options[MAXLEN];
|
||||
char logfile[MAXLEN];
|
||||
int monitor_interval_secs;
|
||||
int retry_promote_interval_secs;
|
||||
int use_replication_slots;
|
||||
char event_notification_command[MAXLEN];
|
||||
EventNotificationList event_notifications;
|
||||
TablespaceList tablespace_mapping;
|
||||
} t_configuration_options;
|
||||
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", 0, 0 }
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
|
||||
void parse_config(const char *config_file, t_configuration_options * options);
|
||||
|
||||
bool parse_config(const char *config_file, t_configuration_options *options);
|
||||
void parse_line(char *buff, char *name, char *value);
|
||||
char *trim(char *s);
|
||||
bool reload_config(char *config_file, t_configuration_options * orig_options);
|
||||
bool reload_config(char *config_file, t_configuration_options *orig_options);
|
||||
|
||||
#endif
|
||||
|
||||
38
dbutils.h
38
dbutils.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* dbutils.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -20,27 +20,49 @@
|
||||
#ifndef _REPMGR_DBUTILS_H_
|
||||
#define _REPMGR_DBUTILS_H_
|
||||
|
||||
#include "config.h"
|
||||
#include "strutil.h"
|
||||
|
||||
|
||||
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||
const char *values[],
|
||||
const bool exit_on_error);
|
||||
bool check_cluster_schema(PGconn *conn);
|
||||
int is_standby(PGconn *conn);
|
||||
int is_witness(PGconn *conn, char *schema, char *cluster, int node_id);
|
||||
bool is_pgup(PGconn *conn, int timeout);
|
||||
char *pg_version(PGconn *conn, char *major_version);
|
||||
int guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value);
|
||||
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
int get_master_node_id(PGconn *conn, char *cluster);
|
||||
int get_server_version(PGconn *conn, char *server_version);
|
||||
bool get_cluster_size(PGconn *conn, char *size);
|
||||
bool get_pg_setting(PGconn *conn, const char *setting, char *output);
|
||||
|
||||
int guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value);
|
||||
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value, const char *datatype);
|
||||
|
||||
const char *get_cluster_size(PGconn *conn);
|
||||
PGconn *get_master_connection(PGconn *standby_conn, char *schema, char *cluster,
|
||||
PGconn *get_upstream_connection(PGconn *standby_conn, char *cluster,
|
||||
int node_id,
|
||||
int *upstream_node_id_ptr,
|
||||
char *upstream_conninfo_out);
|
||||
PGconn *get_master_connection(PGconn *standby_conn, char *cluster,
|
||||
int *master_id, char *master_conninfo_out);
|
||||
|
||||
int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
bool cancel_query(PGconn *conn, int timeout);
|
||||
char *get_repmgr_schema(void);
|
||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
||||
|
||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* errcode.h
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -25,9 +25,7 @@
|
||||
#define SUCCESS 0
|
||||
#define ERR_BAD_CONFIG 1
|
||||
#define ERR_BAD_RSYNC 2
|
||||
#define ERR_STOP_BACKUP 3
|
||||
#define ERR_NO_RESTART 4
|
||||
#define ERR_NEEDS_XLOG 5
|
||||
#define ERR_DB_CON 6
|
||||
#define ERR_DB_QUERY 7
|
||||
#define ERR_PROMOTED 8
|
||||
@@ -36,5 +34,6 @@
|
||||
#define ERR_FAILOVER_FAIL 11
|
||||
#define ERR_BAD_SSH 12
|
||||
#define ERR_SYS_FAILURE 13
|
||||
#define ERR_BAD_BASEBACKUP 14
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
2
log.c
2
log.c
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* log.c - Logging methods
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This module is a set of methods for logging (currently only syslog)
|
||||
*
|
||||
|
||||
8
log.h
8
log.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* log.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -25,15 +25,9 @@
|
||||
#define REPMGR_SYSLOG 1
|
||||
#define REPMGR_STDERR 2
|
||||
|
||||
#if (PG_VERSION_NUM >= 90100)
|
||||
void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
#else
|
||||
void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||
__attribute__((format(printf, 3, 4)));
|
||||
#endif
|
||||
|
||||
/* Standard error logging */
|
||||
#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
|
||||
|
||||
@@ -1,18 +1,111 @@
|
||||
###################################################
|
||||
# Replication Manager configuration file
|
||||
# Replication Manager sample configuration file
|
||||
###################################################
|
||||
|
||||
# Required configuration items
|
||||
# ============================
|
||||
#
|
||||
# repmgr and repmgrd require these items to be configured:
|
||||
|
||||
# Cluster name
|
||||
cluster=test
|
||||
|
||||
# Node ID
|
||||
# Node ID and name
|
||||
# (Note: we recommend to avoid naming nodes after their initial
|
||||
# replication function, as this will cause confusion when e.g.
|
||||
# "standby2" is promoted to master)
|
||||
node=2
|
||||
node_name=standby2
|
||||
node_name=node2
|
||||
|
||||
# Connection information
|
||||
conninfo='host=192.168.204.104'
|
||||
rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
ssh_options=-o "StrictHostKeyChecking no"
|
||||
# Database connection information
|
||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
|
||||
# Optional configuration items
|
||||
# ============================
|
||||
|
||||
# Replication settings
|
||||
# ---------------------
|
||||
|
||||
# when using cascading replication and a standby is to be connected to an
|
||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
||||
# must exist before the new standby can be registered. If a standby is
|
||||
# to connect directly to a master node, this parameter is not required.
|
||||
#
|
||||
# upstream_node=1
|
||||
|
||||
# physical replication slots - PostgreSQL 9.4 and later only
|
||||
# (default: 0)
|
||||
#
|
||||
# use_replication_slots=0
|
||||
|
||||
|
||||
# Logging and monitoring settings
|
||||
# -------------------------------
|
||||
|
||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||
# (default: NOTICE)
|
||||
loglevel=NOTICE
|
||||
|
||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
# (default: STDERR)
|
||||
logfacility=STDERR
|
||||
|
||||
# stderr can be redirected to an arbitrary file:
|
||||
#
|
||||
# logfile='/var/log/repmgr.log'
|
||||
|
||||
# event notifications can be passed to an arbitrary external program
|
||||
# together with the following parameters:
|
||||
#
|
||||
# %n - node ID
|
||||
# %e - event type
|
||||
# %s - success (1 or 0)
|
||||
# %t - timestamp
|
||||
# %d - details
|
||||
#
|
||||
# the values provided for "%t" and "%d" will probably contain spaces,
|
||||
# so should be quoted in the provided command configuration, e.g.:
|
||||
#
|
||||
# event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
|
||||
# By default, all notifications will be passed; the notification types
|
||||
# can be filtered to explicitly named ones:
|
||||
#
|
||||
# event_notifications=master_register,standby_register,witness_create
|
||||
|
||||
|
||||
# Environment/command settings
|
||||
# ----------------------------
|
||||
|
||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
||||
# (if not provided, defaults to system $PATH)
|
||||
# pg_bindir=/usr/bin/
|
||||
|
||||
# external command options
|
||||
|
||||
# rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
# ssh_options=-o "StrictHostKeyChecking no"
|
||||
|
||||
# external command arguments
|
||||
|
||||
# pg_ctl_options='-s'
|
||||
# pg_basebackup_options='--xlog-method=s'
|
||||
|
||||
|
||||
# Standby clone settings
|
||||
# ----------------------
|
||||
#
|
||||
# These settings apply when cloning a standby (`repmgr standby clone`).
|
||||
|
||||
# Tablespaces can be remapped from one file system location to another:
|
||||
#
|
||||
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
|
||||
|
||||
|
||||
# Failover settings (repmgrd)
|
||||
# ---------------------------
|
||||
#
|
||||
# These settings are only applied when repmgrd is running.
|
||||
|
||||
# How many seconds we wait for master response before declaring master failure
|
||||
master_response_timeout=60
|
||||
@@ -22,41 +115,20 @@ reconnect_attempts=6
|
||||
reconnect_interval=10
|
||||
|
||||
# Autofailover options
|
||||
failover=manual
|
||||
priority=-1
|
||||
failover=automatic # one of 'automatic', 'manual'
|
||||
priority=100 # a value of zero or less prevents the node being promoted to master
|
||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
|
||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||
# Default: NOTICE
|
||||
loglevel=NOTICE
|
||||
|
||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
# Default: STDERR
|
||||
logfacility=STDERR
|
||||
|
||||
# path to pg_ctl executable
|
||||
pg_bindir=/usr/bin/
|
||||
|
||||
#
|
||||
# you may add command line arguments for pg_ctl
|
||||
#
|
||||
# pg_ctl_options='-s'
|
||||
|
||||
#
|
||||
# redirect stderr to a logfile
|
||||
#
|
||||
# logfile='/var/log/repmgr.log'
|
||||
|
||||
#
|
||||
# change monitoring interval; default is 2s
|
||||
# monitoring interval; default is 2s
|
||||
#
|
||||
# monitor_interval_secs=2
|
||||
|
||||
#
|
||||
# change wait time for master; before we bail out and exit when the
|
||||
# master disappears, we wait 6 * retry_promote_interval_secs seconds;
|
||||
# by default this would be half an hour (since sleep_delay default
|
||||
# value is 300)
|
||||
# change wait time for master; before we bail out and exit when the master
|
||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||
# default value is 300
|
||||
#
|
||||
# retry_promote_interval_secs=300
|
||||
|
||||
|
||||
|
||||
50
repmgr.h
50
repmgr.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* repmgr.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -21,20 +21,22 @@
|
||||
#define _REPMGR_H_
|
||||
|
||||
#include "postgres_fe.h"
|
||||
#include "getopt_long.h"
|
||||
#include "libpq-fe.h"
|
||||
|
||||
|
||||
#include "getopt_long.h"
|
||||
|
||||
#include "strutil.h"
|
||||
#include "dbutils.h"
|
||||
#include "errcode.h"
|
||||
#include "config.h"
|
||||
|
||||
#define PRIMARY_MODE 0
|
||||
#define STANDBY_MODE 1
|
||||
#define WITNESS_MODE 2
|
||||
#define MIN_SUPPORTED_VERSION "9.3"
|
||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||
|
||||
#include "config.h"
|
||||
#define MAXFILENAME 1024
|
||||
#define ERRBUFF_SIZE 512
|
||||
#define ERRBUFF_SIZE 512
|
||||
|
||||
#define DEFAULT_CONFIG_FILE "./repmgr.conf"
|
||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||
@@ -42,9 +44,22 @@
|
||||
#define DEFAULT_MASTER_PORT "5432"
|
||||
#define DEFAULT_DBNAME "postgres"
|
||||
#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
#define FAILOVER_NODES_MAX_CHECK 50
|
||||
|
||||
#define MANUAL_FAILOVER 0
|
||||
#define AUTOMATIC_FAILOVER 1
|
||||
#define NO_UPSTREAM_NODE -1
|
||||
|
||||
|
||||
typedef enum {
|
||||
UNKNOWN = 0,
|
||||
MASTER,
|
||||
STANDBY,
|
||||
WITNESS
|
||||
} t_server_type;
|
||||
|
||||
|
||||
|
||||
/* Run time options type */
|
||||
typedef struct
|
||||
@@ -63,15 +78,34 @@ typedef struct
|
||||
bool wait_for_master;
|
||||
bool ignore_rsync_warn;
|
||||
bool initdb_no_pwprompt;
|
||||
bool rsync_only;
|
||||
bool fast_checkpoint;
|
||||
|
||||
bool ignore_external_config_files;
|
||||
char masterport[MAXLEN];
|
||||
char localport[MAXLEN];
|
||||
|
||||
/* parameter used by CLUSTER CLEANUP */
|
||||
int keep_history;
|
||||
|
||||
char pg_bindir[MAXLEN];
|
||||
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
} t_runtime_options;
|
||||
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, "", "", 0}
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, "", "", 0, "", "" }
|
||||
|
||||
extern char repmgr_schema[MAXLEN];
|
||||
|
||||
typedef struct ErrorListCell
|
||||
{
|
||||
struct ErrorListCell *next;
|
||||
char *error_message;
|
||||
} ErrorListCell;
|
||||
|
||||
typedef struct ErrorList
|
||||
{
|
||||
ErrorListCell *head;
|
||||
ErrorListCell *tail;
|
||||
} ErrorList;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* repmgr.sql
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#
|
||||
# Makefile
|
||||
# Copyright (c) 2ndQuadrant, 2010
|
||||
#
|
||||
# Copyright (c) 2ndQuadrant, 2010-2015
|
||||
#
|
||||
|
||||
MODULE_big = repmgr_funcs
|
||||
|
||||
76
sql/repmgr2_repmgr3.sql
Normal file
76
sql/repmgr2_repmgr3.sql
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Update a repmgr 2.x installation to repmgr 3.0
|
||||
* ----------------------------------------------
|
||||
*
|
||||
* 1. Stop any running repmgrd instances
|
||||
* 2. On the master node, execute the SQL statements listed below,
|
||||
* taking care to identify the master node and any inactive
|
||||
* nodes
|
||||
* 3. Restart repmgrd (being sure to use repmgr 3.0)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Set the search path to the name of the schema used by
|
||||
* your repmgr installation
|
||||
* (this should be "repmgr_" + the cluster name defined in
|
||||
* 'repmgr.conf')
|
||||
*/
|
||||
|
||||
-- SET search_path TO 'name_of_repmgr_schema';
|
||||
|
||||
BEGIN;
|
||||
|
||||
ALTER TABLE repl_nodes RENAME TO repl_nodes2_0;
|
||||
|
||||
CREATE TABLE repl_nodes (
|
||||
id INTEGER PRIMARY KEY,
|
||||
type TEXT NOT NULL CHECK (type IN('master','standby','witness')),
|
||||
upstream_node_id INTEGER NULL REFERENCES repl_nodes (id),
|
||||
cluster TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
conninfo TEXT NOT NULL,
|
||||
slot_name TEXT NULL,
|
||||
priority INTEGER NOT NULL,
|
||||
active BOOLEAN NOT NULL DEFAULT TRUE
|
||||
);
|
||||
|
||||
INSERT INTO repl_nodes
|
||||
(id, type, cluster, name, conninfo, priority)
|
||||
SELECT id,
|
||||
CASE
|
||||
WHEN witness IS TRUE THEN 'witness'
|
||||
ELSE 'standby'
|
||||
END AS type,
|
||||
cluster,
|
||||
name,
|
||||
conninfo,
|
||||
priority + 100
|
||||
FROM repl_nodes2_0;
|
||||
|
||||
/*
|
||||
* You'll need to set the master explicitly; the following query
|
||||
* should identify the master node ID but will only work if all
|
||||
* standby servers are connected:
|
||||
*
|
||||
* SELECT id FROM repl_nodes WHERE name NOT IN (SELECT application_name FROM pg_stat_replication)
|
||||
*
|
||||
* If in doubt, executing 'repmgr cluster show' will definitively identify
|
||||
* the master.
|
||||
*/
|
||||
UPDATE repl_nodes SET type = 'master' WHERE id = $master_id;
|
||||
|
||||
/* If any nodes are known to be inactive, update them here */
|
||||
|
||||
-- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);
|
||||
|
||||
/* When you're sure of your changes, commit them */
|
||||
|
||||
-- COMMIT;
|
||||
|
||||
|
||||
/*
|
||||
* Execute the following command when you are sure you no longer
|
||||
* require the old table:
|
||||
*/
|
||||
|
||||
-- DROP TABLE repl_nodes2_0;
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "fmgr.h"
|
||||
#include "access/xlog.h"
|
||||
#include "miscadmin.h"
|
||||
#include "replication/walreceiver.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/lwlock.h"
|
||||
#include "storage/procarray.h"
|
||||
@@ -58,6 +59,7 @@ PG_FUNCTION_INFO_V1(repmgr_update_last_updated);
|
||||
PG_FUNCTION_INFO_V1(repmgr_get_last_updated);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Module load callback
|
||||
*/
|
||||
@@ -230,3 +232,5 @@ repmgr_get_last_updated(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_TIMESTAMPTZ(last_updated);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* repmgr_function.sql
|
||||
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* uninstall_repmgr_funcs.sql
|
||||
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* strutil.c
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -25,15 +25,9 @@
|
||||
#include "log.h"
|
||||
#include "strutil.h"
|
||||
|
||||
#if (PG_VERSION_NUM >= 90100)
|
||||
static int
|
||||
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||
#else
|
||||
static int
|
||||
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||
__attribute__((format(printf, 3, 0)));
|
||||
#endif
|
||||
|
||||
static int
|
||||
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||
|
||||
16
strutil.h
16
strutil.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* strutil.h
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -31,7 +31,6 @@
|
||||
#define MAXCONNINFO 1024
|
||||
|
||||
|
||||
#if (PG_VERSION_NUM >= 90100)
|
||||
extern int
|
||||
xsnprintf(char *str, size_t size, const char *format,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
@@ -43,18 +42,5 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
extern int
|
||||
maxlen_snprintf(char *str, const char *format,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
#else
|
||||
extern int
|
||||
xsnprintf(char *str, size_t size, const char *format,...)
|
||||
__attribute__((format(printf, 3, 4)));
|
||||
|
||||
extern int
|
||||
sqlquery_snprintf(char *str, const char *format,...)
|
||||
__attribute__((format(printf, 2, 3)));
|
||||
|
||||
extern int
|
||||
maxlen_snprintf(char *str, const char *format,...)
|
||||
__attribute__((format(printf, 2, 3)));
|
||||
#endif
|
||||
|
||||
#endif /* _STRUTIL_H_ */
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* uninstall_repmgr.sql
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user