mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
215 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
274a30efa5 | ||
|
|
db63b5bb1c | ||
|
|
e100728b93 | ||
|
|
d104f2a914 | ||
|
|
2946c097f0 | ||
|
|
a538ceb0ea | ||
|
|
5a2a8d1c82 | ||
|
|
b5a7efa58e | ||
|
|
9f6f58e4ed | ||
|
|
c22f4eaf6f | ||
|
|
925d82f7a4 | ||
|
|
1db577e294 | ||
|
|
a886fddccc | ||
|
|
83e5f98171 | ||
|
|
eb31a56186 | ||
|
|
8cd2c6fd05 | ||
|
|
e3e1c5de4e | ||
|
|
f9a150504a | ||
|
|
5bc809466c | ||
|
|
5d32026b79 | ||
|
|
2a8d6f72c6 | ||
|
|
190cc7dcb4 | ||
|
|
819937d4bd | ||
|
|
57299cb978 | ||
|
|
59f503835b | ||
|
|
33e626cd75 | ||
|
|
491ec37adf | ||
|
|
c93790fc96 | ||
|
|
ecabe2c294 | ||
|
|
2ba57e5938 | ||
|
|
2eec17e25f | ||
|
|
c48c248c15 | ||
|
|
958e45f2b8 | ||
|
|
daafd70383 | ||
|
|
c828598bfb | ||
|
|
b55519c4a2 | ||
|
|
4cafd443e1 | ||
|
|
d400d7f9ac | ||
|
|
62bb3db1f8 | ||
|
|
d9961bbb17 | ||
|
|
e1b8982c14 | ||
|
|
2fe3b3c2a3 | ||
|
|
c6e1bc205a | ||
|
|
7241391ddc | ||
|
|
c8f449f178 | ||
|
|
49420c437f | ||
|
|
827ffef5f9 | ||
|
|
16296bb1c3 | ||
|
|
c9c18d6216 | ||
|
|
d21f506614 | ||
|
|
fbad18085e | ||
|
|
ca08b1c3bb | ||
|
|
3d95fab0ac | ||
|
|
12d6ce4629 | ||
|
|
dfb34ae7b6 | ||
|
|
98c4eb002a | ||
|
|
faed8a65f7 | ||
|
|
a81cf04614 | ||
|
|
ca6cbcf965 | ||
|
|
16c1e13019 | ||
|
|
1375adcac8 | ||
|
|
e859a58405 | ||
|
|
1a6d830314 | ||
|
|
a96f478a43 | ||
|
|
8f20ab16dd | ||
|
|
3ec436f30d | ||
|
|
61e00bf1c7 | ||
|
|
5d71869fc1 | ||
|
|
7598e08b6f | ||
|
|
ba71e1eedf | ||
|
|
a4c07b23fb | ||
|
|
0c36f921f7 | ||
|
|
8ac5a5444e | ||
|
|
f60e7346e2 | ||
|
|
855ca8fe1a | ||
|
|
daa79d1a0f | ||
|
|
211768d911 | ||
|
|
f982708b35 | ||
|
|
995083d66c | ||
|
|
be58d6af96 | ||
|
|
a52e97e622 | ||
|
|
cc1ea00333 | ||
|
|
ec3596521f | ||
|
|
66245ccc03 | ||
|
|
c7542063be | ||
|
|
2633d994ef | ||
|
|
5359d45463 | ||
|
|
efa60d142c | ||
|
|
f3d0ab9ab9 | ||
|
|
7e6bac1be6 | ||
|
|
b72058dba8 | ||
|
|
79d1332f9c | ||
|
|
cde721e3fc | ||
|
|
7b2439b824 | ||
|
|
787cd94142 | ||
|
|
056e64f635 | ||
|
|
6b5a609d30 | ||
|
|
7a4d84379c | ||
|
|
490e12b1af | ||
|
|
7b9df3ac8f | ||
|
|
d6bf870316 | ||
|
|
b15e8debe1 | ||
|
|
310faf1bd9 | ||
|
|
35caeaa66a | ||
|
|
ba300c58f7 | ||
|
|
f2370de2fa | ||
|
|
3920deb803 | ||
|
|
e452bf6601 | ||
|
|
167b4efbb3 | ||
|
|
56b9ca7992 | ||
|
|
9c002c7e38 | ||
|
|
cfec04d19f | ||
|
|
4f1c67a1bf | ||
|
|
2f4fd2b7fa | ||
|
|
aca2b9547f | ||
|
|
c9db7f57d2 | ||
|
|
96ac39ba0f | ||
|
|
88a3378203 | ||
|
|
4db0efab47 | ||
|
|
864d57953a | ||
|
|
84d2a292b2 | ||
|
|
62d53b7622 | ||
|
|
77d52adb53 | ||
|
|
7a3e2f2a3a | ||
|
|
120688013e | ||
|
|
f6d1db5edb | ||
|
|
02729d299b | ||
|
|
88a6a1376e | ||
|
|
67df082ee9 | ||
|
|
9ed71d6317 | ||
|
|
933647d6de | ||
|
|
f99018b202 | ||
|
|
ced87373cd | ||
|
|
1db22546a9 | ||
|
|
7ae0df9c85 | ||
|
|
7a80f7a096 | ||
|
|
8710e067d0 | ||
|
|
793950eabd | ||
|
|
d1b4280182 | ||
|
|
64d038c823 | ||
|
|
46dd734b3d | ||
|
|
0a2e4466aa | ||
|
|
17ab86f7ac | ||
|
|
d433982af7 | ||
|
|
869b6a7a06 | ||
|
|
9018dc65de | ||
|
|
9cbd8df089 | ||
|
|
67a81d1d47 | ||
|
|
ab70007b75 | ||
|
|
0145aa0fc3 | ||
|
|
493c307b23 | ||
|
|
fc6225a511 | ||
|
|
e3111d37ba | ||
|
|
2a1a9f2e61 | ||
|
|
71a667ecb8 | ||
|
|
3ab91730c3 | ||
|
|
dd7f9b79ae | ||
|
|
8ab1901a93 | ||
|
|
e0cbdd5b31 | ||
|
|
d62aaeedd0 | ||
|
|
05cc7091b5 | ||
|
|
d192d5665c | ||
|
|
3848b9011b | ||
|
|
487aadc4b9 | ||
|
|
3f5920a395 | ||
|
|
617ea8cb78 | ||
|
|
142517fcca | ||
|
|
d722e2c74b | ||
|
|
abb02cab76 | ||
|
|
8e66e4811c | ||
|
|
ce5a541960 | ||
|
|
e12be52fa8 | ||
|
|
c0911d3286 | ||
|
|
6e94432282 | ||
|
|
29d9232e2f | ||
|
|
8973812144 | ||
|
|
e775a962ad | ||
|
|
12204f7e56 | ||
|
|
684f7590b7 | ||
|
|
9d589a780d | ||
|
|
83e6d15410 | ||
|
|
6a10fe0cd9 | ||
|
|
c664682c05 | ||
|
|
44acc8d719 | ||
|
|
b911483d5e | ||
|
|
ee9270fe8d | ||
|
|
d0a4eebeec | ||
|
|
0f5e71f029 | ||
|
|
dbd90d45f5 | ||
|
|
c8d0fb401f | ||
|
|
afda3419cc | ||
|
|
a86fa4ad4a | ||
|
|
7e3007f6e8 | ||
|
|
8c797a8fea | ||
|
|
56cec22f22 | ||
|
|
b61649a3e3 | ||
|
|
ded716e403 | ||
|
|
d639dc3342 | ||
|
|
17ed81ebb7 | ||
|
|
b00c507ee4 | ||
|
|
55d8b2ad9c | ||
|
|
c918aaad4a | ||
|
|
6e7eee4c01 | ||
|
|
5c59e8fc5b | ||
|
|
eba0b6bb1e | ||
|
|
3bc0b80a71 | ||
|
|
06b9e0a8ec | ||
|
|
120be2db1c | ||
|
|
12bd7da836 | ||
|
|
2fd905cf9e | ||
|
|
dd7ebdc1c7 | ||
|
|
1636805fa1 | ||
|
|
899d789699 | ||
|
|
cd7a3215df | ||
|
|
f8fd344d9f |
@@ -2,7 +2,7 @@ License and Contributions
|
||||
=========================
|
||||
|
||||
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
||||
Copyright 2010-2015, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||
Copyright 2010-2016, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||
details.
|
||||
|
||||
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2010-2015, 2ndQuadrant Limited
|
||||
Copyright (c) 2010-2016, 2ndQuadrant Limited
|
||||
All rights reserved.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
||||
238
FAILOVER.rst
238
FAILOVER.rst
@@ -1,237 +1 @@
|
||||
====================================================
|
||||
PostgreSQL Automatic Failover - User Documentation
|
||||
====================================================
|
||||
|
||||
Automatic Failover
|
||||
==================
|
||||
|
||||
repmgr allows for automatic failover when it detects the failure of the master node.
|
||||
Following is a quick setup for this.
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
For convenience, we define:
|
||||
|
||||
**node1**
|
||||
is the fully qualified domain name of the Master server, IP 192.168.1.10
|
||||
**node2**
|
||||
is the fully qualified domain name of the Standby server, IP 192.168.1.11
|
||||
**witness**
|
||||
is the fully qualified domain name of the server used as a witness, IP 192.168.1.12
|
||||
|
||||
**Note:** We don't recommend using names with the status of a server like «masterserver»,
|
||||
because it would be confusing once a failover takes place and the Master is
|
||||
now on the «standbyserver».
|
||||
|
||||
Summary
|
||||
-------
|
||||
|
||||
2 PostgreSQL servers are involved in the replication. Automatic failover needs
|
||||
a vote to decide what server it should promote, so an odd number is required.
|
||||
A witness-repmgrd is installed in a third server where it uses a PostgreSQL
|
||||
cluster to communicate with other repmgrd daemons.
|
||||
|
||||
1. Install PostgreSQL in all the servers involved (including the witness server)
|
||||
|
||||
2. Install repmgr in all the servers involved (including the witness server)
|
||||
|
||||
3. Configure the Master PostgreSQL
|
||||
|
||||
4. Clone the Master to the Standby using "repmgr standby clone" command
|
||||
|
||||
5. Configure repmgr in all the servers involved (including the witness server)
|
||||
|
||||
6. Register Master and Standby nodes
|
||||
|
||||
7. Initiate witness server
|
||||
|
||||
8. Start the repmgrd daemons in all nodes
|
||||
|
||||
**Note** A complete High-Availability design needs at least 3 servers to still have
|
||||
a backup node after a first failure.
|
||||
|
||||
Install PostgreSQL
|
||||
------------------
|
||||
|
||||
You can install PostgreSQL using any of the recommended methods. You should ensure
|
||||
it's 9.0 or later.
|
||||
|
||||
Install repmgr
|
||||
--------------
|
||||
|
||||
Install repmgr following the steps in the README file.
|
||||
|
||||
Configure PostgreSQL
|
||||
--------------------
|
||||
|
||||
Log in to node1.
|
||||
|
||||
Edit the file postgresql.conf and modify the parameters::
|
||||
|
||||
listen_addresses='*'
|
||||
wal_level = 'hot_standby'
|
||||
archive_mode = on
|
||||
archive_command = 'cd .' # we can also use exit 0, anything that
|
||||
# just does nothing
|
||||
max_wal_senders = 10
|
||||
wal_keep_segments = 5000 # 80 GB required on pg_xlog
|
||||
hot_standby = on
|
||||
shared_preload_libraries = 'repmgr_funcs'
|
||||
|
||||
Edit the file pg_hba.conf and add lines for the replication::
|
||||
|
||||
host repmgr repmgr 127.0.0.1/32 trust
|
||||
host repmgr repmgr 192.168.1.10/30 trust
|
||||
host replication all 192.168.1.10/30 trust
|
||||
|
||||
**Note:** It is also possible to use a password authentication (md5), .pgpass file
|
||||
should be edited to allow connection between each node.
|
||||
|
||||
Create the user and database to manage replication::
|
||||
|
||||
su - postgres
|
||||
createuser -s repmgr
|
||||
createdb -O repmgr repmgr
|
||||
psql -f /usr/share/postgresql/9.0/contrib/repmgr_funcs.sql repmgr
|
||||
|
||||
Restart the PostgreSQL server::
|
||||
|
||||
pg_ctl -D $PGDATA restart
|
||||
|
||||
And check everything is fine in the server log.
|
||||
|
||||
Create the ssh-key for the postgres user and copy it to other servers::
|
||||
|
||||
su - postgres
|
||||
ssh-keygen # /!\ do not use a passphrase /!\
|
||||
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
|
||||
chmod 600 ~/.ssh/authorized_keys
|
||||
exit
|
||||
rsync -avz ~postgres/.ssh/authorized_keys node2:~postgres/.ssh/
|
||||
rsync -avz ~postgres/.ssh/authorized_keys witness:~postgres/.ssh/
|
||||
rsync -avz ~postgres/.ssh/id_rsa* node2:~postgres/.ssh/
|
||||
rsync -avz ~postgres/.ssh/id_rsa* witness:~postgres/.ssh/
|
||||
|
||||
Clone Master
|
||||
------------
|
||||
|
||||
Log in to node2.
|
||||
|
||||
Clone node1 (the current Master)::
|
||||
|
||||
su - postgres
|
||||
repmgr -d repmgr -U repmgr -h node1 standby clone
|
||||
|
||||
Start the PostgreSQL server::
|
||||
|
||||
pg_ctl -D $PGDATA start
|
||||
|
||||
And check everything is fine in the server log.
|
||||
|
||||
Configure repmgr
|
||||
----------------
|
||||
|
||||
Log in to each server and configure repmgr by editing the file
|
||||
/etc/repmgr/repmgr.conf::
|
||||
|
||||
cluster=my_cluster
|
||||
node=1
|
||||
node_name=earth
|
||||
conninfo='host=192.168.1.10 dbname=repmgr user=repmgr'
|
||||
master_response_timeout=60
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=10
|
||||
failover=automatic
|
||||
promote_command='promote_command.sh'
|
||||
follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf'
|
||||
|
||||
**cluster**
|
||||
is the name of the current replication.
|
||||
**node**
|
||||
is the number of the current node (1, 2 or 3 in the current example).
|
||||
**node_name**
|
||||
is an identifier for every node.
|
||||
**conninfo**
|
||||
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration you need to add a 'port=5499' to the conninfo.
|
||||
**master_response_timeout**
|
||||
is the maximum amount of time we are going to wait before deciding the master has died and starting the failover procedure.
|
||||
**reconnect_attempts**
|
||||
is the number of times we will try to reconnect to master after a failure has been detected and before starting the failover procedure.
|
||||
**reconnect_interval**
|
||||
is the amount of time between retries to reconnect to master after a failure has been detected and before starting the failover procedure.
|
||||
**failover**
|
||||
configure behavior: *manual* or *automatic*.
|
||||
**promote_command**
|
||||
the command executed to do the failover (including the PostgreSQL failover itself). The command must return 0 on success.
|
||||
**follow_command**
|
||||
the command executed to address the current standby to another Master. The command must return 0 on success.
|
||||
|
||||
Register Master and Standby
|
||||
---------------------------
|
||||
|
||||
Log in to node1.
|
||||
|
||||
Register the node as Master::
|
||||
|
||||
su - postgres
|
||||
repmgr -f /etc/repmgr/repmgr.conf master register
|
||||
|
||||
Log in to node2. Register it as a standby::
|
||||
|
||||
su - postgres
|
||||
repmgr -f /etc/repmgr/repmgr.conf standby register
|
||||
|
||||
Initialize witness server
|
||||
-------------------------
|
||||
|
||||
Log in to witness.
|
||||
|
||||
Initialize the witness server::
|
||||
|
||||
su - postgres
|
||||
repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create
|
||||
|
||||
The witness server needs the following information from the command
|
||||
line:
|
||||
|
||||
* Connection details for the current master, to copy the cluster
|
||||
configuration.
|
||||
* A location for initializing its own $PGDATA.
|
||||
|
||||
repmgr will also ask for the superuser password on the witness database so
|
||||
it can reconnect when needed (the command line option --initdb-no-pwprompt
|
||||
will set up a password-less superuser).
|
||||
|
||||
By default the witness server will listen on port 5499; this value can be
|
||||
overridden by explicitly providing the port number in the conninfo string
|
||||
in repmgr.conf. (Note that it is also possible to specify the port number
|
||||
with the -l/--local-port option, however this option is now deprecated and
|
||||
will be overridden by a port setting in the conninfo string).
|
||||
|
||||
Start the repmgrd daemons
|
||||
-------------------------
|
||||
|
||||
Log in to node2 and witness::
|
||||
|
||||
su - postgres
|
||||
repmgrd -f /etc/repmgr/repmgr.conf --daemonize -> /var/log/postgresql/repmgr.log 2>&1
|
||||
|
||||
**Note:** The Master does not need a repmgrd daemon.
|
||||
|
||||
Suspend Automatic behavior
|
||||
==========================
|
||||
|
||||
Edit the repmgr.conf of the node to remove from automatic processing and change::
|
||||
|
||||
failover=manual
|
||||
|
||||
Then, signal repmgrd daemon::
|
||||
|
||||
su - postgres
|
||||
kill -HUP $(pidof repmgrd)
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
The repmgr documentation is in the README file (how to build, options, etc.)
|
||||
The contents of this file have been incorporated into the main README.md document.
|
||||
|
||||
39
FAQ.md
39
FAQ.md
@@ -34,6 +34,11 @@ General
|
||||
replication slots, setting a higher figure will make adding new nodes
|
||||
easier.
|
||||
|
||||
- Does `repmgr` support hash indexes?
|
||||
|
||||
No. Hash indexes and replication do not mix well and their use is
|
||||
explicitly discouraged; see:
|
||||
http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
||||
|
||||
`repmgr`
|
||||
--------
|
||||
@@ -96,8 +101,9 @@ General
|
||||
is intended to support running the witness server as a separate
|
||||
instance on a normal node server, rather than on its own dedicated server.
|
||||
|
||||
To specify a port for the witness server, supply the port number to
|
||||
repmgr with the `-l/--local-port` command line option.
|
||||
To specify a different port for the witness server, supply the port number
|
||||
in the `conninfo` string in `repmgr.conf`
|
||||
(repmgr 3.0.1 and earlier: use the `-l/--local-port` option)
|
||||
|
||||
- Do I need to include `shared_preload_libraries = 'repmgr_funcs'`
|
||||
in `postgresql.conf` if I'm not using `repmgrd`?
|
||||
@@ -106,6 +112,30 @@ General
|
||||
If you later decide to run `repmgrd`, you just need to add
|
||||
`shared_preload_libraries = 'repmgr_funcs'` and restart PostgreSQL.
|
||||
|
||||
- I've provided replication permission for the `repmgr` user in `pg_hba.conf`
|
||||
but `repmgr`/`repmgrd` complains it can't connect to the server... Why?
|
||||
|
||||
`repmgr`/`repmgrd` need to be able to connect to the repmgr database
|
||||
with a normal connection to query metadata. The `replication` connection
|
||||
permission is for PostgreSQL's streaming replication and doesn't
|
||||
necessarily need to be the `repmgr` user.
|
||||
|
||||
- When cloning a standby, why do I need to provide the connection parameters
|
||||
for the primary server on the command line, not in the configuration file?
|
||||
|
||||
Cloning a standby is a one-time action; the role of the server being cloned
|
||||
from could change, so fixing it in the configuration file would create
|
||||
confusion. If `repmgr` needs to establish a connection to the primary
|
||||
server, it can retrieve this from the `repl_nodes` table or if necessary
|
||||
scan the replication cluster until it locates the active primary.
|
||||
|
||||
- Why is there no foreign key on the `node_id` column in the `repl_events`
|
||||
table?
|
||||
|
||||
Under some circumstances event notifications can be generated for servers
|
||||
which have not yet been registered; it's also useful to retain a record
|
||||
of events which includes servers removed from the replication cluster
|
||||
which no longer have an entry in the `repl_nodes` table.
|
||||
|
||||
`repmgrd`
|
||||
---------
|
||||
@@ -134,3 +164,8 @@ General
|
||||
|
||||
Note that after registering a delayed standby, `repmgrd` will only start
|
||||
once the metadata added in the master node has been replicated.
|
||||
|
||||
- How can I get `repmgrd` to rotate its logfile?
|
||||
|
||||
Configure your system's `logrotate` service to do this; see example
|
||||
in README.md
|
||||
|
||||
43
HISTORY
43
HISTORY
@@ -1,4 +1,41 @@
|
||||
3.0.2 2015-09-
|
||||
3.1.2 2016-04-12
|
||||
Fix pg_ctl path generation in do_standby_switchover() (Ian)
|
||||
Regularly sync witness server repl_nodes table (Ian)
|
||||
Documentation improvements (Gianni, dhyannataraj)
|
||||
(Experimental) ensure repmgr handles failover slots when copying
|
||||
in rsync mode (Craig, Ian)
|
||||
rsync mode handling fixes (Martín)
|
||||
Enable repmgr to compile against 9.6devel (Ian)
|
||||
|
||||
3.1.1 2016-02-24
|
||||
Add '-P/--pwprompt' option for "repmgr create witness" (Ian)
|
||||
Prevent repmgr/repmgrd running as root (Ian)
|
||||
|
||||
3.1.0 2016-02-01
|
||||
Add "repmgr standby switchover" command (Ian)
|
||||
Revised README file (Ian)
|
||||
Remove requirement for 'archive_mode' to be enabled (Ian)
|
||||
Improve -?/--help output, showing default values if relevant (Ian)
|
||||
Various bugfixes to command line/configuration parameter handling (Ian)
|
||||
|
||||
3.0.3 2016-01-04
|
||||
Create replication slot if required before base backup is run (Abhijit)
|
||||
standby clone: when using rsync, clean up "pg_replslot" directory (Ian)
|
||||
Improve --help output (Ian)
|
||||
Improve config file parsing (Ian)
|
||||
Various logging output improvements, including explicit HINTS (Ian)
|
||||
Add --log-level to explicitly set log level on command line (Ian)
|
||||
Repurpose --verbose to display extra log output (Ian)
|
||||
Add --terse to hide hints and other non-critical output (Ian)
|
||||
Reference internal functions with explicit catalog path (Ian)
|
||||
When following a new primary, have repmgr (not repmgrd) create the new slot (Ian)
|
||||
Add /etc/repmgr.conf as a default configuration file location (Ian)
|
||||
Prevent repmgrd's -v/--verbose option expecting a parameter (Ian)
|
||||
Prevent invalid replication_lag values being written to the monitoring table (Ian)
|
||||
Improve repmgrd behaviour when monitored standby node is temporarily
|
||||
unavailable (Martín)
|
||||
|
||||
3.0.2 2015-10-02
|
||||
Improve handling of --help/--version options; and improve help output (Ian)
|
||||
Improve handling of situation where logfile can't be opened (Ian)
|
||||
Always pass -D/--pgdata option to pg_basebackup (Ian)
|
||||
@@ -12,7 +49,9 @@
|
||||
Update tablespace remapping in --rsync-only mode for 9.5 and later (Ian)
|
||||
Deprecate `-l/--local-port` option - the port can be extracted
|
||||
from the conninfo string in repmgr.conf (Ian)
|
||||
Add STANDBY UNREGISTE (Vik Fearing)
|
||||
Add STANDBY UNREGISTER (Vik Fearing)
|
||||
Don't fail with error when registering master if schema already defined (Ian)
|
||||
Fixes to whitespace handling when parsing config file (Ian)
|
||||
|
||||
3.0.1 2015-04-16
|
||||
Prevent repmgrd from looping infinitely if node was not registered (Ian)
|
||||
|
||||
48
Makefile
48
Makefile
@@ -1,6 +1,8 @@
|
||||
#
|
||||
# Makefile
|
||||
# Copyright (c) 2ndQuadrant, 2010-2015
|
||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||
|
||||
HEADERS = $(wildcard *.h)
|
||||
|
||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||
@@ -8,17 +10,24 @@ repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||
DATA = repmgr.sql uninstall_repmgr.sql
|
||||
|
||||
PG_CPPFLAGS = -I$(libpq_srcdir)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
|
||||
all: repmgrd repmgr
|
||||
|
||||
all: repmgrd repmgr
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgrd: $(repmgrd_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
|
||||
$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
$(MAKE) -C sql
|
||||
|
||||
repmgr: $(repmgr_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
||||
$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
|
||||
|
||||
# Make all objects depend on all include files. This is a bit of a
|
||||
# shotgun approach, but the codebase is small enough that a complete rebuild
|
||||
# is very fast anyway.
|
||||
$(repmgr_OBJS): $(HEADERS)
|
||||
$(repmgrd_OBJS): $(HEADERS)
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
@@ -31,8 +40,8 @@ include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
|
||||
# is overriding pgxs install.
|
||||
# XXX: This overrides the pgxs install target - we're building two binaries,
|
||||
# which is not supported by pgxs.mk's PROGRAM construct.
|
||||
install: install_prog install_ext
|
||||
|
||||
install_prog:
|
||||
@@ -43,6 +52,12 @@ install_prog:
|
||||
install_ext:
|
||||
$(MAKE) -C sql install
|
||||
|
||||
# Distribution-specific package building targets
|
||||
# ----------------------------------------------
|
||||
#
|
||||
# XXX we recommend using the PGDG-supplied packages where possible;
|
||||
# see README.md for details.
|
||||
|
||||
install_rhel:
|
||||
mkdir -p '$(DESTDIR)/etc/init.d/'
|
||||
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
|
||||
@@ -67,16 +82,21 @@ clean:
|
||||
rm -f repmgr
|
||||
$(MAKE) -C sql clean
|
||||
|
||||
# Get correct version numbers and install paths, depending on your postgres version
|
||||
PG_VERSION = $(shell pg_config --version | cut -d ' ' -f 2 | cut -d '.' -f 1,2)
|
||||
REPMGR_VERSION = $(shell grep REPMGR_VERSION version.h | cut -d ' ' -f 3 | cut -d '"' -f 2)
|
||||
PKGLIBDIR = $(shell pg_config --pkglibdir)
|
||||
SHAREDIR = $(shell pg_config --sharedir)
|
||||
|
||||
deb: repmgrd repmgr
|
||||
mkdir -p ./debian/usr/bin
|
||||
cp repmgrd repmgr ./debian/usr/bin/
|
||||
mkdir -p ./debian/usr/share/postgresql/9.0/contrib/
|
||||
cp sql/repmgr_funcs.sql ./debian/usr/share/postgresql/9.0/contrib/
|
||||
cp sql/uninstall_repmgr_funcs.sql ./debian/usr/share/postgresql/9.0/contrib/
|
||||
mkdir -p ./debian/usr/lib/postgresql/9.0/lib/
|
||||
cp sql/repmgr_funcs.so ./debian/usr/lib/postgresql/9.0/lib/
|
||||
mkdir -p ./debian$(SHAREDIR)/contrib/
|
||||
cp sql/repmgr_funcs.sql ./debian$(SHAREDIR)/contrib/
|
||||
cp sql/uninstall_repmgr_funcs.sql ./debian$(SHAREDIR)/contrib/
|
||||
mkdir -p ./debian$(PKGLIBDIR)/
|
||||
cp sql/repmgr_funcs.so ./debian$(PKGLIBDIR)/
|
||||
dpkg-deb --build debian
|
||||
mv debian.deb ../postgresql-repmgr-9.0_1.0.0.deb
|
||||
mv debian.deb ../postgresql-repmgr-$(PG_VERSION)_$(REPMGR_VERSION).deb
|
||||
rm -rf ./debian/usr
|
||||
|
||||
|
||||
|
||||
119
QUICKSTART.md
119
QUICKSTART.md
@@ -1,118 +1 @@
|
||||
repmgr quickstart guide
|
||||
=======================
|
||||
|
||||
This quickstart guide provides some annotated examples on basic
|
||||
`repmgr` setup. It assumes you are familiar with PostgreSQL replication
|
||||
concepts and setup, and with Linux/UNIX system administration.
|
||||
|
||||
For the purposes of this guide, we'll assume the database user will be
|
||||
`repmgr_usr` and the database will be `repmgr_db`.
|
||||
|
||||
|
||||
Master setup
|
||||
------------
|
||||
|
||||
1. Configure PostgreSQL
|
||||
|
||||
- create user and database:
|
||||
|
||||
```
|
||||
CREATE ROLE repmgr_usr LOGIN SUPERUSER;
|
||||
CREATE DATABASE repmgr_db OWNER repmgr_usr;
|
||||
```
|
||||
|
||||
- configure `postgresql.conf` for replication (see README.md for sample
|
||||
settings)
|
||||
|
||||
- update `pg_hba.conf`, e.g.:
|
||||
|
||||
```
|
||||
host repmgr_db repmgr_usr 192.168.1.0/24 trust
|
||||
host replication repmgr_usr 192.168.1.0/24 trust
|
||||
```
|
||||
|
||||
Restart the PostgreSQL server after making these changes.
|
||||
|
||||
2. Create the `repmgr` configuration file:
|
||||
|
||||
$ cat /path/to/repmgr/node1/repmgr.conf
|
||||
cluster=test
|
||||
node=1
|
||||
node_name=node1
|
||||
conninfo='host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||
pg_bindir=/path/to/postgres/bin
|
||||
|
||||
(For an annotated `repmgr.conf` file, see `repmgr.conf.sample` in the
|
||||
repository's root directory).
|
||||
|
||||
3. Register the master node with `repmgr`:
|
||||
|
||||
$ repmgr -f /path/to/repmgr/node1/repmgr.conf --verbose master register
|
||||
[2015-03-03 17:45:53] [INFO] repmgr connecting to master database
|
||||
[2015-03-03 17:45:53] [INFO] repmgr connected to master, checking its state
|
||||
[2015-03-03 17:45:53] [INFO] master register: creating database objects inside the repmgr_test schema
|
||||
[2015-03-03 17:45:53] [NOTICE] Master node correctly registered for cluster test with id 1 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
|
||||
Standby setup
|
||||
-------------
|
||||
|
||||
1. Use `repmgr standby clone` to clone a standby from the master:
|
||||
|
||||
repmgr -D /path/to/standby/data -d repmgr_db -U repmgr_usr --verbose standby clone 192.168.1.2
|
||||
[2015-03-03 18:18:21] [NOTICE] No configuration file provided and default file './repmgr.conf' not found - continuing with default values
|
||||
[2015-03-03 18:18:21] [NOTICE] repmgr Destination directory ' /path/to/standby/data' provided
|
||||
[2015-03-03 18:18:21] [INFO] repmgr connecting to upstream node
|
||||
[2015-03-03 18:18:21] [INFO] repmgr connected to upstream node, checking its state
|
||||
[2015-03-03 18:18:21] [INFO] Successfully connected to upstream node. Current installation size is 27 MB
|
||||
[2015-03-03 18:18:21] [NOTICE] Starting backup...
|
||||
[2015-03-03 18:18:21] [INFO] creating directory " /path/to/standby/data"...
|
||||
[2015-03-03 18:18:21] [INFO] Executing: 'pg_basebackup -l "repmgr base backup" -h localhost -p 9595 -U repmgr_usr -D /path/to/standby/data '
|
||||
NOTICE: pg_stop_backup complete, all required WAL segments have been archived
|
||||
[2015-03-03 18:18:23] [NOTICE] repmgr standby clone (using pg_basebackup) complete
|
||||
[2015-03-03 18:18:23] [NOTICE] HINT: You can now start your postgresql server
|
||||
[2015-03-03 18:18:23] [NOTICE] for example : pg_ctl -D /path/to/standby/data start
|
||||
|
||||
Note that the `repmgr.conf` file is not required when cloning a standby.
|
||||
However we recommend providing a valid `repmgr.conf` if you wish to use
|
||||
replication slots, or want `repmgr` to log the clone event to the
|
||||
`repl_events` table.
|
||||
|
||||
This will clone the PostgreSQL database files from the master, including its
|
||||
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
||||
the `recovery.conf` file containing the correct parameters to start streaming
|
||||
from the primary node.
|
||||
|
||||
2. Start the PostgreSQL server
|
||||
|
||||
3. Create the `repmgr` configuration file:
|
||||
|
||||
$ cat /path/node2/repmgr/repmgr.conf
|
||||
cluster=test
|
||||
node=2
|
||||
node_name=node2
|
||||
conninfo='host=repmgr_node2 user=repmgr_usr dbname=repmgr_db'
|
||||
pg_bindir=/path/to/postgres/bin
|
||||
|
||||
4. Register the standby node with `repmgr`:
|
||||
|
||||
$ repmgr -f /path/to/repmgr/node2/repmgr.conf --verbose standby register
|
||||
[2015-03-03 18:24:34] [NOTICE] Opening configuration file: /path/to/repmgr/node2/repmgr.conf
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to standby database
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to master database
|
||||
[2015-03-03 18:24:34] [INFO] finding node list for cluster 'test'
|
||||
[2015-03-03 18:24:34] [INFO] checking role of cluster node '1'
|
||||
[2015-03-03 18:24:34] [INFO] repmgr connected to master, checking its state
|
||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby
|
||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby complete
|
||||
[2015-03-03 18:24:34] [NOTICE] Standby node correctly registered for cluster test with id 2 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||
|
||||
|
||||
This concludes the basic `repmgr` setup of master and standby. The records
|
||||
created in the `repl_nodes` table should look something like this:
|
||||
|
||||
repmgr_db=# SELECT * from repmgr_test.repl_nodes;
|
||||
id | type | upstream_node_id | cluster | name | conninfo | slot_name | priority | active
|
||||
----+---------+------------------+---------+-------+----------------------------------------------------+-----------+----------+--------
|
||||
1 | primary | | test | node1 | host=repmgr_node1 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
2 | standby | 1 | test | node2 | host=repmgr_node2 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
||||
(2 rows)
|
||||
The contents of this file have been incorporated into the main README.md document.
|
||||
|
||||
35
TODO
35
TODO
@@ -7,6 +7,7 @@ Known issues in repmgr
|
||||
|
||||
* PGPASSFILE may not be passed to pg_basebackup
|
||||
|
||||
|
||||
Planned feature improvements
|
||||
============================
|
||||
|
||||
@@ -39,6 +40,34 @@ Planned feature improvements
|
||||
* make old master node ID available for event notification commands
|
||||
(See github issue #80).
|
||||
|
||||
* Have pg_basebackup use replication slots, if and when support for
|
||||
this is added; see:
|
||||
http://www.postgresql.org/message-id/555DD2B2.7020000@gmx.net
|
||||
* repmgr standby clone: possibility to use barman instead of performing a new base backup
|
||||
|
||||
* possibility to transform a failed master into a new standby with pg_rewind
|
||||
|
||||
* "repmgr standby switchover" to promote a standby in a controlled manner
|
||||
and convert the existing primary into a standby
|
||||
|
||||
* make repmgrd more robust
|
||||
|
||||
* repmgr: when cloning a standby using pg_basebackup and replication slots are
|
||||
requested, activate the replication slot using pg_receivexlog to negate the
|
||||
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
|
||||
|
||||
* Take into account the fact that a standby can obtain WAL from an archive,
|
||||
so even if direct streaming replication is interrupted, it may be up-to-date
|
||||
|
||||
Usability improvements
|
||||
======================
|
||||
|
||||
* repmgr: add interrupt handler, so that if the program is interrupted
|
||||
while running a backup, an attempt can be made to execute pg_stop_backup()
|
||||
on the primary, to prevent an orphaned backup state existing.
|
||||
|
||||
* repmgr: when unregistering a node, delete any entries in the repl_monitoring
|
||||
table.
|
||||
|
||||
* repmgr: for "standby unregister", accept connection parameters for the
|
||||
primary and perform metadata updates (and slot removal) directly on
|
||||
the primary, to allow a shutdown standby to be unregistered
|
||||
(currently the standby must still be running, which means the replication
|
||||
slot can't be dropped).
|
||||
|
||||
10
check_dir.c
10
check_dir.c
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* check_dir.c - Directories management functions
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -320,10 +320,10 @@ _create_pg_dir(char *dir, bool force, bool for_witness)
|
||||
}
|
||||
else if (pg_dir && !force)
|
||||
{
|
||||
log_warning(_("\nThis looks like a PostgreSQL directory.\n"
|
||||
"If you are sure you want to clone here, "
|
||||
"please check there is no PostgreSQL server "
|
||||
"running and use the --force option\n"));
|
||||
log_hint(_("This looks like a PostgreSQL directory.\n"
|
||||
"If you are sure you want to clone here, "
|
||||
"please check there is no PostgreSQL server "
|
||||
"running and use the -F/--force option\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* check_dir.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
||||
461
config.c
461
config.c
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* config.c - Functions to parse the config file
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -26,9 +26,25 @@
|
||||
|
||||
static void parse_event_notifications_list(t_configuration_options *options, const char *arg);
|
||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||
static void exit_with_errors(ErrorList *config_errors);
|
||||
|
||||
/* Program name recorded by set_progname(); NULL until that has been called. */
static const char *_progname = NULL;
|
||||
static char config_file_path[MAXPGPATH];
|
||||
static bool config_file_provided = false;
|
||||
bool config_file_found = false;
|
||||
|
||||
|
||||
void
|
||||
set_progname(const char *argv0)
|
||||
{
|
||||
_progname = get_progname(argv0);
|
||||
}
|
||||
|
||||
const char *
|
||||
progname(void)
|
||||
{
|
||||
return _progname;
|
||||
}
|
||||
|
||||
/*
|
||||
* load_config()
|
||||
@@ -40,61 +56,123 @@ static bool config_file_provided = false;
|
||||
*
|
||||
* Any configuration options changed in this function must also be changed in
|
||||
* reload_config()
|
||||
*
|
||||
* NOTE: this function is called before the logger is set up, so we need
|
||||
* to handle the verbose option ourselves; also the default log level is NOTICE,
|
||||
* so we can't use DEBUG.
|
||||
*/
|
||||
bool
|
||||
load_config(const char *config_file, t_configuration_options *options, char *argv0)
|
||||
load_config(const char *config_file, bool verbose, t_configuration_options *options, char *argv0)
|
||||
{
|
||||
struct stat config;
|
||||
/* Sanity checks */
|
||||
struct stat stat_config;
|
||||
|
||||
/*
|
||||
* If a configuration file was provided, check it exists, otherwise
|
||||
* emit an error and terminate
|
||||
* emit an error and terminate. We assume that if a user explicitly
|
||||
* provides a configuration file, they'll want to make sure it's
|
||||
* used and not fall back to any of the defaults.
|
||||
*/
|
||||
if (config_file[0])
|
||||
{
|
||||
strncpy(config_file_path, config_file, MAXPGPATH);
|
||||
canonicalize_path(config_file_path);
|
||||
|
||||
if (stat(config_file_path, &config) != 0)
|
||||
if (stat(config_file_path, &stat_config) != 0)
|
||||
{
|
||||
log_err(_("provided configuration file '%s' not found: %s\n"),
|
||||
log_err(_("provided configuration file \"%s\" not found: %s\n"),
|
||||
config_file,
|
||||
strerror(errno)
|
||||
);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (verbose == true)
|
||||
{
|
||||
log_notice(_("using configuration file \"%s\"\n"), config_file);
|
||||
}
|
||||
|
||||
config_file_provided = true;
|
||||
config_file_found = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If no configuration file was provided, attempt to find a default file
|
||||
* in this order:
|
||||
* - current directory
|
||||
* - /etc/repmgr.conf
|
||||
* - default sysconfdir
|
||||
*
|
||||
* here we just check for the existence of the file; parse_config()
|
||||
* will handle read errors etc.
|
||||
*/
|
||||
if (config_file_provided == false)
|
||||
{
|
||||
char my_exec_path[MAXPGPATH];
|
||||
char etc_path[MAXPGPATH];
|
||||
char sysconf_etc_path[MAXPGPATH];
|
||||
|
||||
/* First check if one is in the default sysconfdir */
|
||||
/* 1. "./repmgr.conf" */
|
||||
if (verbose == true)
|
||||
{
|
||||
log_notice(_("looking for configuration file in current directory\n"));
|
||||
}
|
||||
|
||||
snprintf(config_file_path, MAXPGPATH, "./%s", CONFIG_FILE_NAME);
|
||||
canonicalize_path(config_file_path);
|
||||
|
||||
if (stat(config_file_path, &stat_config) == 0)
|
||||
{
|
||||
config_file_found = true;
|
||||
goto end_search;
|
||||
}
|
||||
|
||||
/* 2. "/etc/repmgr.conf" */
|
||||
if (verbose == true)
|
||||
{
|
||||
log_notice(_("looking for configuration file in /etc\n"));
|
||||
}
|
||||
|
||||
snprintf(config_file_path, MAXPGPATH, "/etc/%s", CONFIG_FILE_NAME);
|
||||
if (stat(config_file_path, &stat_config) == 0)
|
||||
{
|
||||
config_file_found = true;
|
||||
goto end_search;
|
||||
}
|
||||
|
||||
/* 3. default sysconfdir */
|
||||
if (find_my_exec(argv0, my_exec_path) < 0)
|
||||
{
|
||||
fprintf(stderr, _("%s: could not find own program executable\n"), argv0);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
get_etc_path(my_exec_path, etc_path);
|
||||
get_etc_path(my_exec_path, sysconf_etc_path);
|
||||
|
||||
snprintf(config_file_path, MAXPGPATH, "%s/repmgr.conf", etc_path);
|
||||
|
||||
log_debug(_("Looking for configuration file in %s\n"), etc_path);
|
||||
|
||||
if (stat(config_file_path, &config) != 0)
|
||||
if (verbose == true)
|
||||
{
|
||||
/* Not found - default to ./repmgr.conf */
|
||||
strncpy(config_file_path, DEFAULT_CONFIG_FILE, MAXPGPATH);
|
||||
canonicalize_path(config_file_path);
|
||||
log_debug(_("Looking for configuration file in %s\n"), config_file_path);
|
||||
log_notice(_("looking for configuration file in %s\n"), sysconf_etc_path);
|
||||
}
|
||||
|
||||
snprintf(config_file_path, MAXPGPATH, "%s/%s", sysconf_etc_path, CONFIG_FILE_NAME);
|
||||
if (stat(config_file_path, &stat_config) == 0)
|
||||
{
|
||||
config_file_found = true;
|
||||
goto end_search;
|
||||
}
|
||||
|
||||
end_search:
|
||||
if (config_file_found == true)
|
||||
{
|
||||
if (verbose == true)
|
||||
{
|
||||
log_notice(_("configuration file found at: %s\n"), config_file_path);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (verbose == true)
|
||||
{
|
||||
log_notice(_("no configuration file provided or found\n"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,49 +180,39 @@ load_config(const char *config_file, t_configuration_options *options, char *arg
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Parse configuration file; if any errors are encountered,
|
||||
* list them and exit.
|
||||
*
|
||||
* Ensure any default values set here are synced with repmgr.conf.sample
|
||||
* and any other documentation.
|
||||
*/
|
||||
bool
|
||||
parse_config(t_configuration_options *options)
|
||||
{
|
||||
FILE *fp;
|
||||
char *s,
|
||||
buff[MAXLINELENGTH];
|
||||
buf[MAXLINELENGTH];
|
||||
char name[MAXLEN];
|
||||
char value[MAXLEN];
|
||||
|
||||
/* For sanity-checking provided conninfo string */
|
||||
PQconninfoOption *conninfo_options;
|
||||
char *conninfo_errmsg = NULL;
|
||||
char *conninfo_errmsg = NULL;
|
||||
|
||||
fp = fopen(config_file_path, "r");
|
||||
/* Collate configuration file errors here for friendlier reporting */
|
||||
static ErrorList config_errors = { NULL, NULL };
|
||||
|
||||
/*
|
||||
* Since some commands don't require a config file at all, not having one
|
||||
* isn't necessarily a problem.
|
||||
*
|
||||
* If the user explicitly provided a configuration file and we can't
|
||||
* read it we'll raise an error.
|
||||
*
|
||||
* If no configuration file was provided, we'll try and read the default
|
||||
* file if it exists and is readable, but won't worry if it's not.
|
||||
bool node_found = false;
|
||||
|
||||
/* Initialize configuration options with sensible defaults
|
||||
* note: the default log level is set in log.c and does not need
|
||||
* to be initialised here
|
||||
*/
|
||||
if (fp == NULL)
|
||||
{
|
||||
if (config_file_provided)
|
||||
{
|
||||
log_err(_("unable to open provided configuration file '%s'; terminating\n"), config_file_path);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_notice(_("no configuration file provided and default file '%s' not found - "
|
||||
"continuing with default values\n"),
|
||||
DEFAULT_CONFIG_FILE);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Initialize configuration options with sensible defaults */
|
||||
memset(options->cluster_name, 0, sizeof(options->cluster_name));
|
||||
options->node = -1;
|
||||
options->upstream_node = NO_UPSTREAM_NODE;
|
||||
options->use_replication_slots = 0;
|
||||
memset(options->conninfo, 0, sizeof(options->conninfo));
|
||||
options->failover = MANUAL_FAILOVER;
|
||||
options->priority = DEFAULT_PRIORITY;
|
||||
@@ -162,25 +230,58 @@ parse_config(t_configuration_options *options)
|
||||
|
||||
/* default to 6 reconnection attempts at intervals of 10 seconds */
|
||||
options->reconnect_attempts = 6;
|
||||
options->reconnect_intvl = 10;
|
||||
options->reconnect_interval = 10;
|
||||
|
||||
options->monitor_interval_secs = 2;
|
||||
options->retry_promote_interval_secs = 300;
|
||||
|
||||
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
|
||||
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||
|
||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||
|
||||
options->tablespace_mapping.head = NULL;
|
||||
options->tablespace_mapping.tail = NULL;
|
||||
|
||||
/*
|
||||
* If no configuration file available (user didn't specify and none found
|
||||
* in the default locations), return with default values
|
||||
*/
|
||||
if (config_file_found == false)
|
||||
{
|
||||
log_verbose(LOG_NOTICE, _("no configuration file provided and no default file found - "
|
||||
"continuing with default values\n"));
|
||||
return true;
|
||||
}
|
||||
|
||||
fp = fopen(config_file_path, "r");
|
||||
|
||||
/* Read next line */
|
||||
while ((s = fgets(buff, sizeof buff, fp)) != NULL)
|
||||
/*
|
||||
* A configuration file has been found, either provided by the user
|
||||
* or found in one of the default locations. If we can't open it,
|
||||
* fail with an error.
|
||||
*/
|
||||
if (fp == NULL)
|
||||
{
|
||||
if (config_file_provided)
|
||||
{
|
||||
log_err(_("unable to open provided configuration file \"%s\"; terminating\n"), config_file_path);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_err(_("unable to open default configuration file \"%s\"; terminating\n"), config_file_path);
|
||||
}
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Read file */
|
||||
while ((s = fgets(buf, sizeof buf, fp)) != NULL)
|
||||
{
|
||||
bool known_parameter = true;
|
||||
|
||||
/* Parse name/value pair from line */
|
||||
parse_line(buff, name, value);
|
||||
parse_line(buf, name, value);
|
||||
|
||||
/* Skip blank lines */
|
||||
if (!strlen(name))
|
||||
@@ -194,9 +295,12 @@ parse_config(t_configuration_options *options)
|
||||
if (strcmp(name, "cluster") == 0)
|
||||
strncpy(options->cluster_name, value, MAXLEN);
|
||||
else if (strcmp(name, "node") == 0)
|
||||
options->node = atoi(value);
|
||||
{
|
||||
options->node = repmgr_atoi(value, "node", &config_errors, false);
|
||||
node_found = true;
|
||||
}
|
||||
else if (strcmp(name, "upstream_node") == 0)
|
||||
options->upstream_node = atoi(value);
|
||||
options->upstream_node = repmgr_atoi(value, "upstream_node", &config_errors, false);
|
||||
else if (strcmp(name, "conninfo") == 0)
|
||||
strncpy(options->conninfo, value, MAXLEN);
|
||||
else if (strcmp(name, "rsync_options") == 0)
|
||||
@@ -223,12 +327,11 @@ parse_config(t_configuration_options *options)
|
||||
}
|
||||
else
|
||||
{
|
||||
log_err(_("value for 'failover' must be 'automatic' or 'manual'\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
error_list_append(&config_errors,_("value for 'failover' must be 'automatic' or 'manual'\n"));
|
||||
}
|
||||
}
|
||||
else if (strcmp(name, "priority") == 0)
|
||||
options->priority = atoi(value);
|
||||
options->priority = repmgr_atoi(value, "priority", &config_errors, true);
|
||||
else if (strcmp(name, "node_name") == 0)
|
||||
strncpy(options->node_name, value, MAXLEN);
|
||||
else if (strcmp(name, "promote_command") == 0)
|
||||
@@ -236,11 +339,16 @@ parse_config(t_configuration_options *options)
|
||||
else if (strcmp(name, "follow_command") == 0)
|
||||
strncpy(options->follow_command, value, MAXLEN);
|
||||
else if (strcmp(name, "master_response_timeout") == 0)
|
||||
options->master_response_timeout = atoi(value);
|
||||
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
|
||||
/* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
||||
* we'll switch terminology in a future release (3.1?)
|
||||
*/
|
||||
else if (strcmp(name, "primary_response_timeout") == 0)
|
||||
options->master_response_timeout = repmgr_atoi(value, "primary_response_timeout", &config_errors, false);
|
||||
else if (strcmp(name, "reconnect_attempts") == 0)
|
||||
options->reconnect_attempts = atoi(value);
|
||||
options->reconnect_attempts = repmgr_atoi(value, "reconnect_attempts", &config_errors, false);
|
||||
else if (strcmp(name, "reconnect_interval") == 0)
|
||||
options->reconnect_intvl = atoi(value);
|
||||
options->reconnect_interval = repmgr_atoi(value, "reconnect_interval", &config_errors, false);
|
||||
else if (strcmp(name, "pg_bindir") == 0)
|
||||
strncpy(options->pg_bindir, value, MAXLEN);
|
||||
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||
@@ -250,11 +358,14 @@ parse_config(t_configuration_options *options)
|
||||
else if (strcmp(name, "logfile") == 0)
|
||||
strncpy(options->logfile, value, MAXLEN);
|
||||
else if (strcmp(name, "monitor_interval_secs") == 0)
|
||||
options->monitor_interval_secs = atoi(value);
|
||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||
options->retry_promote_interval_secs = atoi(value);
|
||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
|
||||
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "use_replication_slots") == 0)
|
||||
options->use_replication_slots = atoi(value);
|
||||
/* XXX we should have a dedicated boolean argument format */
|
||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
||||
else if (strcmp(name, "event_notification_command") == 0)
|
||||
strncpy(options->event_notification_command, value, MAXLEN);
|
||||
else if (strcmp(name, "event_notifications") == 0)
|
||||
@@ -274,76 +385,54 @@ parse_config(t_configuration_options *options)
|
||||
* as currently e.g. an empty `node` value will be converted to '0'.
|
||||
*/
|
||||
if (known_parameter == true && !strlen(value)) {
|
||||
log_err(_("no value provided for parameter '%s'\n"), name);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
char error_message_buf[MAXLEN] = "";
|
||||
snprintf(error_message_buf,
|
||||
MAXLEN,
|
||||
_("no value provided for parameter \"%s\""),
|
||||
name);
|
||||
|
||||
error_list_append(&config_errors, error_message_buf);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
/* Check config settings */
|
||||
|
||||
/* The following checks are for the presence of the parameter */
|
||||
if (*options->cluster_name == '\0')
|
||||
if (node_found == false)
|
||||
{
|
||||
log_err(_("required parameter 'cluster' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
error_list_append(&config_errors, _("\"node\": parameter was not found"));
|
||||
}
|
||||
else if (options->node == 0)
|
||||
{
|
||||
error_list_append(&config_errors, _("\"node\": must be greater than zero"));
|
||||
}
|
||||
|
||||
if (options->node == -1)
|
||||
if (strlen(options->conninfo))
|
||||
{
|
||||
log_err(_("required parameter 'node' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
|
||||
/* Sanity check the provided conninfo string
|
||||
*
|
||||
* NOTE: PQconninfoParse() verifies the string format and checks for valid options
|
||||
* but does not sanity check values
|
||||
*/
|
||||
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
|
||||
if (conninfo_options == NULL)
|
||||
{
|
||||
char error_message_buf[MAXLEN] = "";
|
||||
snprintf(error_message_buf,
|
||||
MAXLEN,
|
||||
_("\"conninfo\": %s"),
|
||||
conninfo_errmsg);
|
||||
|
||||
error_list_append(&config_errors, error_message_buf);
|
||||
}
|
||||
|
||||
PQconninfoFree(conninfo_options);
|
||||
}
|
||||
|
||||
if (options->node == 0)
|
||||
if (config_errors.head != NULL)
|
||||
{
|
||||
log_err(_("'node' must be an integer greater than zero\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*options->node_name == '\0')
|
||||
{
|
||||
log_err(_("required parameter 'node_name' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*options->conninfo == '\0')
|
||||
{
|
||||
log_err(_("required parameter 'conninfo' was not found\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Sanity check the provided conninfo string
|
||||
*
|
||||
* NOTE: this verifies the string format and checks for valid options
|
||||
* but does not sanity check values
|
||||
*/
|
||||
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
|
||||
if (conninfo_options == NULL)
|
||||
{
|
||||
log_err(_("Parameter 'conninfo' is invalid: %s"), conninfo_errmsg);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
PQconninfoFree(conninfo_options);
|
||||
|
||||
/* The following checks are for valid parameter values */
|
||||
if (options->master_response_timeout <= 0)
|
||||
{
|
||||
log_err(_("'master_response_timeout' must be greater than zero\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (options->reconnect_attempts < 0)
|
||||
{
|
||||
log_err(_("'reconnect_attempts' must be zero or greater\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (options->reconnect_intvl < 0)
|
||||
{
|
||||
log_err(_("'reconnect_interval' must be zero or greater\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
exit_with_errors(&config_errors);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -378,7 +467,7 @@ trim(char *s)
|
||||
}
|
||||
|
||||
void
|
||||
parse_line(char *buff, char *name, char *value)
|
||||
parse_line(char *buf, char *name, char *value)
|
||||
{
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
@@ -389,10 +478,10 @@ parse_line(char *buff, char *name, char *value)
|
||||
for (; i < MAXLEN; ++i)
|
||||
{
|
||||
|
||||
if (buff[i] == '=')
|
||||
if (buf[i] == '=')
|
||||
break;
|
||||
|
||||
switch(buff[i])
|
||||
switch(buf[i])
|
||||
{
|
||||
/* Ignore whitespace */
|
||||
case ' ':
|
||||
@@ -401,7 +490,7 @@ parse_line(char *buff, char *name, char *value)
|
||||
case '\t':
|
||||
continue;
|
||||
default:
|
||||
name[j++] = buff[i];
|
||||
name[j++] = buf[i];
|
||||
}
|
||||
}
|
||||
name[j] = '\0';
|
||||
@@ -411,9 +500,9 @@ parse_line(char *buff, char *name, char *value)
|
||||
*/
|
||||
for (; i < MAXLEN; ++i)
|
||||
{
|
||||
if (buff[i+1] == ' ')
|
||||
if (buf[i+1] == ' ')
|
||||
continue;
|
||||
if (buff[i+1] == '\t')
|
||||
if (buf[i+1] == '\t')
|
||||
continue;
|
||||
|
||||
break;
|
||||
@@ -424,12 +513,12 @@ parse_line(char *buff, char *name, char *value)
|
||||
*/
|
||||
j = 0;
|
||||
for (++i; i < MAXLEN; ++i)
|
||||
if (buff[i] == '\'')
|
||||
if (buf[i] == '\'')
|
||||
continue;
|
||||
else if (buff[i] == '#')
|
||||
else if (buf[i] == '#')
|
||||
break;
|
||||
else if (buff[i] != '\n')
|
||||
value[j++] = buff[i];
|
||||
else if (buf[i] != '\n')
|
||||
value[j++] = buf[i];
|
||||
else
|
||||
break;
|
||||
value[j] = '\0';
|
||||
@@ -491,7 +580,7 @@ reload_config(t_configuration_options *orig_options)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (new_options.reconnect_intvl < 0)
|
||||
if (new_options.reconnect_interval < 0)
|
||||
{
|
||||
log_warning(_("new value for 'reconnect_interval' must be zero or greater\n"));
|
||||
return false;
|
||||
@@ -610,10 +699,10 @@ reload_config(t_configuration_options *orig_options)
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* reconnect_intvl */
|
||||
if (orig_options->reconnect_intvl != new_options.reconnect_intvl)
|
||||
/* reconnect_interval */
|
||||
if (orig_options->reconnect_interval != new_options.reconnect_interval)
|
||||
{
|
||||
orig_options->reconnect_intvl = new_options.reconnect_intvl;
|
||||
orig_options->reconnect_interval = new_options.reconnect_interval;
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
@@ -665,6 +754,96 @@ reload_config(t_configuration_options *orig_options)
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
error_list_append(ErrorList *error_list, char *error_message)
|
||||
{
|
||||
ErrorListCell *cell;
|
||||
|
||||
cell = (ErrorListCell *) pg_malloc0(sizeof(ErrorListCell));
|
||||
|
||||
if (cell == NULL)
|
||||
{
|
||||
log_err(_("unable to allocate memory; terminating.\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
cell->error_message = pg_malloc0(MAXLEN);
|
||||
strncpy(cell->error_message, error_message, MAXLEN);
|
||||
|
||||
if (error_list->tail)
|
||||
{
|
||||
error_list->tail->next = cell;
|
||||
}
|
||||
else
|
||||
{
|
||||
error_list->head = cell;
|
||||
}
|
||||
|
||||
error_list->tail = cell;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Convert provided string to an integer using strtol;
|
||||
* on error, if a callback is provided, pass the error message to that,
|
||||
* otherwise exit
|
||||
*/
|
||||
int
|
||||
repmgr_atoi(const char *value, const char *config_item, ErrorList *error_list, bool allow_negative)
|
||||
{
|
||||
char *endptr;
|
||||
long longval = 0;
|
||||
char error_message_buf[MAXLEN] = "";
|
||||
|
||||
/* It's possible that some versions of strtol() don't treat an empty
|
||||
* string as an error.
|
||||
*/
|
||||
|
||||
if (*value == '\0')
|
||||
{
|
||||
snprintf(error_message_buf,
|
||||
MAXLEN,
|
||||
_("no value provided for \"%s\""),
|
||||
config_item);
|
||||
}
|
||||
else
|
||||
{
|
||||
errno = 0;
|
||||
longval = strtol(value, &endptr, 10);
|
||||
|
||||
if (value == endptr || errno)
|
||||
{
|
||||
snprintf(error_message_buf,
|
||||
MAXLEN,
|
||||
_("\"%s\": invalid value (provided: \"%s\")"),
|
||||
config_item, value);
|
||||
}
|
||||
}
|
||||
|
||||
/* Disallow negative values for most parameters */
|
||||
if (allow_negative == false && longval < 0)
|
||||
{
|
||||
snprintf(error_message_buf,
|
||||
MAXLEN,
|
||||
_("\"%s\" must be zero or greater (provided: %s)"),
|
||||
config_item, value);
|
||||
}
|
||||
|
||||
/* Error message buffer is set */
|
||||
if (error_message_buf[0] != '\0')
|
||||
{
|
||||
if (error_list == NULL)
|
||||
{
|
||||
log_err("%s\n", error_message_buf);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
error_list_append(error_list, error_message_buf);
|
||||
}
|
||||
|
||||
return (int32) longval;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Split argument into old_dir and new_dir and append to tablespace mapping
|
||||
@@ -797,3 +976,21 @@ parse_event_notifications_list(t_configuration_options *options, const char *arg
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
exit_with_errors(ErrorList *config_errors)
|
||||
{
|
||||
ErrorListCell *cell;
|
||||
|
||||
log_err(_("%s: following errors were found in the configuration file.\n"), progname());
|
||||
|
||||
for (cell = config_errors->head; cell; cell = cell->next)
|
||||
{
|
||||
log_err("%s\n", cell->error_message);
|
||||
}
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
29
config.h
29
config.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* config.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#include "strutil.h"
|
||||
|
||||
#define CONFIG_FILE_NAME "repmgr.conf"
|
||||
|
||||
typedef struct EventNotificationListCell
|
||||
{
|
||||
@@ -67,26 +68,46 @@ typedef struct
|
||||
char ssh_options[QUERY_STR_LEN];
|
||||
int master_response_timeout;
|
||||
int reconnect_attempts;
|
||||
int reconnect_intvl;
|
||||
int reconnect_interval;
|
||||
char pg_bindir[MAXLEN];
|
||||
char pg_ctl_options[MAXLEN];
|
||||
char pg_basebackup_options[MAXLEN];
|
||||
char logfile[MAXLEN];
|
||||
int monitor_interval_secs;
|
||||
int retry_promote_interval_secs;
|
||||
int witness_repl_nodes_sync_interval_secs;
|
||||
int use_replication_slots;
|
||||
char event_notification_command[MAXLEN];
|
||||
EventNotificationList event_notifications;
|
||||
TablespaceList tablespace_mapping;
|
||||
} t_configuration_options;
|
||||
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
|
||||
/*
 * One entry of an ErrorList: a singly linked list node carrying a pointer
 * to an error message string.
 */
typedef struct ErrorListCell
|
||||
{
|
||||
struct ErrorListCell *next;
|
||||
char *error_message;
|
||||
} ErrorListCell;
|
||||
|
||||
bool load_config(const char *config_file, t_configuration_options *options, char *argv0);
|
||||
/*
 * Linked list of ErrorListCell entries, tracked by pointers to its first
 * (head) and last (tail) cell.
 */
typedef struct ErrorList
|
||||
{
|
||||
ErrorListCell *head;
|
||||
ErrorListCell *tail;
|
||||
} ErrorList;
|
||||
|
||||
void set_progname(const char *argv0);
|
||||
const char * progname(void);
|
||||
|
||||
bool load_config(const char *config_file, bool verbose, t_configuration_options *options, char *argv0);
|
||||
bool reload_config(t_configuration_options *orig_options);
|
||||
bool parse_config(t_configuration_options *options);
|
||||
void parse_line(char *buff, char *name, char *value);
|
||||
char *trim(char *s);
|
||||
void error_list_append(ErrorList *error_list, char *error_message);
|
||||
int repmgr_atoi(const char *s,
|
||||
const char *config_item,
|
||||
ErrorList *error_list,
|
||||
bool allow_negative);
|
||||
|
||||
#endif
|
||||
|
||||
82
dbutils.h
82
dbutils.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* dbutils.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -20,13 +20,71 @@
|
||||
#ifndef _REPMGR_DBUTILS_H_
|
||||
#define _REPMGR_DBUTILS_H_
|
||||
|
||||
#include "access/xlogdefs.h"
|
||||
|
||||
#include "config.h"
|
||||
#include "strutil.h"
|
||||
|
||||
|
||||
/*
 * Server types; UNKNOWN is explicitly zero, so a zero-initialised
 * t_server_type reads as UNKNOWN.
 */
typedef enum {
|
||||
UNKNOWN = 0,
|
||||
MASTER,
|
||||
STANDBY,
|
||||
WITNESS
|
||||
} t_server_type;
|
||||
|
||||
/*
|
||||
* Struct to store node information
*
* NOTE: T_NODE_INFO_INITIALIZER initialises these fields positionally, so
* the field order here must be kept in sync with that macro.
|
||||
*/
|
||||
typedef struct s_node_info
|
||||
{
|
||||
int node_id;
|
||||
int upstream_node_id;
|
||||
t_server_type type;
|
||||
char name[MAXLEN];
|
||||
char conninfo_str[MAXLEN];
|
||||
char slot_name[MAXLEN];
|
||||
int priority;
|
||||
bool active;
|
||||
bool is_ready;
|
||||
bool is_visible;
|
||||
XLogRecPtr xlog_location;
|
||||
} t_node_info;
|
||||
|
||||
|
||||
/*
|
||||
* Struct to store replication slot information
*
* Holds the slot's name and type as fixed-size (MAXLEN) strings, plus an
* "active" flag.
|
||||
*/
|
||||
|
||||
typedef struct s_replication_slot
|
||||
{
|
||||
char slot_name[MAXLEN];
|
||||
char slot_type[MAXLEN];
|
||||
bool active;
|
||||
} t_replication_slot;
|
||||
|
||||
|
||||
/*
 * Positional initializer for t_node_info. The values map, in order, to:
 * node_id, upstream_node_id, type, name, conninfo_str, slot_name, priority,
 * active, is_ready, is_visible, xlog_location.
 */
#define T_NODE_INFO_INITIALIZER { \
|
||||
NODE_NOT_FOUND, \
|
||||
NO_UPSTREAM_NODE, \
|
||||
UNKNOWN, \
|
||||
"", \
|
||||
"", \
|
||||
"", \
|
||||
DEFAULT_PRIORITY, \
|
||||
true, \
|
||||
false, \
|
||||
false, \
|
||||
InvalidXLogRecPtr \
|
||||
}
|
||||
|
||||
PGconn *_establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error,
|
||||
const bool log_notice);
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
const bool exit_on_error);
|
||||
PGconn *test_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||
const char *values[],
|
||||
const bool exit_on_error);
|
||||
@@ -45,7 +103,7 @@ int guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value);
|
||||
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value, const char *datatype);
|
||||
|
||||
bool get_conninfo_value(const char *conninfo, const char *keyword, char *output);
|
||||
PGconn *get_upstream_connection(PGconn *standby_conn, char *cluster,
|
||||
int node_id,
|
||||
int *upstream_node_id_ptr,
|
||||
@@ -57,16 +115,22 @@ int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
bool cancel_query(PGconn *conn, int timeout);
|
||||
char *get_repmgr_schema(void);
|
||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
||||
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
|
||||
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||
bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||
PGresult * get_node_record(PGconn *conn, char *cluster, int node_id);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
|
||||
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
||||
t_server_type parse_node_type(const char *type);
|
||||
int get_data_checksum_version(const char *data_directory);
|
||||
#endif
|
||||
|
||||
|
||||
6
debian/DEBIAN/control
vendored
6
debian/DEBIAN/control
vendored
@@ -1,9 +1,9 @@
|
||||
Package: repmgr-auto
|
||||
Version: 2.0beta2
|
||||
Version: 3.0.1
|
||||
Section: database
|
||||
Priority: optional
|
||||
Architecture: all
|
||||
Depends: rsync, postgresql-9.0 | postgresql-9.1 | postgresql-9.2 | postgresql-9.3 | postgresql-9.4
|
||||
Maintainer: Jaime Casanova <jaime@2ndQuadrant.com>
|
||||
Depends: rsync, postgresql-9.3 | postgresql-9.4
|
||||
Maintainer: Self built package <user@localhost>
|
||||
Description: PostgreSQL replication setup, management and monitoring
|
||||
has two main executables
|
||||
|
||||
2
debian/repmgr.repmgrd.init
vendored
2
debian/repmgr.repmgrd.init
vendored
@@ -59,7 +59,7 @@ do_stop()
|
||||
# 0 if daemon has been stopped
|
||||
# 1 if daemon was already stopped
|
||||
# other if daemon could not be stopped or a failure occurred
|
||||
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN
|
||||
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $REPMGRD_PIDFILE --name "$(basename $REPMGRD_BIN)"
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* errcode.h
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -36,5 +36,7 @@
|
||||
#define ERR_SYS_FAILURE 13
|
||||
#define ERR_BAD_BASEBACKUP 14
|
||||
#define ERR_INTERNAL 15
|
||||
#define ERR_MONITORING_FAIL 16
|
||||
#define ERR_BAD_BACKUP_LABEL 17
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
130
log.c
130
log.c
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* log.c - Logging methods
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This module is a set of methods for logging (currently only syslog)
|
||||
*
|
||||
@@ -39,13 +39,37 @@
|
||||
|
||||
/* #define REPMGR_DEBUG */
|
||||
|
||||
static int detect_log_facility(const char *facility);
|
||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
|
||||
|
||||
int log_type = REPMGR_STDERR;
|
||||
int log_level = LOG_NOTICE;
|
||||
int last_log_level = LOG_NOTICE;
|
||||
int verbose_logging = false;
|
||||
int terse_logging = false;
|
||||
|
||||
void
|
||||
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||
{
|
||||
va_list arglist;
|
||||
|
||||
va_start(arglist, fmt);
|
||||
_stderr_log_with_level(level_name, level, fmt, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
static void
|
||||
_stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
|
||||
{
|
||||
time_t t;
|
||||
struct tm *tm;
|
||||
char buff[100];
|
||||
va_list ap;
|
||||
|
||||
/*
|
||||
* Store the requested level so that if there's a subsequent
|
||||
* log_hint(), we can suppress that if appropriate.
|
||||
*/
|
||||
last_log_level = level;
|
||||
|
||||
if (log_level >= level)
|
||||
{
|
||||
@@ -54,24 +78,74 @@ stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||
strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
||||
fprintf(stderr, "%s [%s] ", buff, level_name);
|
||||
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
log_hint(const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
static int detect_log_level(const char *level);
|
||||
static int detect_log_facility(const char *facility);
|
||||
if (terse_logging == false)
|
||||
{
|
||||
va_start(ap, fmt);
|
||||
_stderr_log_with_level("HINT", last_log_level, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
log_verbose(int level, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
|
||||
if (verbose_logging == true)
|
||||
{
|
||||
switch(level)
|
||||
{
|
||||
case LOG_EMERG:
|
||||
_stderr_log_with_level("EMERG", level, fmt, ap);
|
||||
break;
|
||||
case LOG_ALERT:
|
||||
_stderr_log_with_level("ALERT", level, fmt, ap);
|
||||
break;
|
||||
case LOG_CRIT:
|
||||
_stderr_log_with_level("CRIT", level, fmt, ap);
|
||||
break;
|
||||
case LOG_ERR:
|
||||
_stderr_log_with_level("ERR", level, fmt, ap);
|
||||
break;
|
||||
case LOG_WARNING:
|
||||
_stderr_log_with_level("WARNING", level, fmt, ap);
|
||||
break;
|
||||
case LOG_NOTICE:
|
||||
_stderr_log_with_level("NOTICE", level, fmt, ap);
|
||||
break;
|
||||
case LOG_INFO:
|
||||
_stderr_log_with_level("INFO", level, fmt, ap);
|
||||
break;
|
||||
case LOG_DEBUG:
|
||||
_stderr_log_with_level("DEBUG", level, fmt, ap);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
int log_type = REPMGR_STDERR;
|
||||
int log_level = LOG_NOTICE;
|
||||
|
||||
bool
|
||||
logger_init(t_configuration_options * opts, const char *ident, const char *level, const char *facility)
|
||||
logger_init(t_configuration_options * opts, const char *ident)
|
||||
{
|
||||
char *level = opts->loglevel;
|
||||
char *facility = opts->logfacility;
|
||||
|
||||
int l;
|
||||
int f;
|
||||
|
||||
@@ -95,10 +169,10 @@ logger_init(t_configuration_options * opts, const char *ident, const char *level
|
||||
printf("Assigned level for logger: %d\n", l);
|
||||
#endif
|
||||
|
||||
if (l > 0)
|
||||
if (l >= 0)
|
||||
log_level = l;
|
||||
else
|
||||
stderr_log_warning(_("Cannot detect log level %s (use any of DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level);
|
||||
stderr_log_warning(_("Invalid log level \"%s\" (available values: DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level);
|
||||
}
|
||||
|
||||
if (facility && *facility)
|
||||
@@ -174,9 +248,9 @@ logger_init(t_configuration_options * opts, const char *ident, const char *level
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
logger_shutdown(void)
|
||||
{
|
||||
@@ -189,17 +263,32 @@ logger_shutdown(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a minimum logging level. Intended for command line verbosity
|
||||
* options, which might increase requested logging over what's specified
|
||||
* in the regular configuration file.
|
||||
* Indicate whether extra-verbose logging is required. This will
|
||||
* generate a lot of output, particularly debug logging, and should
|
||||
* not be permanently enabled in production.
|
||||
*
|
||||
* NOTE: in previous repmgr versions, this option forced the log
|
||||
* level to INFO.
|
||||
*/
|
||||
void
|
||||
logger_min_verbose(int minimum)
|
||||
logger_set_verbose(void)
|
||||
{
|
||||
if (log_level < minimum)
|
||||
log_level = minimum;
|
||||
verbose_logging = true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Indicate whether some non-critical log messages can be omitted.
|
||||
* Currently this includes warnings about irrelevant command line
|
||||
* options and hints.
|
||||
*/
|
||||
|
||||
void logger_set_terse(void)
|
||||
{
|
||||
terse_logging = true;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
detect_log_level(const char *level)
|
||||
{
|
||||
@@ -220,17 +309,16 @@ detect_log_level(const char *level)
|
||||
if (!strcmp(level, "EMERG"))
|
||||
return LOG_EMERG;
|
||||
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
static int
|
||||
detect_log_facility(const char *facility)
|
||||
{
|
||||
int local = 0;
|
||||
|
||||
if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6)
|
||||
{
|
||||
|
||||
local = atoi(&facility[5]);
|
||||
|
||||
switch (local)
|
||||
|
||||
14
log.h
14
log.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* log.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -112,13 +112,19 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
#endif
|
||||
|
||||
|
||||
int detect_log_level(const char *level);
|
||||
|
||||
/* Logger initialisation and shutdown */
|
||||
|
||||
bool logger_init(t_configuration_options * opts, const char *ident);
|
||||
|
||||
bool logger_shutdown(void);
|
||||
|
||||
bool logger_init(t_configuration_options * opts, const char *ident,
|
||||
const char *level, const char *facility);
|
||||
void logger_set_verbose(void);
|
||||
void logger_set_terse(void);
|
||||
|
||||
void logger_min_verbose(int minimum);
|
||||
void log_hint(const char *fmt, ...);
|
||||
void log_verbose(int level, const char *fmt, ...);
|
||||
|
||||
extern int log_type;
|
||||
extern int log_level;
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
# Replication Manager sample configuration file
|
||||
###################################################
|
||||
|
||||
# Some configuration items will be set with a default value; this
|
||||
# is noted for each item. Where no default value is shown, the
|
||||
# parameter will be treated as empty or false.
|
||||
|
||||
# Required configuration items
|
||||
# ============================
|
||||
#
|
||||
@@ -11,17 +15,21 @@
|
||||
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
||||
# to preserve case, we recommend using lower case and avoiding whitespace
|
||||
# to facilitate easier querying of the repmgr views and tables.
|
||||
cluster=example_cluster
|
||||
#cluster=example_cluster
|
||||
|
||||
# Node ID and name
|
||||
# (Note: we recommend to avoid naming nodes after their initial
|
||||
# replication function, as this will cause confusion when e.g.
|
||||
# "standby2" is promoted to master)
|
||||
node=2
|
||||
node_name=node2
|
||||
# "standby2" is promoted to primary)
|
||||
#node=2 # a unique integer
|
||||
#node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||
# the server's hostname or another identifier unambiguously
|
||||
# associated with the server to avoid confusion
|
||||
|
||||
# Database connection information
|
||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
# Database connection information as a conninfo string
|
||||
# This must be accessible to all servers in the cluster; for details see:
|
||||
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
|
||||
# Optional configuration items
|
||||
# ============================
|
||||
@@ -29,33 +37,34 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||
# Replication settings
|
||||
# ---------------------
|
||||
|
||||
# when using cascading replication and a standby is to be connected to an
|
||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
||||
# must exist before the new standby can be registered. If a standby is
|
||||
# to connect directly to a master node, this parameter is not required.
|
||||
#
|
||||
# upstream_node=1
|
||||
# When using cascading replication, a standby can connect to another
|
||||
# upstream standby node which is specified by setting 'upstream_node'.
|
||||
# In that case, the upstream node must exist before the new standby
|
||||
# can be registered. If 'upstream_node' is not set, then the standby
|
||||
# will connect directly to the primary node.
|
||||
#upstream_node=1
|
||||
|
||||
# physical replication slots - PostgreSQL 9.4 and later only
|
||||
# use physical replication slots - PostgreSQL 9.4 and later only
|
||||
# (default: 0)
|
||||
#
|
||||
# use_replication_slots=0
|
||||
#use_replication_slots=0
|
||||
|
||||
# NOTE: 'max_replication_slots' should be configured for at least the
|
||||
# number of standbys which will connect to the primary.
|
||||
|
||||
# Logging and monitoring settings
|
||||
# -------------------------------
|
||||
|
||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||
# (default: NOTICE)
|
||||
loglevel=NOTICE
|
||||
#loglevel=NOTICE
|
||||
|
||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
# (default: STDERR)
|
||||
logfacility=STDERR
|
||||
#logfacility=STDERR
|
||||
|
||||
# stderr can be redirected to an arbitrary file:
|
||||
#
|
||||
# logfile='/var/log/repmgr.log'
|
||||
#logfile='/var/log/repmgr/repmgr.log'
|
||||
|
||||
# event notifications can be passed to an arbitrary external program
|
||||
# together with the following parameters:
|
||||
@@ -69,12 +78,12 @@ logfacility=STDERR
|
||||
# the values provided for "%t" and "%d" will probably contain spaces,
|
||||
# so should be quoted in the provided command configuration, e.g.:
|
||||
#
|
||||
# event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
|
||||
# By default, all notifications will be passed; the notification types
|
||||
# can be filtered to explicitly named ones:
|
||||
#
|
||||
# event_notifications=master_register,standby_register,witness_create
|
||||
#event_notifications=master_register,standby_register,witness_create
|
||||
|
||||
|
||||
# Environment/command settings
|
||||
@@ -82,17 +91,17 @@ logfacility=STDERR
|
||||
|
||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
||||
# (if not provided, defaults to system $PATH)
|
||||
# pg_bindir=/usr/bin/
|
||||
#pg_bindir=/usr/bin/
|
||||
|
||||
# external command options
|
||||
|
||||
# rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
# ssh_options=-o "StrictHostKeyChecking no"
|
||||
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
#ssh_options=-o "StrictHostKeyChecking no"
|
||||
|
||||
# external command arguments
|
||||
# external command arguments. Values shown are examples.
|
||||
|
||||
# pg_ctl_options='-s'
|
||||
# pg_basebackup_options='--xlog-method=s'
|
||||
#pg_ctl_options='-s'
|
||||
#pg_basebackup_options='--xlog-method=s'
|
||||
|
||||
|
||||
# Standby clone settings
|
||||
@@ -108,30 +117,36 @@ logfacility=STDERR
|
||||
# Failover settings (repmgrd)
|
||||
# ---------------------------
|
||||
#
|
||||
# These settings are only applied when repmgrd is running.
|
||||
# These settings are only applied when repmgrd is running. Values shown
|
||||
# are defaults.
|
||||
|
||||
# How many seconds we wait for master response before declaring master failure
|
||||
master_response_timeout=60
|
||||
# Number of seconds to wait for a response from the primary server before
|
||||
# deciding it has failed.
|
||||
|
||||
# How many times we try to reconnect to master before starting failover procedure
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=10
|
||||
#master_response_timeout=60
|
||||
|
||||
# Number of attempts at what interval (in seconds) to try and
|
||||
# connect to a server to establish its status (e.g. master
|
||||
# during failover)
|
||||
#reconnect_attempts=6
|
||||
#reconnect_interval=10
|
||||
|
||||
# Autofailover options
|
||||
failover=automatic # one of 'automatic', 'manual'
|
||||
priority=100 # a value of zero or less prevents the node being promoted to master
|
||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
#failover=manual # one of 'automatic', 'manual'
|
||||
# (default: manual)
|
||||
#priority=100 # a value of zero or less prevents the node being promoted to primary
|
||||
# (default: 100)
|
||||
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||
|
||||
# monitoring interval; default is 2s
|
||||
#
|
||||
# monitor_interval_secs=2
|
||||
# monitoring interval in seconds; default is 2
|
||||
#monitor_interval_secs=2
|
||||
|
||||
# change wait time for master; before we bail out and exit when the master
|
||||
# change wait time for primary; before we bail out and exit when the primary
|
||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||
# default value is 300)
|
||||
#
|
||||
# retry_promote_interval_secs=300
|
||||
|
||||
#retry_promote_interval_secs=300
|
||||
|
||||
# Number of seconds after which the witness server resyncs the repl_nodes table
|
||||
#witness_repl_nodes_sync_interval_secs=15
|
||||
|
||||
66
repmgr.h
66
repmgr.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* repmgr.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -32,15 +32,10 @@
|
||||
#define MIN_SUPPORTED_VERSION "9.3"
|
||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||
|
||||
#include "config.h"
|
||||
#define MAXFILENAME 1024
|
||||
#define ERRBUFF_SIZE 512
|
||||
|
||||
#define DEFAULT_CONFIG_FILE "./repmgr.conf"
|
||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||
#define DEFAULT_DEST_DIR "."
|
||||
#define DEFAULT_MASTER_PORT "5432"
|
||||
#define DEFAULT_DBNAME "postgres"
|
||||
#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
#define FAILOVER_NODES_MAX_CHECK 50
|
||||
@@ -49,14 +44,7 @@
|
||||
#define AUTOMATIC_FAILOVER 1
|
||||
#define NODE_NOT_FOUND -1
|
||||
#define NO_UPSTREAM_NODE -1
|
||||
|
||||
|
||||
typedef enum {
|
||||
UNKNOWN = 0,
|
||||
MASTER,
|
||||
STANDBY,
|
||||
WITNESS
|
||||
} t_server_type;
|
||||
#define UNKNOWN_NODE_ID -1
|
||||
|
||||
|
||||
|
||||
@@ -67,44 +55,60 @@ typedef struct
|
||||
char dbname[MAXLEN];
|
||||
char host[MAXLEN];
|
||||
char username[MAXLEN];
|
||||
char dest_dir[MAXFILENAME];
|
||||
char config_file[MAXFILENAME];
|
||||
char dest_dir[MAXPGPATH];
|
||||
char config_file[MAXPGPATH];
|
||||
char remote_user[MAXLEN];
|
||||
char superuser[MAXLEN];
|
||||
char wal_keep_segments[MAXLEN];
|
||||
bool verbose;
|
||||
bool terse;
|
||||
bool force;
|
||||
bool wait_for_master;
|
||||
bool ignore_rsync_warn;
|
||||
bool initdb_no_pwprompt;
|
||||
bool witness_pwprompt;
|
||||
bool rsync_only;
|
||||
bool fast_checkpoint;
|
||||
bool ignore_external_config_files;
|
||||
char pg_ctl_mode[MAXLEN];
|
||||
char masterport[MAXLEN];
|
||||
char localport[MAXLEN];
|
||||
/*
|
||||
* configuration file parameters which can be overridden on the
|
||||
* command line
|
||||
*/
|
||||
char loglevel[MAXLEN];
|
||||
|
||||
/* parameter used by STANDBY SWITCHOVER */
|
||||
char remote_config_file[MAXLEN];
|
||||
char pg_rewind[MAXPGPATH];
|
||||
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
|
||||
char config_archive_dir[MAXLEN];
|
||||
/* parameter used by CLUSTER CLEANUP */
|
||||
int keep_history;
|
||||
|
||||
char pg_bindir[MAXLEN];
|
||||
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
|
||||
/* deprecated command line options */
|
||||
char localport[MAXLEN];
|
||||
bool initdb_no_pwprompt;
|
||||
} t_runtime_options;
|
||||
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, "", "", 0, "", "" }
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false }
|
||||
|
||||
struct BackupLabel
|
||||
{
|
||||
XLogRecPtr start_wal_location;
|
||||
char start_wal_file[MAXLEN];
|
||||
XLogRecPtr checkpoint_location;
|
||||
char backup_from[MAXLEN];
|
||||
char backup_method[MAXLEN];
|
||||
char start_time[MAXLEN];
|
||||
char label[MAXLEN];
|
||||
XLogRecPtr min_failover_slot_lsn;
|
||||
};
|
||||
|
||||
extern char repmgr_schema[MAXLEN];
|
||||
|
||||
typedef struct ErrorListCell
|
||||
{
|
||||
struct ErrorListCell *next;
|
||||
char *error_message;
|
||||
} ErrorListCell;
|
||||
|
||||
typedef struct ErrorList
|
||||
{
|
||||
ErrorListCell *head;
|
||||
ErrorListCell *tail;
|
||||
} ErrorList;
|
||||
extern bool config_file_found;
|
||||
|
||||
#endif
|
||||
|
||||
11
repmgr.sql
11
repmgr.sql
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* repmgr.sql
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -59,3 +59,12 @@ WHERE (standby_node, last_monitor_time) IN (SELECT standby_node, MAX(last_monito
|
||||
ALTER VIEW repl_status OWNER TO repmgr;
|
||||
|
||||
CREATE INDEX idx_repl_status_sort ON repl_monitor(last_monitor_time, standby_node);
|
||||
|
||||
/*
|
||||
* This view shows the list of nodes with the information of which one is the upstream
|
||||
* in each case (when applicable)
|
||||
*/
|
||||
CREATE VIEW repl_show_nodes AS
|
||||
SELECT rn.id, rn.conninfo, rn.type, rn.name, rn.cluster,
|
||||
rn.priority, rn.active, sq.name AS upstream_node_name
|
||||
FROM repl_nodes as rn LEFT JOIN repl_nodes AS sq ON sq.id=rn.upstream_node_id;
|
||||
|
||||
479
repmgrd.c
479
repmgrd.c
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* repmgrd.c - Replication manager daemon
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This module connects to the nodes of a replication cluster and monitors
|
||||
* how far are they from master
|
||||
@@ -41,22 +41,6 @@
|
||||
#include "access/xlogdefs.h"
|
||||
#include "pqexpbuffer.h"
|
||||
|
||||
/*
|
||||
* Struct to store node information
|
||||
*/
|
||||
typedef struct s_node_info
|
||||
{
|
||||
int node_id;
|
||||
int upstream_node_id;
|
||||
char conninfo_str[MAXLEN];
|
||||
XLogRecPtr xlog_location;
|
||||
t_server_type type;
|
||||
bool is_ready;
|
||||
bool is_visible;
|
||||
char slot_name[MAXLEN];
|
||||
bool active;
|
||||
} t_node_info;
|
||||
|
||||
|
||||
|
||||
/* Local info */
|
||||
@@ -68,9 +52,7 @@ t_configuration_options master_options;
|
||||
|
||||
PGconn *master_conn = NULL;
|
||||
|
||||
const char *progname;
|
||||
|
||||
char *config_file = DEFAULT_CONFIG_FILE;
|
||||
char *config_file = "";
|
||||
bool verbose = false;
|
||||
bool monitoring_history = false;
|
||||
t_node_info node_info;
|
||||
@@ -81,7 +63,7 @@ char *pid_file = NULL;
|
||||
|
||||
t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||
|
||||
static void help(const char *progname);
|
||||
static void help(void);
|
||||
static void usage(void);
|
||||
static void check_cluster_configuration(PGconn *conn);
|
||||
static void check_node_configuration(void);
|
||||
@@ -89,7 +71,7 @@ static void check_node_configuration(void);
|
||||
static void standby_monitor(void);
|
||||
static void witness_monitor(void);
|
||||
static bool check_connection(PGconn **conn, const char *type, const char *conninfo);
|
||||
static bool set_local_node_failed(void);
|
||||
static bool set_local_node_status(void);
|
||||
|
||||
static void update_shared_memory(char *last_wal_standby_applied);
|
||||
static void update_registration(void);
|
||||
@@ -97,7 +79,6 @@ static void do_master_failover(void);
|
||||
static bool do_upstream_standby_failover(t_node_info upstream_node);
|
||||
|
||||
static t_node_info get_node_info(PGconn *conn, char *cluster, int node_id);
|
||||
static t_server_type parse_node_type(const char *type);
|
||||
static XLogRecPtr lsn_to_xlogrecptr(char *lsn, bool *format_ok);
|
||||
|
||||
/*
|
||||
@@ -158,9 +139,24 @@ main(int argc, char **argv)
|
||||
FILE *fd;
|
||||
|
||||
int server_version_num = 0;
|
||||
progname = get_progname(argv[0]);
|
||||
|
||||
while ((c = getopt_long(argc, argv, "?Vf:v:mdp:", long_options, &optindex)) != -1)
|
||||
set_progname(argv[0]);
|
||||
|
||||
/* Disallow running as root to prevent directory ownership problems */
|
||||
if (geteuid() == 0)
|
||||
{
|
||||
fprintf(stderr,
|
||||
_("%s: cannot be run as root\n"
|
||||
"Please log in (using, e.g., \"su\") as the "
|
||||
"(unprivileged) user that owns "
|
||||
"the data directory.\n"
|
||||
),
|
||||
progname());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
while ((c = getopt_long(argc, argv, "?Vf:vmdp:", long_options, &optindex)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
@@ -180,10 +176,10 @@ main(int argc, char **argv)
|
||||
pid_file = optarg;
|
||||
break;
|
||||
case '?':
|
||||
help(progname);
|
||||
help();
|
||||
exit(SUCCESS);
|
||||
case 'V':
|
||||
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
||||
printf("%s %s (PostgreSQL %s)\n", progname(), REPMGR_VERSION, PG_VERSION);
|
||||
exit(SUCCESS);
|
||||
default:
|
||||
usage();
|
||||
@@ -200,7 +196,7 @@ main(int argc, char **argv)
|
||||
* which case we'll need to refactor parse_config() not to abort,
|
||||
* and return the error message.
|
||||
*/
|
||||
load_config(config_file, &local_options, argv[0]);
|
||||
load_config(config_file, verbose, &local_options, argv[0]);
|
||||
|
||||
if (daemonize)
|
||||
{
|
||||
@@ -230,10 +226,9 @@ main(int argc, char **argv)
|
||||
strerror(errno));
|
||||
}
|
||||
|
||||
logger_init(&local_options, progname, local_options.loglevel,
|
||||
local_options.logfacility);
|
||||
logger_init(&local_options, progname());
|
||||
if (verbose)
|
||||
logger_min_verbose(LOG_INFO);
|
||||
logger_set_verbose();
|
||||
|
||||
if (log_type == REPMGR_SYSLOG)
|
||||
{
|
||||
@@ -247,6 +242,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
|
||||
/* Initialise the repmgr schema name */
|
||||
/* XXX check this handles quoting properly */
|
||||
maxlen_snprintf(repmgr_schema, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX,
|
||||
local_options.cluster_name);
|
||||
|
||||
@@ -264,7 +260,7 @@ main(int argc, char **argv)
|
||||
if (server_version_num > 0)
|
||||
{
|
||||
log_err(_("%s requires PostgreSQL %s or later\n"),
|
||||
progname,
|
||||
progname(),
|
||||
MIN_SUPPORTED_VERSION) ;
|
||||
}
|
||||
else
|
||||
@@ -278,11 +274,18 @@ main(int argc, char **argv)
|
||||
/* Retrieve record for this node from the local database */
|
||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||
|
||||
/* No node record found - exit gracefully */
|
||||
/*
|
||||
* No node record found - exit gracefully
|
||||
*
|
||||
* Note: it's highly unlikely this situation will occur when starting
|
||||
* repmgrd on a witness, unless someone goes to the trouble of
|
||||
* deleting the node record from the previously copied table.
|
||||
*/
|
||||
|
||||
if (node_info.node_id == NODE_NOT_FOUND)
|
||||
{
|
||||
log_err(_("No metadata record found for this node - terminating\n"));
|
||||
log_notice(_("HINT: was this node registered with 'repmgr (master|standby) register'?\n"));
|
||||
log_hint(_("Check that 'repmgr (master|standby) register' was executed for this node\n"));
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -295,9 +298,12 @@ main(int argc, char **argv)
|
||||
*/
|
||||
do
|
||||
{
|
||||
/* Timer for repl_nodes synchronisation interval */
|
||||
int sync_repl_nodes_elapsed = 0;
|
||||
|
||||
/*
|
||||
* Set my server mode, establish a connection to master and start
|
||||
* monitor
|
||||
* monitoring
|
||||
*/
|
||||
|
||||
switch (node_info.type)
|
||||
@@ -397,8 +403,8 @@ main(int argc, char **argv)
|
||||
local_options.cluster_name);
|
||||
|
||||
master_conn = get_master_connection(my_local_conn,
|
||||
local_options.cluster_name,
|
||||
&master_options.node, NULL);
|
||||
local_options.cluster_name,
|
||||
&master_options.node, NULL);
|
||||
|
||||
if (master_conn == NULL)
|
||||
{
|
||||
@@ -406,8 +412,7 @@ main(int argc, char **argv)
|
||||
initPQExpBuffer(&errmsg);
|
||||
|
||||
appendPQExpBuffer(&errmsg,
|
||||
_("unable to connect to master node '%s'"),
|
||||
local_options.cluster_name);
|
||||
_("unable to connect to master node"));
|
||||
|
||||
log_err("%s\n", errmsg.data);
|
||||
|
||||
@@ -457,7 +462,7 @@ main(int argc, char **argv)
|
||||
|
||||
do
|
||||
{
|
||||
log_debug("standby check loop...\n");
|
||||
log_verbose(LOG_DEBUG, "standby check loop...\n");
|
||||
|
||||
if (node_info.type == WITNESS)
|
||||
{
|
||||
@@ -467,8 +472,27 @@ main(int argc, char **argv)
|
||||
{
|
||||
standby_monitor();
|
||||
}
|
||||
|
||||
sleep(local_options.monitor_interval_secs);
|
||||
|
||||
/*
|
||||
* On a witness node, regularly resync the repl_nodes table
|
||||
* to keep up with any changes on the primary
|
||||
*
|
||||
* TODO: only resync the table if changes actually detected
|
||||
*/
|
||||
if (node_info.type == WITNESS)
|
||||
{
|
||||
sync_repl_nodes_elapsed += local_options.monitor_interval_secs;
|
||||
log_debug(_("seconds since last node record sync: %i (sync interval: %i)\n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs);
|
||||
if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs)
|
||||
{
|
||||
log_debug(_("Resyncing repl_nodes table\n"));
|
||||
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
|
||||
sync_repl_nodes_elapsed = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (got_SIGHUP)
|
||||
{
|
||||
/*
|
||||
@@ -483,6 +507,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
got_SIGHUP = false;
|
||||
}
|
||||
|
||||
if (failover_done)
|
||||
{
|
||||
log_debug(_("standby check loop will terminate\n"));
|
||||
@@ -558,10 +583,10 @@ witness_monitor(void)
|
||||
{
|
||||
log_warning(
|
||||
_("unable to determine a valid master server; waiting %i seconds to retry...\n"),
|
||||
local_options.reconnect_intvl
|
||||
local_options.reconnect_interval
|
||||
);
|
||||
PQfinish(master_conn);
|
||||
sleep(local_options.reconnect_intvl);
|
||||
sleep(local_options.reconnect_interval);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -575,7 +600,7 @@ witness_monitor(void)
|
||||
* XXX it would be neat to be able to handle this with e.g. table-based
|
||||
* logical replication
|
||||
*/
|
||||
copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
|
||||
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -671,14 +696,15 @@ standby_monitor(void)
|
||||
PGresult *res;
|
||||
char monitor_standby_timestamp[MAXLEN];
|
||||
char last_wal_master_location[MAXLEN];
|
||||
char last_wal_standby_received[MAXLEN];
|
||||
char last_wal_standby_applied[MAXLEN];
|
||||
char last_wal_standby_applied_timestamp[MAXLEN];
|
||||
char last_xlog_receive_location[MAXLEN];
|
||||
char last_xlog_replay_location[MAXLEN];
|
||||
char last_xact_replay_timestamp[MAXLEN];
|
||||
bool last_xlog_receive_location_gte_replayed;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
XLogRecPtr lsn_master;
|
||||
XLogRecPtr lsn_standby_received;
|
||||
XLogRecPtr lsn_standby_applied;
|
||||
XLogRecPtr lsn_master_current_xlog_location;
|
||||
XLogRecPtr lsn_last_xlog_receive_location;
|
||||
XLogRecPtr lsn_last_xlog_replay_location;
|
||||
|
||||
int connection_retries,
|
||||
ret;
|
||||
@@ -701,23 +727,16 @@ standby_monitor(void)
|
||||
{
|
||||
PQExpBufferData errmsg;
|
||||
|
||||
set_local_node_failed();
|
||||
set_local_node_status();
|
||||
|
||||
initPQExpBuffer(&errmsg);
|
||||
|
||||
appendPQExpBuffer(&errmsg,
|
||||
_("failed to connect to local node, node marked as failed and terminating!"));
|
||||
_("failed to connect to local node, node marked as failed!"));
|
||||
|
||||
log_err("%s\n", errmsg.data);
|
||||
|
||||
create_event_record(master_conn,
|
||||
&local_options,
|
||||
local_options.node,
|
||||
"repmgrd_shutdown",
|
||||
false,
|
||||
errmsg.data);
|
||||
|
||||
terminate(ERR_DB_CON);
|
||||
goto continue_monitoring_standby;
|
||||
}
|
||||
|
||||
upstream_conn = get_upstream_connection(my_local_conn,
|
||||
@@ -738,7 +757,7 @@ standby_monitor(void)
|
||||
check_connection(&upstream_conn, type, upstream_conninfo);
|
||||
/*
|
||||
* This takes up to local_options.reconnect_attempts *
|
||||
* local_options.reconnect_intvl seconds
|
||||
* local_options.reconnect_interval seconds
|
||||
*/
|
||||
|
||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||
@@ -846,6 +865,7 @@ standby_monitor(void)
|
||||
|
||||
PQfinish(upstream_conn);
|
||||
|
||||
continue_monitoring_standby:
|
||||
/* Check if we still are a standby, we could have been promoted */
|
||||
do
|
||||
{
|
||||
@@ -861,10 +881,13 @@ standby_monitor(void)
|
||||
* will require manual resolution as there's no way of determining
|
||||
* which master is the correct one.
|
||||
*
|
||||
* We should log a message so the user knows of the situation at hand.
|
||||
*
|
||||
* XXX check if the original master is still active and display a
|
||||
* warning
|
||||
*/
|
||||
log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
|
||||
log_err(_("It seems this server was promoted manually (not by repmgr) so you might by in the presence of a split-brain.\n"));
|
||||
log_err(_("Check your cluster and manually fix any anomaly.\n"));
|
||||
terminate(1);
|
||||
break;
|
||||
|
||||
@@ -874,8 +897,11 @@ standby_monitor(void)
|
||||
|
||||
if (!check_connection(&my_local_conn, "standby", NULL))
|
||||
{
|
||||
set_local_node_failed();
|
||||
terminate(0);
|
||||
set_local_node_status();
|
||||
/*
|
||||
* Let's continue checking, and if the postgres server on the
|
||||
* standby comes back up, we will activate it again
|
||||
*/
|
||||
}
|
||||
|
||||
break;
|
||||
@@ -884,14 +910,20 @@ standby_monitor(void)
|
||||
|
||||
if (did_retry)
|
||||
{
|
||||
log_info(_("standby connection recovered!\n"));
|
||||
/*
|
||||
* There's a possible situation where the standby went down for some reason
|
||||
* (maintenance for example) and is now up and maybe connected once again to
|
||||
* the stream. If we set the local standby node as failed and it's now running
|
||||
* and receiving replication data, we should activate it again.
|
||||
*/
|
||||
set_local_node_status();
|
||||
log_info(_("standby connection recovered!\n"));
|
||||
}
|
||||
|
||||
/* Fast path for the case where no history is requested */
|
||||
if (!monitoring_history)
|
||||
return;
|
||||
|
||||
|
||||
/*
|
||||
* If original master has gone away we'll need to get the new one
|
||||
* from the upstream node to write monitoring information
|
||||
@@ -953,7 +985,8 @@ standby_monitor(void)
|
||||
/* Get local xlog info */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
||||
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp() ");
|
||||
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), "
|
||||
"pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()");
|
||||
|
||||
res = PQexec(my_local_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -965,13 +998,33 @@ standby_monitor(void)
|
||||
}
|
||||
|
||||
strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
||||
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
||||
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
||||
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||
strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN);
|
||||
strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN);
|
||||
strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||
last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||
? true
|
||||
: false;
|
||||
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
* In the unusual event of a standby becoming disconnected from the primary,
|
||||
* while this repmgrd remains connected to the primary, subtracting
|
||||
* "last_xlog_replay_location" from "lsn_last_xlog_receive_location" and coercing to
|
||||
* (long long unsigned int) will result in a meaningless, very large
|
||||
* value which will overflow a BIGINT column and spew error messages into the
|
||||
* PostgreSQL log. In the absence of a better strategy, skip attempting
|
||||
* to insert a monitoring record.
|
||||
*/
|
||||
if (last_xlog_receive_location_gte_replayed == false)
|
||||
{
|
||||
log_verbose(LOG_WARNING,
|
||||
"Invalid replication_lag value calculated - is this standby connected to its upstream?\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Get master xlog info */
|
||||
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location()");
|
||||
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");
|
||||
|
||||
res = PQexec(master_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -985,35 +1038,47 @@ standby_monitor(void)
|
||||
PQclear(res);
|
||||
|
||||
/* Calculate the lag */
|
||||
lsn_master = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
||||
lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL);
|
||||
lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL);
|
||||
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
||||
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
|
||||
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
|
||||
|
||||
/*
|
||||
* Build the SQL to execute on master
|
||||
*/
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_monitor "
|
||||
" (primary_node, standby_node, "
|
||||
" last_monitor_time, last_apply_time, "
|
||||
" last_wal_primary_location, last_wal_standby_location, "
|
||||
" replication_lag, apply_lag ) "
|
||||
" VALUES(%d, %d, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||
" '%s', '%s', "
|
||||
" %llu, %llu) ",
|
||||
" (primary_node, "
|
||||
" standby_node, "
|
||||
" last_monitor_time, "
|
||||
" last_apply_time, "
|
||||
" last_wal_primary_location, "
|
||||
" last_wal_standby_location, "
|
||||
" replication_lag, "
|
||||
" apply_lag ) "
|
||||
" VALUES(%d, "
|
||||
" %d, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||
" '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||
" '%s', "
|
||||
" '%s', "
|
||||
" %llu, "
|
||||
" %llu) ",
|
||||
get_repmgr_schema_quoted(master_conn),
|
||||
master_options.node, local_options.node,
|
||||
monitor_standby_timestamp, last_wal_standby_applied_timestamp,
|
||||
last_wal_master_location, last_wal_standby_received,
|
||||
(long long unsigned int)(lsn_master - lsn_standby_received),
|
||||
(long long unsigned int)(lsn_standby_received - lsn_standby_applied));
|
||||
master_options.node,
|
||||
local_options.node,
|
||||
monitor_standby_timestamp,
|
||||
last_xact_replay_timestamp,
|
||||
last_wal_master_location,
|
||||
last_xlog_receive_location,
|
||||
(long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location),
|
||||
(long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location));
|
||||
|
||||
/*
|
||||
* Execute the query asynchronously, but don't check for a result. We will
|
||||
* check the result next time we pause for a monitor step.
|
||||
*/
|
||||
log_debug("standby_monitor: %s\n", sqlquery);
|
||||
log_verbose(LOG_DEBUG, "standby_monitor:() %s\n", sqlquery);
|
||||
|
||||
if (PQsendQuery(master_conn, sqlquery) == 0)
|
||||
log_warning(_("query could not be sent to master. %s\n"),
|
||||
PQerrorMessage(master_conn));
|
||||
@@ -1044,7 +1109,7 @@ do_master_failover(void)
|
||||
XLogRecPtr xlog_recptr;
|
||||
bool lsn_format_ok;
|
||||
|
||||
char last_wal_standby_applied[MAXLEN];
|
||||
char last_xlog_replay_location[MAXLEN];
|
||||
|
||||
PGconn *node_conn = NULL;
|
||||
|
||||
@@ -1055,10 +1120,10 @@ do_master_failover(void)
|
||||
t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
|
||||
|
||||
/* Store details of the failed node here */
|
||||
t_node_info failed_master = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||
t_node_info failed_master = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
/* Store details of the best candidate for promotion to master here */
|
||||
t_node_info best_candidate = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||
t_node_info best_candidate = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
/* get a list of standby nodes, including myself */
|
||||
sprintf(sqlquery,
|
||||
@@ -1187,12 +1252,13 @@ do_master_failover(void)
|
||||
terminate(ERR_FAILOVER_FAIL);
|
||||
}
|
||||
|
||||
sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
|
||||
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||
res = PQexec(node_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_info(_("unable to retrieve node's last standby location: %s\n"),
|
||||
PQerrorMessage(node_conn));
|
||||
|
||||
log_debug(_("connection details: %s\n"), nodes[i].conninfo_str);
|
||||
PQclear(res);
|
||||
PQfinish(node_conn);
|
||||
@@ -1218,7 +1284,7 @@ do_master_failover(void)
|
||||
}
|
||||
|
||||
/* last we get info about this node, and update shared memory */
|
||||
sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
|
||||
sprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||
res = PQexec(my_local_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
@@ -1226,8 +1292,8 @@ do_master_failover(void)
|
||||
" considered as new master and exit.\n"),
|
||||
PQerrorMessage(my_local_conn));
|
||||
PQclear(res);
|
||||
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
|
||||
update_shared_memory(last_wal_standby_applied);
|
||||
sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0);
|
||||
update_shared_memory(last_xlog_replay_location);
|
||||
terminate(ERR_DB_QUERY);
|
||||
}
|
||||
/* write last location in shared memory */
|
||||
@@ -1284,7 +1350,7 @@ do_master_failover(void)
|
||||
res = PQexec(node_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not"
|
||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not "
|
||||
"be considered as new master and exit.\n"),
|
||||
PQerrorMessage(node_conn));
|
||||
PQclear(res);
|
||||
@@ -1308,7 +1374,7 @@ do_master_failover(void)
|
||||
log_crit(
|
||||
_("unable to obtain LSN from node %i"), nodes[i].node_id
|
||||
);
|
||||
log_info(
|
||||
log_hint(
|
||||
_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n")
|
||||
);
|
||||
|
||||
@@ -1336,6 +1402,9 @@ do_master_failover(void)
|
||||
PQclear(res);
|
||||
|
||||
/* If position is 0/0, keep checking */
|
||||
/* XXX we should add a timeout here to prevent infinite looping
|
||||
* if the other node's repmgrd is not up
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1413,8 +1482,7 @@ do_master_failover(void)
|
||||
/* wait */
|
||||
sleep(5);
|
||||
|
||||
if (verbose)
|
||||
log_info(_("this node is the best candidate to be the new master, promoting...\n"));
|
||||
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
||||
|
||||
log_debug(_("promote command is: \"%s\"\n"),
|
||||
local_options.promote_command);
|
||||
@@ -1463,10 +1531,8 @@ do_master_failover(void)
|
||||
/* wait */
|
||||
sleep(10);
|
||||
|
||||
if (verbose)
|
||||
log_info(_("node %d is the best candidate to be the new master, we should follow it...\n"),
|
||||
best_candidate.node_id);
|
||||
log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);
|
||||
log_notice(_("node %d is the best candidate for new master, attempting to follow...\n"),
|
||||
best_candidate.node_id);
|
||||
|
||||
/*
|
||||
* The new master may take some time to be promoted. The follow command
|
||||
@@ -1477,57 +1543,23 @@ do_master_failover(void)
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
/*
|
||||
* If 9.4 or later, and replication slots in use, we'll need to create a
|
||||
* slot on the new master
|
||||
*/
|
||||
new_master_conn = establish_db_connection(best_candidate.conninfo_str, true);
|
||||
|
||||
if (local_options.use_replication_slots)
|
||||
{
|
||||
if (create_replication_slot(new_master_conn, node_info.slot_name) == false)
|
||||
{
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("Unable to create slot '%s' on the master node: %s"),
|
||||
node_info.slot_name,
|
||||
PQerrorMessage(new_master_conn));
|
||||
|
||||
log_err("%s\n", event_details.data);
|
||||
|
||||
create_event_record(new_master_conn,
|
||||
&local_options,
|
||||
node_info.node_id,
|
||||
"repmgrd_failover_follow",
|
||||
false,
|
||||
event_details.data);
|
||||
|
||||
PQfinish(new_master_conn);
|
||||
terminate(ERR_DB_QUERY);
|
||||
}
|
||||
}
|
||||
log_debug(_("executing follow command: \"%s\"\n"), local_options.follow_command);
|
||||
|
||||
r = system(local_options.follow_command);
|
||||
if (r != 0)
|
||||
{
|
||||
log_err(_("follow command failed. You could check and try it manually.\n"));
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* and reconnect to the local database */
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
|
||||
/* update node information to reflect new status */
|
||||
if (update_node_record_set_upstream(new_master_conn, local_options.cluster_name, node_info.node_id, best_candidate.node_id) == false)
|
||||
{
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("Unable to update node record for node %i (following new upstream node %i)"),
|
||||
node_info.node_id,
|
||||
best_candidate.node_id);
|
||||
_("Unable to execute follow command:\n %s"),
|
||||
local_options.follow_command);
|
||||
|
||||
log_err("%s\n", event_details.data);
|
||||
|
||||
create_event_record(new_master_conn,
|
||||
/* It won't be possible to write to the event notification
|
||||
* table but we should be able to generate an external notification
|
||||
* if required.
|
||||
*/
|
||||
create_event_record(NULL,
|
||||
&local_options,
|
||||
node_info.node_id,
|
||||
"repmgrd_failover_follow",
|
||||
@@ -1537,13 +1569,20 @@ do_master_failover(void)
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* and reconnect to the local database */
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
|
||||
/* update internal record for this node*/
|
||||
new_master_conn = establish_db_connection(best_candidate.conninfo_str, true);
|
||||
|
||||
node_info = get_node_info(new_master_conn, local_options.cluster_name, local_options.node);
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("Node %i now following new upstream node %i"),
|
||||
_("node %i now following new upstream node %i"),
|
||||
node_info.node_id,
|
||||
best_candidate.node_id);
|
||||
|
||||
log_notice("%s\n", event_details.data);
|
||||
|
||||
create_event_record(new_master_conn,
|
||||
&local_options,
|
||||
node_info.node_id,
|
||||
@@ -1570,6 +1609,8 @@ do_master_failover(void)
|
||||
* It might be worth providing a selection of reconnection strategies
|
||||
* as different behaviour might be desirable in different situations;
|
||||
* or maybe the option not to reconnect might be required?
|
||||
*
|
||||
* XXX check this handles replication slots gracefully
|
||||
*/
|
||||
static bool
|
||||
do_upstream_standby_failover(t_node_info upstream_node)
|
||||
@@ -1578,6 +1619,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int upstream_node_id = node_info.upstream_node_id;
|
||||
int r;
|
||||
PQExpBufferData event_details;
|
||||
|
||||
log_debug(_("do_upstream_standby_failover(): performing failover for node %i\n"),
|
||||
node_info.node_id);
|
||||
@@ -1647,26 +1689,65 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
sleep(local_options.reconnect_intvl);
|
||||
sleep(local_options.reconnect_interval);
|
||||
}
|
||||
|
||||
/* Close the connection to this server */
|
||||
PQfinish(my_local_conn);
|
||||
my_local_conn = NULL;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
/* Follow new upstream */
|
||||
r = system(local_options.follow_command);
|
||||
if (r != 0)
|
||||
{
|
||||
log_err(_("follow command failed. You could check and try it manually.\n"));
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("Unable to execute follow command:\n %s"),
|
||||
local_options.follow_command);
|
||||
|
||||
log_err("%s\n", event_details.data);
|
||||
|
||||
/* It won't be possible to write to the event notification
|
||||
* table but we should be able to generate an external notification
|
||||
* if required.
|
||||
*/
|
||||
create_event_record(NULL,
|
||||
&local_options,
|
||||
node_info.node_id,
|
||||
"repmgrd_failover_follow",
|
||||
false,
|
||||
event_details.data);
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (update_node_record_set_upstream(master_conn, local_options.cluster_name, node_info.node_id, upstream_node_id) == false)
|
||||
{
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("Unable to set node %i's new upstream ID to %i"),
|
||||
node_info.node_id,
|
||||
upstream_node_id);
|
||||
create_event_record(NULL,
|
||||
&local_options,
|
||||
node_info.node_id,
|
||||
"repmgrd_failover_follow",
|
||||
false,
|
||||
event_details.data);
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("node %i is now following upstream node %i"),
|
||||
node_info.node_id,
|
||||
upstream_node_id);
|
||||
|
||||
create_event_record(NULL,
|
||||
&local_options,
|
||||
node_info.node_id,
|
||||
"repmgrd_failover_follow",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||
|
||||
return true;
|
||||
@@ -1681,7 +1762,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
||||
|
||||
/*
|
||||
* Check if the node is still available; if after
|
||||
* local_options.reconnect_attempts * local_options.reconnect_intvl
|
||||
* local_options.reconnect_attempts * local_options.reconnect_interval
|
||||
* seconds of retries we cannot reconnect, return false
|
||||
*/
|
||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||
@@ -1699,9 +1780,9 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
||||
{
|
||||
log_warning(_("connection to %s has been lost, trying to recover... %i seconds before failover decision\n"),
|
||||
type,
|
||||
(local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries)));
|
||||
/* wait local_options.reconnect_intvl seconds between retries */
|
||||
sleep(local_options.reconnect_intvl);
|
||||
(local_options.reconnect_interval * (local_options.reconnect_attempts - connection_retries)));
|
||||
/* wait local_options.reconnect_interval seconds between retries */
|
||||
sleep(local_options.reconnect_interval);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1728,7 +1809,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
||||
|
||||
|
||||
/*
|
||||
* set_local_node_failed()
|
||||
* set_local_node_status()
|
||||
*
|
||||
* If failure of the local node is detected, attempt to connect
|
||||
* to the current master server (as stored in the global variable
|
||||
@@ -1736,16 +1817,16 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
||||
*/
|
||||
|
||||
static bool
|
||||
set_local_node_failed(void)
|
||||
set_local_node_status(void)
|
||||
{
|
||||
PGresult *res;
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
int active_master_node_id = NODE_NOT_FOUND;
|
||||
int active_master_node_id = NODE_NOT_FOUND;
|
||||
char master_conninfo[MAXLEN];
|
||||
|
||||
if (!check_connection(&master_conn, "master", NULL))
|
||||
{
|
||||
log_err(_("set_local_node_failed(): Unable to connect to last known master node\n"));
|
||||
log_err(_("set_local_node_status(): Unable to connect to last known master node\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1799,17 +1880,16 @@ set_local_node_failed(void)
|
||||
|
||||
|
||||
/*
|
||||
* Attempt to set own record as inactive
|
||||
* Attempt to set the active record to the correct value.
|
||||
* First
|
||||
*/
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"UPDATE %s.repl_nodes "
|
||||
" SET active = FALSE "
|
||||
" WHERE id = %i ",
|
||||
get_repmgr_schema_quoted(master_conn),
|
||||
node_info.node_id);
|
||||
|
||||
res = PQexec(master_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
if (!update_node_record_status(master_conn,
|
||||
local_options.cluster_name,
|
||||
node_info.node_id,
|
||||
"standby",
|
||||
node_info.upstream_node_id,
|
||||
is_standby(my_local_conn)==1))
|
||||
{
|
||||
log_err(_("unable to set local node %i as inactive on master: %s\n"),
|
||||
node_info.node_id,
|
||||
@@ -1834,7 +1914,7 @@ check_cluster_configuration(PGconn *conn)
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT oid FROM pg_class "
|
||||
" WHERE oid = '%s.repl_nodes'::regclass ",
|
||||
get_repmgr_schema());
|
||||
get_repmgr_schema_quoted(master_conn));
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
@@ -1907,6 +1987,8 @@ check_node_configuration(void)
|
||||
/* Adding the node */
|
||||
log_info(_("adding node %d to cluster '%s'\n"),
|
||||
local_options.node, local_options.cluster_name);
|
||||
|
||||
/* XXX use create_node_record() */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes"
|
||||
" (id, cluster, name, conninfo, priority, witness) "
|
||||
@@ -1961,18 +2043,18 @@ lsn_to_xlogrecptr(char *lsn, bool *format_ok)
|
||||
void
|
||||
usage(void)
|
||||
{
|
||||
log_err(_("%s: Replicator manager daemon \n"), progname);
|
||||
log_err(_("Try \"%s --help\" for more information.\n"), progname);
|
||||
log_err(_("%s: Replicator manager daemon \n"), progname());
|
||||
log_err(_("Try \"%s --help\" for more information.\n"), progname());
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
help(const char *progname)
|
||||
help(void)
|
||||
{
|
||||
printf(_("%s: replication management daemon for PostgreSQL\n"), progname);
|
||||
printf(_("%s: replication management daemon for PostgreSQL\n"), progname());
|
||||
printf(_("\n"));
|
||||
printf(_("Usage:\n"));
|
||||
printf(_(" %s [OPTIONS]\n"), progname);
|
||||
printf(_(" %s [OPTIONS]\n"), progname());
|
||||
printf(_("\n"));
|
||||
printf(_("Options:\n"));
|
||||
printf(_(" -?, --help show this help, then exit\n"));
|
||||
@@ -1983,7 +2065,7 @@ help(const char *progname)
|
||||
printf(_(" -d, --daemonize detach process from foreground\n"));
|
||||
printf(_(" -p, --pid-file=PATH write a PID file\n"));
|
||||
printf(_("\n"));
|
||||
printf(_("%s monitors a cluster of servers and optionally performs failover.\n"), progname);
|
||||
printf(_("%s monitors a cluster of servers and optionally performs failover.\n"), progname());
|
||||
}
|
||||
|
||||
|
||||
@@ -2021,14 +2103,14 @@ terminate(int retval)
|
||||
unlink(pid_file);
|
||||
}
|
||||
|
||||
log_info(_("%s terminating...\n"), progname);
|
||||
log_info(_("%s terminating...\n"), progname());
|
||||
|
||||
exit(retval);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
update_shared_memory(char *last_wal_standby_applied)
|
||||
update_shared_memory(char *last_xlog_replay_location)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
@@ -2036,7 +2118,7 @@ update_shared_memory(char *last_wal_standby_applied)
|
||||
sprintf(sqlquery,
|
||||
"SELECT %s.repmgr_update_standby_location('%s')",
|
||||
get_repmgr_schema_quoted(my_local_conn),
|
||||
last_wal_standby_applied);
|
||||
last_xlog_replay_location);
|
||||
|
||||
/* If an error happens, just inform about that and continue */
|
||||
res = PQexec(my_local_conn, sqlquery);
|
||||
@@ -2226,13 +2308,13 @@ check_and_create_pid_file(const char *pid_file)
|
||||
t_node_info
|
||||
get_node_info(PGconn *conn, char *cluster, int node_id)
|
||||
{
|
||||
PGresult *res;
|
||||
int res;
|
||||
|
||||
t_node_info node_info = { NODE_NOT_FOUND, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
res = get_node_record(conn, cluster, node_id);
|
||||
res = get_node_record(conn, cluster, node_id, &node_info);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
if (res == -1)
|
||||
{
|
||||
PQExpBufferData errmsg;
|
||||
initPQExpBuffer(&errmsg);
|
||||
@@ -2251,47 +2333,14 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
||||
false,
|
||||
errmsg.data);
|
||||
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
terminate(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
if (!PQntuples(res)) {
|
||||
if (res == 0)
|
||||
{
|
||||
log_warning(_("No record found record for node %i\n"), node_id);
|
||||
PQclear(res);
|
||||
node_info.node_id = NODE_NOT_FOUND;
|
||||
return node_info;
|
||||
}
|
||||
|
||||
node_info.node_id = atoi(PQgetvalue(res, 0, 0));
|
||||
node_info.upstream_node_id = atoi(PQgetvalue(res, 0, 1));
|
||||
strncpy(node_info.conninfo_str, PQgetvalue(res, 0, 2), MAXLEN);
|
||||
node_info.type = parse_node_type(PQgetvalue(res, 0, 3));
|
||||
strncpy(node_info.slot_name, PQgetvalue(res, 0, 4), MAXLEN);
|
||||
node_info.active = (strcmp(PQgetvalue(res, 0, 5), "t") == 0)
|
||||
? true
|
||||
: false;
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return node_info;
|
||||
}
|
||||
|
||||
|
||||
static t_server_type
|
||||
parse_node_type(const char *type)
|
||||
{
|
||||
if (strcmp(type, "master") == 0)
|
||||
{
|
||||
return MASTER;
|
||||
}
|
||||
else if (strcmp(type, "standby") == 0)
|
||||
{
|
||||
return STANDBY;
|
||||
}
|
||||
else if (strcmp(type, "witness") == 0)
|
||||
{
|
||||
return WITNESS;
|
||||
}
|
||||
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# Makefile
|
||||
#
|
||||
# Copyright (c) 2ndQuadrant, 2010-2015
|
||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||
#
|
||||
|
||||
MODULE_big = repmgr_funcs
|
||||
|
||||
35
sql/repmgr3.0_repmgr3.1.sql
Normal file
35
sql/repmgr3.0_repmgr3.1.sql
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Update a repmgr 3.0 installation to repmgr 3.1
|
||||
* ----------------------------------------------
|
||||
*
|
||||
* The new repmgr package should be installed first. Then
|
||||
* carry out these steps:
|
||||
*
|
||||
* 1. (If repmgrd is used) stop any running repmgrd instances
|
||||
* 2. On the master node, execute the SQL statements listed below
|
||||
* 3. (If repmgrd is used) restart repmgrd
|
||||
*/
|
||||
|
||||
/*
|
||||
* If your repmgr installation is not included in your repmgr
|
||||
* user's search path, please set the search path to the name
|
||||
* of the repmgr schema to ensure objects are installed in
|
||||
* the correct location.
|
||||
*
|
||||
* The repmgr schema is "repmgr_" + the cluster name defined in
|
||||
* 'repmgr.conf'.
|
||||
*/
|
||||
|
||||
-- SET search_path TO 'name_of_repmgr_schema';
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- New view "repl_show_nodes" which also displays the server's
|
||||
-- upstream node
|
||||
|
||||
CREATE VIEW repl_show_nodes AS
|
||||
SELECT rn.id, rn.conninfo, rn.type, rn.name, rn.cluster,
|
||||
rn.priority, rn.active, sq.name AS upstream_node_name
|
||||
FROM repl_nodes as rn LEFT JOIN repl_nodes AS sq ON sq.id=rn.upstream_node_id;
|
||||
|
||||
COMMIT;
|
||||
31
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
31
sql/repmgr3.1.1_repmgr3.1.2.sql
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Update a repmgr 3.1.1 installation to repmgr 3.1.2
|
||||
* --------------------------------------------------
|
||||
*
|
||||
* This update is only required if repmgrd is being used in conjunction
|
||||
* with a witness server.
|
||||
*
|
||||
* The new repmgr package should be installed first. Then
|
||||
* carry out these steps:
|
||||
*
|
||||
* 1. (If repmgrd is used) stop any running repmgrd instances
|
||||
* 2. On the master node, execute the SQL statement listed below
|
||||
* 3. (If repmgrd is used) restart repmgrd
|
||||
*/
|
||||
|
||||
/*
|
||||
* If your repmgr installation is not included in your repmgr
|
||||
* user's search path, please set the search path to the name
|
||||
* of the repmgr schema to ensure objects are installed in
|
||||
* the correct location.
|
||||
*
|
||||
* The repmgr schema is "repmgr_" + the cluster name defined in
|
||||
* 'repmgr.conf'.
|
||||
*/
|
||||
|
||||
-- SET search_path TO 'name_of_repmgr_schema';
|
||||
|
||||
BEGIN;
|
||||
|
||||
ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE;
|
||||
COMMIT;
|
||||
@@ -83,7 +83,12 @@ _PG_init(void)
|
||||
* resources in repmgr_shmem_startup().
|
||||
*/
|
||||
RequestAddinShmemSpace(repmgr_memsize());
|
||||
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
RequestNamedLWLockTranche("repmgr", 1);
|
||||
#else
|
||||
RequestAddinLWLocks(1);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Install hooks.
|
||||
@@ -128,7 +133,11 @@ repmgr_shmem_startup(void)
|
||||
if (!found)
|
||||
{
|
||||
/* First time through ... */
|
||||
#if (PG_VERSION_NUM >= 90600)
|
||||
shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock;
|
||||
#else
|
||||
shared_state->lock = LWLockAssign();
|
||||
#endif
|
||||
snprintf(shared_state->location,
|
||||
sizeof(shared_state->location), "%X/%X", 0, 0);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* repmgr_function.sql
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* uninstall_repmgr_funcs.sql
|
||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* strutil.c
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* strutil.h
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -24,12 +24,17 @@
|
||||
#include <stdlib.h>
|
||||
#include "errcode.h"
|
||||
|
||||
|
||||
#define QUERY_STR_LEN 8192
|
||||
#define MAXLEN 1024
|
||||
#define MAXLINELENGTH 4096
|
||||
#define MAXVERSIONSTR 16
|
||||
#define MAXCONNINFO 1024
|
||||
|
||||
/* Why? http://stackoverflow.com/a/5459929/398670 */
|
||||
#define STR(x) CppAsString(x)
|
||||
|
||||
#define MAXLEN_STR STR(MAXLEN)
|
||||
|
||||
extern int
|
||||
xsnprintf(char *str, size_t size, const char *format,...)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* uninstall_repmgr.sql
|
||||
*
|
||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user