mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
113 Commits
v3.2
...
REL3_0_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7cc7a278c2 | ||
|
|
20d9f978ab | ||
|
|
70a2797b9a | ||
|
|
8f62b4c9e6 | ||
|
|
310f3f31f9 | ||
|
|
4f849de95e | ||
|
|
0de4260664 | ||
|
|
fc75084e42 | ||
|
|
cfbc9dd3c6 | ||
|
|
94579b5f2e | ||
|
|
e9a25c367a | ||
|
|
3088096318 | ||
|
|
3bbd32c73c | ||
|
|
ac17033d61 | ||
|
|
711ad0a76c | ||
|
|
ad988dccce | ||
|
|
53fe3c7e5a | ||
|
|
7a439c90d0 | ||
|
|
87e5257cb8 | ||
|
|
1f240ff9b3 | ||
|
|
9d6cff0d40 | ||
|
|
f86e251430 | ||
|
|
085b7cb8b4 | ||
|
|
5ccf89ad9b | ||
|
|
6ae5401df0 | ||
|
|
4bd8190d02 | ||
|
|
efdc2355a7 | ||
|
|
61b1f72a0e | ||
|
|
882bfd9d8e | ||
|
|
c93f717305 | ||
|
|
85be96a0be | ||
|
|
ce2d4fb86f | ||
|
|
40354e1d62 | ||
|
|
3e1655f241 | ||
|
|
8387e7f65e | ||
|
|
aa4dd155b2 | ||
|
|
a171a501ab | ||
|
|
f42f771ff4 | ||
|
|
88cfcf358e | ||
|
|
ce3594d52d | ||
|
|
f64c42a514 | ||
|
|
3072139d06 | ||
|
|
3b7185fd39 | ||
|
|
819f980e76 | ||
|
|
49316fb8fb | ||
|
|
fa4ff73b87 | ||
|
|
29842f0e0d | ||
|
|
25db1ba737 | ||
|
|
7b9f6f5352 | ||
|
|
53b8f99217 | ||
|
|
95cdaac91d | ||
|
|
e7dd0f690c | ||
|
|
e0c5bb8d31 | ||
|
|
df3e55fa35 | ||
|
|
0ee2a1e6ba | ||
|
|
df05214970 | ||
|
|
bd1314d232 | ||
|
|
745566605d | ||
|
|
807dcc1038 | ||
|
|
acc0ffa81f | ||
|
|
1725e90308 | ||
|
|
2a3fb89603 | ||
|
|
8f24167f68 | ||
|
|
6ce94778d7 | ||
|
|
3a3c6d5143 | ||
|
|
73661637e9 | ||
|
|
ae84041a4e | ||
|
|
ea01d1d30b | ||
|
|
53ed8e948c | ||
|
|
43626892d0 | ||
|
|
8870b7d7f1 | ||
|
|
72b1e57251 | ||
|
|
6054da2c25 | ||
|
|
049ea4e24f | ||
|
|
5f8185ef3a | ||
|
|
66a6c15773 | ||
|
|
919fc0fbef | ||
|
|
c7c117130b | ||
|
|
df6517f167 | ||
|
|
0bf3fb0605 | ||
|
|
c2172d79a5 | ||
|
|
709276a19c | ||
|
|
3f98e1b91b | ||
|
|
8af08ab3f4 | ||
|
|
ff038a5148 | ||
|
|
f56f70c2a6 | ||
|
|
d353fe2a9f | ||
|
|
a70a44605f | ||
|
|
d14dcb3d8b | ||
|
|
249ac7c72a | ||
|
|
9d850fc4bd | ||
|
|
42cb811a07 | ||
|
|
1e202540e3 | ||
|
|
52db03d320 | ||
|
|
60d720f0c7 | ||
|
|
34af7dec2a | ||
|
|
a59ea243c0 | ||
|
|
0c5025b3d6 | ||
|
|
42b79b9b54 | ||
|
|
2e47c6b40b | ||
|
|
6fbff4747f | ||
|
|
cc567d38c8 | ||
|
|
69c552b8e0 | ||
|
|
51967d2bd8 | ||
|
|
97be9c0cda | ||
|
|
00a28fbb1e | ||
|
|
d512bac31d | ||
|
|
fb6781775d | ||
|
|
04c751a912 | ||
|
|
2615cffecc | ||
|
|
1f838f99c2 | ||
|
|
d3f119005b | ||
|
|
db6d4d8820 |
@@ -2,7 +2,7 @@ License and Contributions
|
|||||||
=========================
|
=========================
|
||||||
|
|
||||||
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
||||||
Copyright 2010-2015, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
Copyright 2010-2016, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||||
details.
|
details.
|
||||||
|
|
||||||
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2010-2015, 2ndQuadrant Limited
|
Copyright (c) 2010-2016, 2ndQuadrant Limited
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
|||||||
@@ -93,7 +93,6 @@ Create the user and database to manage replication::
|
|||||||
su - postgres
|
su - postgres
|
||||||
createuser -s repmgr
|
createuser -s repmgr
|
||||||
createdb -O repmgr repmgr
|
createdb -O repmgr repmgr
|
||||||
psql -f /usr/share/postgresql/9.0/contrib/repmgr_funcs.sql repmgr
|
|
||||||
|
|
||||||
Restart the PostgreSQL server::
|
Restart the PostgreSQL server::
|
||||||
|
|
||||||
@@ -172,11 +171,13 @@ Register Master and Standby
|
|||||||
|
|
||||||
Log in to node1.
|
Log in to node1.
|
||||||
|
|
||||||
Register the node as Master::
|
Register the node as master::
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgr -f /etc/repmgr/repmgr.conf master register
|
repmgr -f /etc/repmgr/repmgr.conf master register
|
||||||
|
|
||||||
|
This will also create the repmgr schema and functions.
|
||||||
|
|
||||||
Log in to node2. Register it as a standby::
|
Log in to node2. Register it as a standby::
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
|
|||||||
23
FAQ.md
23
FAQ.md
@@ -34,6 +34,11 @@ General
|
|||||||
replication slots, setting a higher figure will make adding new nodes
|
replication slots, setting a higher figure will make adding new nodes
|
||||||
easier.
|
easier.
|
||||||
|
|
||||||
|
- Does `repmgr` support hash indexes?
|
||||||
|
|
||||||
|
No. Hash indexes and replication do not mix well and their use is
|
||||||
|
explicitly discouraged; see:
|
||||||
|
http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
||||||
|
|
||||||
`repmgr`
|
`repmgr`
|
||||||
--------
|
--------
|
||||||
@@ -96,8 +101,9 @@ General
|
|||||||
is intended to support running the witness server as a separate
|
is intended to support running the witness server as a separate
|
||||||
instance on a normal node server, rather than on its own dedicated server.
|
instance on a normal node server, rather than on its own dedicated server.
|
||||||
|
|
||||||
To specify a port for the witness server, supply the port number to
|
To specify a different port for the witness server, supply the port number
|
||||||
repmgr with the `-l/--local-port` command line option.
|
in the `conninfo` string in `repmgr.conf`
|
||||||
|
(repmgr 3.0.1 and earlier: use the `-l/--local-port` option)
|
||||||
|
|
||||||
- Do I need to include `shared_preload_libraries = 'repmgr_funcs'`
|
- Do I need to include `shared_preload_libraries = 'repmgr_funcs'`
|
||||||
in `postgresql.conf` if I'm not using `repmgrd`?
|
in `postgresql.conf` if I'm not using `repmgrd`?
|
||||||
@@ -106,6 +112,14 @@ General
|
|||||||
If you later decide to run `repmgrd`, you just need to add
|
If you later decide to run `repmgrd`, you just need to add
|
||||||
`shared_preload_libraries = 'repmgr_funcs'` and restart PostgreSQL.
|
`shared_preload_libraries = 'repmgr_funcs'` and restart PostgreSQL.
|
||||||
|
|
||||||
|
- I've provided replication permission for the `repmgr` user in `pg_hba.conf`
|
||||||
|
but `repmgr`/`repmgrd` complains it can't connect to the server... Why?
|
||||||
|
|
||||||
|
`repmgr`/`repmgrd` need to be able to connect to the repmgr database
|
||||||
|
with a normal connection to query metadata. The `replication` connection
|
||||||
|
permission is for PostgreSQL's streaming replication and doesn't
|
||||||
|
necessarily need to be the `repmgr` user.
|
||||||
|
|
||||||
|
|
||||||
`repmgrd`
|
`repmgrd`
|
||||||
---------
|
---------
|
||||||
@@ -134,3 +148,8 @@ General
|
|||||||
|
|
||||||
Note that after registering a delayed standby, `repmgrd` will only start
|
Note that after registering a delayed standby, `repmgrd` will only start
|
||||||
once the metadata added in the master node has been replicated.
|
once the metadata added in the master node has been replicated.
|
||||||
|
|
||||||
|
- How can I get `repmgrd` to rotate its logfile?
|
||||||
|
|
||||||
|
Configure your system's `logrotate` service to do this; see example
|
||||||
|
in README.md
|
||||||
|
|||||||
26
HISTORY
26
HISTORY
@@ -1,4 +1,24 @@
|
|||||||
3.0.2 2015-09-
|
3.0.4 2016-01-
|
||||||
|
Remove requirement for 'archive_mode' to be enabled (Ian)
|
||||||
|
|
||||||
|
3.0.3 2016-01-04
|
||||||
|
Create replication slot if required before base backup is run (Abhijit)
|
||||||
|
standby clone: when using rsync, clean up "pg_replslot" directory (Ian)
|
||||||
|
Improve --help output (Ian)
|
||||||
|
Improve config file parsing (Ian)
|
||||||
|
Various logging output improvements, including explicit HINTS (Ian)
|
||||||
|
Add --log-level to explicitly set log level on command line (Ian)
|
||||||
|
Repurpose --verbose to display extra log output (Ian)
|
||||||
|
Add --terse to hide hints and other non-critical output (Ian)
|
||||||
|
Reference internal functions with explicit catalog path (Ian)
|
||||||
|
When following a new primary, have repmgr (not repmgrd) create the new slot (Ian)
|
||||||
|
Add /etc/repmgr.conf as a default configuration file location (Ian)
|
||||||
|
Prevent repmgrd's -v/--verbose option expecting a parameter (Ian)
|
||||||
|
Prevent invalid replication_lag values being written to the monitoring table (Ian)
|
||||||
|
Improve repmgrd behaviour when monitored standby node is temporarily
|
||||||
|
unavailable (Martín)
|
||||||
|
|
||||||
|
3.0.2 2015-10-02
|
||||||
Improve handling of --help/--version options; and improve help output (Ian)
|
Improve handling of --help/--version options; and improve help output (Ian)
|
||||||
Improve handling of situation where logfile can't be opened (Ian)
|
Improve handling of situation where logfile can't be opened (Ian)
|
||||||
Always pass -D/--pgdata option to pg_basebackup (Ian)
|
Always pass -D/--pgdata option to pg_basebackup (Ian)
|
||||||
@@ -12,7 +32,9 @@
|
|||||||
Update tablespace remapping in --rsync-only mode for 9.5 and later (Ian)
|
Update tablespace remapping in --rsync-only mode for 9.5 and later (Ian)
|
||||||
Deprecate `-l/--local-port` option - the port can be extracted
|
Deprecate `-l/--local-port` option - the port can be extracted
|
||||||
from the conninfo string in repmgr.conf (Ian)
|
from the conninfo string in repmgr.conf (Ian)
|
||||||
Add STANDBY UNREGISTE (Vik Fearing)
|
Add STANDBY UNREGISTER (Vik Fearing)
|
||||||
|
Don't fail with error when registering master if schema already defined (Ian)
|
||||||
|
Fixes to whitespace handling when parsing config file (Ian)
|
||||||
|
|
||||||
3.0.1 2015-04-16
|
3.0.1 2015-04-16
|
||||||
Prevent repmgrd from looping infinitely if node was not registered (Ian)
|
Prevent repmgrd from looping infinitely if node was not registered (Ian)
|
||||||
|
|||||||
2
Makefile
2
Makefile
@@ -1,6 +1,6 @@
|
|||||||
#
|
#
|
||||||
# Makefile
|
# Makefile
|
||||||
# Copyright (c) 2ndQuadrant, 2010-2015
|
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
|
|
||||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||||
|
|||||||
36
README.md
36
README.md
@@ -12,10 +12,13 @@ This version can use `pg_basebackup` to clone standby servers, supports
|
|||||||
replication slots and cascading replication, doesn't require a restart
|
replication slots and cascading replication, doesn't require a restart
|
||||||
after promotion, and has many usability improvements.
|
after promotion, and has many usability improvements.
|
||||||
|
|
||||||
Please continue to use `repmgr 2` with earlier PostgreSQL 9.x versions.
|
Please continue to use `repmgr 2` with PostgreSQL 9.2 and earlier.
|
||||||
For a list of changes since `repmgr 2` and instructions on upgrading to
|
For a list of changes since `repmgr 2` and instructions on upgrading to
|
||||||
`repmgr 3`, see the "Upgrading from repmgr 2" section below.
|
`repmgr 3`, see the "Upgrading from repmgr 2" section below.
|
||||||
|
|
||||||
|
For a list of frequently asked questions about `repmgr`, please refer
|
||||||
|
to the file `FAQ.md`.
|
||||||
|
|
||||||
Overview
|
Overview
|
||||||
--------
|
--------
|
||||||
|
|
||||||
@@ -111,7 +114,7 @@ like the following in `postgresql.conf`:
|
|||||||
# How much WAL to retain on the primary to allow a temporarily
|
# How much WAL to retain on the primary to allow a temporarily
|
||||||
# disconnected standby to catch up again. The larger this is, the
|
# disconnected standby to catch up again. The larger this is, the
|
||||||
# longer the standby can be disconnected. This is needed only in
|
# longer the standby can be disconnected. This is needed only in
|
||||||
# 9.3; in 9.4, replication slots can be used instead (see below).
|
# 9.3; from 9.4, replication slots can be used instead (see below).
|
||||||
|
|
||||||
wal_keep_segments = 5000
|
wal_keep_segments = 5000
|
||||||
|
|
||||||
@@ -141,10 +144,14 @@ running the following commands:
|
|||||||
createuser -s repmgr
|
createuser -s repmgr
|
||||||
createdb repmgr -O repmgr
|
createdb repmgr -O repmgr
|
||||||
|
|
||||||
We recommend using the name `repmgr` for both, but you can use whatever
|
We recommend using the name `repmgr` for both user and database, but you
|
||||||
name you like (and you need to set the names you chose in the `conninfo`
|
can use whatever name you like (and you need to set the names you chose
|
||||||
string in `repmgr.conf`; see below). `repmgr` will create the schema and
|
in the `conninfo` string in `repmgr.conf`; see below). We also recommend
|
||||||
objects it needs when it connects to the server.
|
that you set the `repmgr` user's search path to include the `repmgr` schema
|
||||||
|
for convenience when querying the metadata tables and views.
|
||||||
|
|
||||||
|
The `repmgr` application will create its metadata schema in the `repmgr`
|
||||||
|
database when the master server is registered.
|
||||||
|
|
||||||
### repmgr configuration
|
### repmgr configuration
|
||||||
|
|
||||||
@@ -260,6 +267,20 @@ Example log output (at default log level):
|
|||||||
[2015-03-11 13:15:40] [INFO] reloading configuration file and updating repmgr tables
|
[2015-03-11 13:15:40] [INFO] reloading configuration file and updating repmgr tables
|
||||||
[2015-03-11 13:15:40] [INFO] starting continuous standby node monitoring
|
[2015-03-11 13:15:40] [INFO] starting continuous standby node monitoring
|
||||||
|
|
||||||
|
Note that currently `repmgrd` does not provide logfile rotation. To ensure
|
||||||
|
the current logfile does not grow indefinitely, configure your system's `logrotate`
|
||||||
|
to do this. Sample configuration to rotate logfiles weekly with retention
|
||||||
|
for up to 52 weeks and rotation forced if a file grows beyond 100MB:
|
||||||
|
|
||||||
|
/var/log/postgresql/repmgr-9.4.log {
|
||||||
|
missingok
|
||||||
|
compress
|
||||||
|
rotate 52
|
||||||
|
maxsize 100M
|
||||||
|
weekly
|
||||||
|
create 0600 postgres postgres
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Witness server
|
Witness server
|
||||||
--------------
|
--------------
|
||||||
@@ -355,6 +376,7 @@ Following event types currently exist:
|
|||||||
standby_promote
|
standby_promote
|
||||||
witness_create
|
witness_create
|
||||||
repmgrd_start
|
repmgrd_start
|
||||||
|
repmgrd_monitor
|
||||||
repmgrd_failover_promote
|
repmgrd_failover_promote
|
||||||
repmgrd_failover_follow
|
repmgrd_failover_follow
|
||||||
|
|
||||||
@@ -598,7 +620,7 @@ exit:
|
|||||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
||||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
||||||
|
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
||||||
|
|
||||||
Support and Assistance
|
Support and Assistance
|
||||||
----------------------
|
----------------------
|
||||||
|
|||||||
37
TODO
37
TODO
@@ -7,6 +7,7 @@ Known issues in repmgr
|
|||||||
|
|
||||||
* PGPASSFILE may not be passed to pg_basebackup
|
* PGPASSFILE may not be passed to pg_basebackup
|
||||||
|
|
||||||
|
|
||||||
Planned feature improvements
|
Planned feature improvements
|
||||||
============================
|
============================
|
||||||
|
|
||||||
@@ -38,3 +39,39 @@ Planned feature improvements
|
|||||||
|
|
||||||
* make old master node ID available for event notification commands
|
* make old master node ID available for event notification commands
|
||||||
(See github issue #80).
|
(See github issue #80).
|
||||||
|
|
||||||
|
* Have pg_basebackup use replication slots, if and when support for
|
||||||
|
this is added; see:
|
||||||
|
http://www.postgresql.org/message-id/555DD2B2.7020000@gmx.net
|
||||||
|
|
||||||
|
* use "primary/standby" terminology in place of "master/slave" for consistency
|
||||||
|
with main PostgreSQL usage
|
||||||
|
|
||||||
|
* repmgr standby clone: possibility to use barman instead of performing a new base backup
|
||||||
|
|
||||||
|
* possibility to transform a failed master into a new standby with pg_rewind
|
||||||
|
|
||||||
|
* "repmgr standby switchover" to promote a standby in a controlled manner
|
||||||
|
and convert the existing primary into a standby
|
||||||
|
|
||||||
|
* make repmgrd more robust
|
||||||
|
|
||||||
|
* repmgr: when cloning a standby using pg_basebackup and replication slots are
|
||||||
|
requested, activate the replication slot using pg_receivexlog to negate the
|
||||||
|
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
|
||||||
|
|
||||||
|
Usability improvements
|
||||||
|
======================
|
||||||
|
|
||||||
|
* repmgr: add interrupt handler, so that if the program is interrupted
|
||||||
|
while running a backup, an attempt can be made to execute pg_stop_backup()
|
||||||
|
on the primary, to prevent an orphaned backup state existing.
|
||||||
|
|
||||||
|
* repmgr: when unregistering a node, delete any entries in the repl_monitoring
|
||||||
|
table.
|
||||||
|
|
||||||
|
* repmgr: for "standby unregister", accept connection parameters for the
|
||||||
|
primary and perform metadata updates (and slot removal) directly on
|
||||||
|
the primary, to allow a shutdown standby to be unregistered
|
||||||
|
(currently the standby must still be running, which means the replication
|
||||||
|
slot can't be dropped).
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* check_dir.c - Directories management functions
|
* check_dir.c - Directories management functions
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -320,10 +320,10 @@ _create_pg_dir(char *dir, bool force, bool for_witness)
|
|||||||
}
|
}
|
||||||
else if (pg_dir && !force)
|
else if (pg_dir && !force)
|
||||||
{
|
{
|
||||||
log_warning(_("\nThis looks like a PostgreSQL directory.\n"
|
log_hint(_("This looks like a PostgreSQL directory.\n"
|
||||||
"If you are sure you want to clone here, "
|
"If you are sure you want to clone here, "
|
||||||
"please check there is no PostgreSQL server "
|
"please check there is no PostgreSQL server "
|
||||||
"running and use the --force option\n"));
|
"running and use the -F/--force option\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* check_dir.h
|
* check_dir.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
415
config.c
415
config.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* config.c - Functions to parse the config file
|
* config.c - Functions to parse the config file
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -26,9 +26,25 @@
|
|||||||
|
|
||||||
static void parse_event_notifications_list(t_configuration_options *options, const char *arg);
|
static void parse_event_notifications_list(t_configuration_options *options, const char *arg);
|
||||||
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
static void tablespace_list_append(t_configuration_options *options, const char *arg);
|
||||||
|
static void exit_with_errors(ErrorList *config_errors);
|
||||||
|
|
||||||
|
const static char *_progname = '\0';
|
||||||
static char config_file_path[MAXPGPATH];
|
static char config_file_path[MAXPGPATH];
|
||||||
static bool config_file_provided = false;
|
static bool config_file_provided = false;
|
||||||
|
static bool config_file_found = false;
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
set_progname(const char *argv0)
|
||||||
|
{
|
||||||
|
_progname = get_progname(argv0);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *
|
||||||
|
progname(void)
|
||||||
|
{
|
||||||
|
return _progname;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* load_config()
|
* load_config()
|
||||||
@@ -40,61 +56,123 @@ static bool config_file_provided = false;
|
|||||||
*
|
*
|
||||||
* Any configuration options changed in this function must also be changed in
|
* Any configuration options changed in this function must also be changed in
|
||||||
* reload_config()
|
* reload_config()
|
||||||
|
*
|
||||||
|
* NOTE: this function is called before the logger is set up, so we need
|
||||||
|
* to handle the verbose option ourselves; also the default log level is NOTICE,
|
||||||
|
* so we can't use DEBUG.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
load_config(const char *config_file, t_configuration_options *options, char *argv0)
|
load_config(const char *config_file, bool verbose, t_configuration_options *options, char *argv0)
|
||||||
{
|
{
|
||||||
struct stat config;
|
struct stat stat_config;
|
||||||
/* Sanity checks */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If a configuration file was provided, check it exists, otherwise
|
* If a configuration file was provided, check it exists, otherwise
|
||||||
* emit an error and terminate
|
* emit an error and terminate. We assume that if a user explicitly
|
||||||
|
* provides a configuration file, they'll want to make sure it's
|
||||||
|
* used and not fall back to any of the defaults.
|
||||||
*/
|
*/
|
||||||
if (config_file[0])
|
if (config_file[0])
|
||||||
{
|
{
|
||||||
strncpy(config_file_path, config_file, MAXPGPATH);
|
strncpy(config_file_path, config_file, MAXPGPATH);
|
||||||
canonicalize_path(config_file_path);
|
canonicalize_path(config_file_path);
|
||||||
|
|
||||||
if (stat(config_file_path, &config) != 0)
|
if (stat(config_file_path, &stat_config) != 0)
|
||||||
{
|
{
|
||||||
log_err(_("provided configuration file '%s' not found: %s\n"),
|
log_err(_("provided configuration file \"%s\" not found: %s\n"),
|
||||||
config_file,
|
config_file,
|
||||||
strerror(errno)
|
strerror(errno)
|
||||||
);
|
);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (verbose == true)
|
||||||
|
{
|
||||||
|
log_notice(_("using configuration file \"%s\"\n"), config_file);
|
||||||
|
}
|
||||||
|
|
||||||
config_file_provided = true;
|
config_file_provided = true;
|
||||||
|
config_file_found = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If no configuration file was provided, attempt to find a default file
|
* If no configuration file was provided, attempt to find a default file
|
||||||
|
* in this order:
|
||||||
|
* - current directory
|
||||||
|
* - /etc/repmgr.conf
|
||||||
|
* - default sysconfdir
|
||||||
|
*
|
||||||
|
* here we just check for the existence of the file; parse_config()
|
||||||
|
* will handle read errors etc.
|
||||||
*/
|
*/
|
||||||
if (config_file_provided == false)
|
if (config_file_provided == false)
|
||||||
{
|
{
|
||||||
char my_exec_path[MAXPGPATH];
|
char my_exec_path[MAXPGPATH];
|
||||||
char etc_path[MAXPGPATH];
|
char sysconf_etc_path[MAXPGPATH];
|
||||||
|
|
||||||
/* First check if one is in the default sysconfdir */
|
/* 1. "./repmgr.conf" */
|
||||||
|
if (verbose == true)
|
||||||
|
{
|
||||||
|
log_notice(_("looking for configuration file in current directory\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(config_file_path, MAXPGPATH, "./%s", CONFIG_FILE_NAME);
|
||||||
|
canonicalize_path(config_file_path);
|
||||||
|
|
||||||
|
if (stat(config_file_path, &stat_config) == 0)
|
||||||
|
{
|
||||||
|
config_file_found = true;
|
||||||
|
goto end_search;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 2. "/etc/repmgr.conf" */
|
||||||
|
if (verbose == true)
|
||||||
|
{
|
||||||
|
log_notice(_("looking for configuration file in /etc\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(config_file_path, MAXPGPATH, "/etc/%s", CONFIG_FILE_NAME);
|
||||||
|
if (stat(config_file_path, &stat_config) == 0)
|
||||||
|
{
|
||||||
|
config_file_found = true;
|
||||||
|
goto end_search;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 3. default sysconfdir */
|
||||||
if (find_my_exec(argv0, my_exec_path) < 0)
|
if (find_my_exec(argv0, my_exec_path) < 0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, _("%s: could not find own program executable\n"), argv0);
|
fprintf(stderr, _("%s: could not find own program executable\n"), argv0);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
get_etc_path(my_exec_path, etc_path);
|
get_etc_path(my_exec_path, sysconf_etc_path);
|
||||||
|
|
||||||
snprintf(config_file_path, MAXPGPATH, "%s/repmgr.conf", etc_path);
|
if (verbose == true)
|
||||||
|
|
||||||
log_debug(_("Looking for configuration file in %s\n"), etc_path);
|
|
||||||
|
|
||||||
if (stat(config_file_path, &config) != 0)
|
|
||||||
{
|
{
|
||||||
/* Not found - default to ./repmgr.conf */
|
log_notice(_("looking for configuration file in %s"), sysconf_etc_path);
|
||||||
strncpy(config_file_path, DEFAULT_CONFIG_FILE, MAXPGPATH);
|
}
|
||||||
canonicalize_path(config_file_path);
|
|
||||||
log_debug(_("Looking for configuration file in %s\n"), config_file_path);
|
snprintf(config_file_path, MAXPGPATH, "%s/%s", sysconf_etc_path, CONFIG_FILE_NAME);
|
||||||
|
if (stat(config_file_path, &stat_config) == 0)
|
||||||
|
{
|
||||||
|
config_file_found = true;
|
||||||
|
goto end_search;
|
||||||
|
}
|
||||||
|
|
||||||
|
end_search:
|
||||||
|
if (config_file_found == true)
|
||||||
|
{
|
||||||
|
if (verbose == true)
|
||||||
|
{
|
||||||
|
log_notice(_("configuration file found at: %s\n"), config_file_path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (verbose == true)
|
||||||
|
{
|
||||||
|
log_notice(_("no configuration file provided or found\n"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -102,12 +180,19 @@ load_config(const char *config_file, t_configuration_options *options, char *arg
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse configuration file; if any errors are encountered,
|
||||||
|
* list them and exit.
|
||||||
|
*
|
||||||
|
* Ensure any default values set here are synced with repmgr.conf.sample
|
||||||
|
* and any other documentation.
|
||||||
|
*/
|
||||||
bool
|
bool
|
||||||
parse_config(t_configuration_options *options)
|
parse_config(t_configuration_options *options)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
char *s,
|
char *s,
|
||||||
buff[MAXLINELENGTH];
|
buf[MAXLINELENGTH];
|
||||||
char name[MAXLEN];
|
char name[MAXLEN];
|
||||||
char value[MAXLEN];
|
char value[MAXLEN];
|
||||||
|
|
||||||
@@ -115,36 +200,17 @@ parse_config(t_configuration_options *options)
|
|||||||
PQconninfoOption *conninfo_options;
|
PQconninfoOption *conninfo_options;
|
||||||
char *conninfo_errmsg = NULL;
|
char *conninfo_errmsg = NULL;
|
||||||
|
|
||||||
fp = fopen(config_file_path, "r");
|
/* Collate configuration file errors here for friendlier reporting */
|
||||||
|
static ErrorList config_errors = { NULL, NULL };
|
||||||
|
|
||||||
/*
|
/* Initialize configuration options with sensible defaults
|
||||||
* Since some commands don't require a config file at all, not having one
|
* note: the default log level is set in log.c and does not need
|
||||||
* isn't necessarily a problem.
|
* to be initialised here
|
||||||
*
|
|
||||||
* If the user explictly provided a configuration file and we can't
|
|
||||||
* read it we'll raise an error.
|
|
||||||
*
|
|
||||||
* If no configuration file was provided, we'll try and read the default\
|
|
||||||
* file if it exists and is readable, but won't worry if it's not.
|
|
||||||
*/
|
*/
|
||||||
if (fp == NULL)
|
|
||||||
{
|
|
||||||
if (config_file_provided)
|
|
||||||
{
|
|
||||||
log_err(_("unable to open provided configuration file '%s'; terminating\n"), config_file_path);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
log_notice(_("no configuration file provided and default file '%s' not found - "
|
|
||||||
"continuing with default values\n"),
|
|
||||||
DEFAULT_CONFIG_FILE);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize configuration options with sensible defaults */
|
|
||||||
memset(options->cluster_name, 0, sizeof(options->cluster_name));
|
memset(options->cluster_name, 0, sizeof(options->cluster_name));
|
||||||
options->node = -1;
|
options->node = -1;
|
||||||
options->upstream_node = NO_UPSTREAM_NODE;
|
options->upstream_node = NO_UPSTREAM_NODE;
|
||||||
|
options->use_replication_slots = 0;
|
||||||
memset(options->conninfo, 0, sizeof(options->conninfo));
|
memset(options->conninfo, 0, sizeof(options->conninfo));
|
||||||
options->failover = MANUAL_FAILOVER;
|
options->failover = MANUAL_FAILOVER;
|
||||||
options->priority = DEFAULT_PRIORITY;
|
options->priority = DEFAULT_PRIORITY;
|
||||||
@@ -162,7 +228,7 @@ parse_config(t_configuration_options *options)
|
|||||||
|
|
||||||
/* default to 6 reconnection attempts at intervals of 10 seconds */
|
/* default to 6 reconnection attempts at intervals of 10 seconds */
|
||||||
options->reconnect_attempts = 6;
|
options->reconnect_attempts = 6;
|
||||||
options->reconnect_intvl = 10;
|
options->reconnect_interval = 10;
|
||||||
|
|
||||||
options->monitor_interval_secs = 2;
|
options->monitor_interval_secs = 2;
|
||||||
options->retry_promote_interval_secs = 300;
|
options->retry_promote_interval_secs = 300;
|
||||||
@@ -172,15 +238,45 @@ parse_config(t_configuration_options *options)
|
|||||||
options->tablespace_mapping.head = NULL;
|
options->tablespace_mapping.head = NULL;
|
||||||
options->tablespace_mapping.tail = NULL;
|
options->tablespace_mapping.tail = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If no configuration file available (user didn't specify and none found
|
||||||
|
* in the default locations), return with default values
|
||||||
|
*/
|
||||||
|
if (config_file_found == false)
|
||||||
|
{
|
||||||
|
log_notice(_("no configuration file provided and no default file found - "
|
||||||
|
"continuing with default values\n"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
fp = fopen(config_file_path, "r");
|
||||||
|
|
||||||
/* Read next line */
|
/*
|
||||||
while ((s = fgets(buff, sizeof buff, fp)) != NULL)
|
* A configuration file has been found, either provided by the user
|
||||||
|
* or found in one of the default locations. If we can't open it,
|
||||||
|
* fail with an error.
|
||||||
|
*/
|
||||||
|
if (fp == NULL)
|
||||||
|
{
|
||||||
|
if (config_file_provided)
|
||||||
|
{
|
||||||
|
log_err(_("unable to open provided configuration file \"%s\"; terminating\n"), config_file_path);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_err(_("unable to open default configuration file \"%s\"; terminating\n"), config_file_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read file */
|
||||||
|
while ((s = fgets(buf, sizeof buf, fp)) != NULL)
|
||||||
{
|
{
|
||||||
bool known_parameter = true;
|
bool known_parameter = true;
|
||||||
|
|
||||||
/* Parse name/value pair from line */
|
/* Parse name/value pair from line */
|
||||||
parse_line(buff, name, value);
|
parse_line(buf, name, value);
|
||||||
|
|
||||||
/* Skip blank lines */
|
/* Skip blank lines */
|
||||||
if (!strlen(name))
|
if (!strlen(name))
|
||||||
@@ -194,9 +290,9 @@ parse_config(t_configuration_options *options)
|
|||||||
if (strcmp(name, "cluster") == 0)
|
if (strcmp(name, "cluster") == 0)
|
||||||
strncpy(options->cluster_name, value, MAXLEN);
|
strncpy(options->cluster_name, value, MAXLEN);
|
||||||
else if (strcmp(name, "node") == 0)
|
else if (strcmp(name, "node") == 0)
|
||||||
options->node = atoi(value);
|
options->node = repmgr_atoi(value, "node", &config_errors);
|
||||||
else if (strcmp(name, "upstream_node") == 0)
|
else if (strcmp(name, "upstream_node") == 0)
|
||||||
options->upstream_node = atoi(value);
|
options->upstream_node = repmgr_atoi(value, "upstream_node", &config_errors);
|
||||||
else if (strcmp(name, "conninfo") == 0)
|
else if (strcmp(name, "conninfo") == 0)
|
||||||
strncpy(options->conninfo, value, MAXLEN);
|
strncpy(options->conninfo, value, MAXLEN);
|
||||||
else if (strcmp(name, "rsync_options") == 0)
|
else if (strcmp(name, "rsync_options") == 0)
|
||||||
@@ -223,12 +319,11 @@ parse_config(t_configuration_options *options)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_err(_("value for 'failover' must be 'automatic' or 'manual'\n"));
|
error_list_append(&config_errors,_("value for 'failover' must be 'automatic' or 'manual'\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "priority") == 0)
|
else if (strcmp(name, "priority") == 0)
|
||||||
options->priority = atoi(value);
|
options->priority = repmgr_atoi(value, "priority", &config_errors);
|
||||||
else if (strcmp(name, "node_name") == 0)
|
else if (strcmp(name, "node_name") == 0)
|
||||||
strncpy(options->node_name, value, MAXLEN);
|
strncpy(options->node_name, value, MAXLEN);
|
||||||
else if (strcmp(name, "promote_command") == 0)
|
else if (strcmp(name, "promote_command") == 0)
|
||||||
@@ -236,11 +331,16 @@ parse_config(t_configuration_options *options)
|
|||||||
else if (strcmp(name, "follow_command") == 0)
|
else if (strcmp(name, "follow_command") == 0)
|
||||||
strncpy(options->follow_command, value, MAXLEN);
|
strncpy(options->follow_command, value, MAXLEN);
|
||||||
else if (strcmp(name, "master_response_timeout") == 0)
|
else if (strcmp(name, "master_response_timeout") == 0)
|
||||||
options->master_response_timeout = atoi(value);
|
options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors);
|
||||||
|
/* 'primary_response_timeout' as synonym for 'master_response_timeout' -
|
||||||
|
* we'll switch terminology in a future release (3.1?)
|
||||||
|
*/
|
||||||
|
else if (strcmp(name, "primary_response_timeout") == 0)
|
||||||
|
options->master_response_timeout = repmgr_atoi(value, "primary_response_timeout", &config_errors);
|
||||||
else if (strcmp(name, "reconnect_attempts") == 0)
|
else if (strcmp(name, "reconnect_attempts") == 0)
|
||||||
options->reconnect_attempts = atoi(value);
|
options->reconnect_attempts = repmgr_atoi(value, "reconnect_attempts", &config_errors);
|
||||||
else if (strcmp(name, "reconnect_interval") == 0)
|
else if (strcmp(name, "reconnect_interval") == 0)
|
||||||
options->reconnect_intvl = atoi(value);
|
options->reconnect_interval = repmgr_atoi(value, "reconnect_interval", &config_errors);
|
||||||
else if (strcmp(name, "pg_bindir") == 0)
|
else if (strcmp(name, "pg_bindir") == 0)
|
||||||
strncpy(options->pg_bindir, value, MAXLEN);
|
strncpy(options->pg_bindir, value, MAXLEN);
|
||||||
else if (strcmp(name, "pg_ctl_options") == 0)
|
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||||
@@ -250,11 +350,12 @@ parse_config(t_configuration_options *options)
|
|||||||
else if (strcmp(name, "logfile") == 0)
|
else if (strcmp(name, "logfile") == 0)
|
||||||
strncpy(options->logfile, value, MAXLEN);
|
strncpy(options->logfile, value, MAXLEN);
|
||||||
else if (strcmp(name, "monitor_interval_secs") == 0)
|
else if (strcmp(name, "monitor_interval_secs") == 0)
|
||||||
options->monitor_interval_secs = atoi(value);
|
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors);
|
||||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||||
options->retry_promote_interval_secs = atoi(value);
|
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors);
|
||||||
else if (strcmp(name, "use_replication_slots") == 0)
|
else if (strcmp(name, "use_replication_slots") == 0)
|
||||||
options->use_replication_slots = atoi(value);
|
/* XXX we should have a dedicated boolean argument format */
|
||||||
|
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors);
|
||||||
else if (strcmp(name, "event_notification_command") == 0)
|
else if (strcmp(name, "event_notification_command") == 0)
|
||||||
strncpy(options->event_notification_command, value, MAXLEN);
|
strncpy(options->event_notification_command, value, MAXLEN);
|
||||||
else if (strcmp(name, "event_notifications") == 0)
|
else if (strcmp(name, "event_notifications") == 0)
|
||||||
@@ -274,8 +375,13 @@ parse_config(t_configuration_options *options)
|
|||||||
* as currently e.g. an empty `node` value will be converted to '0'.
|
* as currently e.g. an empty `node` value will be converted to '0'.
|
||||||
*/
|
*/
|
||||||
if (known_parameter == true && !strlen(value)) {
|
if (known_parameter == true && !strlen(value)) {
|
||||||
log_err(_("no value provided for parameter '%s'\n"), name);
|
char error_message_buf[MAXLEN] = "";
|
||||||
exit(ERR_BAD_CONFIG);
|
snprintf(error_message_buf,
|
||||||
|
MAXLEN,
|
||||||
|
_("no value provided for parameter \"%s\""),
|
||||||
|
name);
|
||||||
|
|
||||||
|
error_list_append(&config_errors, error_message_buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -286,64 +392,49 @@ parse_config(t_configuration_options *options)
|
|||||||
/* The following checks are for the presence of the parameter */
|
/* The following checks are for the presence of the parameter */
|
||||||
if (*options->cluster_name == '\0')
|
if (*options->cluster_name == '\0')
|
||||||
{
|
{
|
||||||
log_err(_("required parameter 'cluster' was not found\n"));
|
error_list_append(&config_errors, _("\"cluster\": parameter was not found\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options->node == -1)
|
if (options->node == -1)
|
||||||
{
|
{
|
||||||
log_err(_("required parameter 'node' was not found\n"));
|
error_list_append(&config_errors, _("\"node\": parameter was not found\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options->node == 0)
|
|
||||||
{
|
|
||||||
log_err(_("'node' must be an integer greater than zero\n"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*options->node_name == '\0')
|
if (*options->node_name == '\0')
|
||||||
{
|
{
|
||||||
log_err(_("required parameter 'node_name' was not found\n"));
|
error_list_append(&config_errors, _("\"node_name\": parameter was not found\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*options->conninfo == '\0')
|
if (*options->conninfo == '\0')
|
||||||
{
|
{
|
||||||
log_err(_("required parameter 'conninfo' was not found\n"));
|
error_list_append(&config_errors, _("\"conninfo\": parameter was not found\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
|
||||||
/* Sanity check the provided conninfo string
|
/* Sanity check the provided conninfo string
|
||||||
*
|
*
|
||||||
* NOTE: this verifies the string format and checks for valid options
|
* NOTE: PQconninfoParse() verifies the string format and checks for valid options
|
||||||
* but does not sanity check values
|
* but does not sanity check values
|
||||||
*/
|
*/
|
||||||
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
|
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
|
||||||
if (conninfo_options == NULL)
|
if (conninfo_options == NULL)
|
||||||
{
|
{
|
||||||
log_err(_("Parameter 'conninfo' is invalid: %s"), conninfo_errmsg);
|
char error_message_buf[MAXLEN] = "";
|
||||||
exit(ERR_BAD_CONFIG);
|
snprintf(error_message_buf,
|
||||||
|
MAXLEN,
|
||||||
|
_("\"conninfo\": %s"),
|
||||||
|
conninfo_errmsg);
|
||||||
|
|
||||||
|
error_list_append(&config_errors, error_message_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQconninfoFree(conninfo_options);
|
PQconninfoFree(conninfo_options);
|
||||||
|
|
||||||
/* The following checks are for valid parameter values */
|
|
||||||
if (options->master_response_timeout <= 0)
|
|
||||||
{
|
|
||||||
log_err(_("'master_response_timeout' must be greater than zero\n"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options->reconnect_attempts < 0)
|
if (config_errors.head != NULL)
|
||||||
{
|
{
|
||||||
log_err(_("'reconnect_attempts' must be zero or greater\n"));
|
exit_with_errors(&config_errors);
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options->reconnect_intvl < 0)
|
|
||||||
{
|
|
||||||
log_err(_("'reconnect_interval' must be zero or greater\n"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -378,7 +469,7 @@ trim(char *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
parse_line(char *buff, char *name, char *value)
|
parse_line(char *buf, char *name, char *value)
|
||||||
{
|
{
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int j = 0;
|
int j = 0;
|
||||||
@@ -389,10 +480,10 @@ parse_line(char *buff, char *name, char *value)
|
|||||||
for (; i < MAXLEN; ++i)
|
for (; i < MAXLEN; ++i)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (buff[i] == '=')
|
if (buf[i] == '=')
|
||||||
break;
|
break;
|
||||||
|
|
||||||
switch(buff[i])
|
switch(buf[i])
|
||||||
{
|
{
|
||||||
/* Ignore whitespace */
|
/* Ignore whitespace */
|
||||||
case ' ':
|
case ' ':
|
||||||
@@ -401,7 +492,7 @@ parse_line(char *buff, char *name, char *value)
|
|||||||
case '\t':
|
case '\t':
|
||||||
continue;
|
continue;
|
||||||
default:
|
default:
|
||||||
name[j++] = buff[i];
|
name[j++] = buf[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
name[j] = '\0';
|
name[j] = '\0';
|
||||||
@@ -411,9 +502,9 @@ parse_line(char *buff, char *name, char *value)
|
|||||||
*/
|
*/
|
||||||
for (; i < MAXLEN; ++i)
|
for (; i < MAXLEN; ++i)
|
||||||
{
|
{
|
||||||
if (buff[i+1] == ' ')
|
if (buf[i+1] == ' ')
|
||||||
continue;
|
continue;
|
||||||
if (buff[i+1] == '\t')
|
if (buf[i+1] == '\t')
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@@ -424,12 +515,12 @@ parse_line(char *buff, char *name, char *value)
|
|||||||
*/
|
*/
|
||||||
j = 0;
|
j = 0;
|
||||||
for (++i; i < MAXLEN; ++i)
|
for (++i; i < MAXLEN; ++i)
|
||||||
if (buff[i] == '\'')
|
if (buf[i] == '\'')
|
||||||
continue;
|
continue;
|
||||||
else if (buff[i] == '#')
|
else if (buf[i] == '#')
|
||||||
break;
|
break;
|
||||||
else if (buff[i] != '\n')
|
else if (buf[i] != '\n')
|
||||||
value[j++] = buff[i];
|
value[j++] = buf[i];
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
value[j] = '\0';
|
value[j] = '\0';
|
||||||
@@ -491,7 +582,7 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_options.reconnect_intvl < 0)
|
if (new_options.reconnect_interval < 0)
|
||||||
{
|
{
|
||||||
log_warning(_("new value for 'reconnect_interval' must be zero or greater\n"));
|
log_warning(_("new value for 'reconnect_interval' must be zero or greater\n"));
|
||||||
return false;
|
return false;
|
||||||
@@ -610,10 +701,10 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
config_changed = true;
|
config_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reconnect_intvl */
|
/* reconnect_interval */
|
||||||
if (orig_options->reconnect_intvl != new_options.reconnect_intvl)
|
if (orig_options->reconnect_interval != new_options.reconnect_interval)
|
||||||
{
|
{
|
||||||
orig_options->reconnect_intvl = new_options.reconnect_intvl;
|
orig_options->reconnect_interval = new_options.reconnect_interval;
|
||||||
config_changed = true;
|
config_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -665,6 +756,96 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
error_list_append(ErrorList *error_list, char *error_message)
|
||||||
|
{
|
||||||
|
ErrorListCell *cell;
|
||||||
|
|
||||||
|
cell = (ErrorListCell *) pg_malloc0(sizeof(ErrorListCell));
|
||||||
|
|
||||||
|
if (cell == NULL)
|
||||||
|
{
|
||||||
|
log_err(_("unable to allocate memory; terminating.\n"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
cell->error_message = pg_malloc0(MAXLEN);
|
||||||
|
strncpy(cell->error_message, error_message, MAXLEN);
|
||||||
|
|
||||||
|
if (error_list->tail)
|
||||||
|
{
|
||||||
|
error_list->tail->next = cell;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error_list->head = cell;
|
||||||
|
}
|
||||||
|
|
||||||
|
error_list->tail = cell;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert provided string to an integer using strtol;
|
||||||
|
* on error, if a callback is provided, pass the error message to that,
|
||||||
|
* otherwise exit
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
repmgr_atoi(const char *value, const char *config_item, ErrorList *error_list)
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
long longval = 0;
|
||||||
|
char error_message_buf[MAXLEN] = "";
|
||||||
|
|
||||||
|
/* It's possible that some versions of strtol() don't treat an empty
|
||||||
|
* string as an error.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (*value == '\0')
|
||||||
|
{
|
||||||
|
snprintf(error_message_buf,
|
||||||
|
MAXLEN,
|
||||||
|
_("no value provided for \"%s\""),
|
||||||
|
config_item);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
errno = 0;
|
||||||
|
longval = strtol(value, &endptr, 10);
|
||||||
|
|
||||||
|
if (value == endptr || errno)
|
||||||
|
{
|
||||||
|
snprintf(error_message_buf,
|
||||||
|
MAXLEN,
|
||||||
|
_("\"%s\": invalid value (provided: \"%s\")"),
|
||||||
|
config_item, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Currently there are no values which could be negative */
|
||||||
|
if (longval < 0)
|
||||||
|
{
|
||||||
|
snprintf(error_message_buf,
|
||||||
|
MAXLEN,
|
||||||
|
_("\"%s\" must be zero or greater (provided: %s)"),
|
||||||
|
config_item, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Error message buffer is set */
|
||||||
|
if (error_message_buf[0] != '\0')
|
||||||
|
{
|
||||||
|
if (error_list == NULL)
|
||||||
|
{
|
||||||
|
log_err("%s\n", error_message_buf);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
error_list_append(error_list, error_message_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int32) longval;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Split argument into old_dir and new_dir and append to tablespace mapping
|
* Split argument into old_dir and new_dir and append to tablespace mapping
|
||||||
@@ -797,3 +978,21 @@ parse_event_notifications_list(t_configuration_options *options, const char *arg
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
exit_with_errors(ErrorList *config_errors)
|
||||||
|
{
|
||||||
|
ErrorListCell *cell;
|
||||||
|
|
||||||
|
log_err(_("%s: following errors were found in the configuration file.\n"), progname());
|
||||||
|
|
||||||
|
for (cell = config_errors->head; cell; cell = cell->next)
|
||||||
|
{
|
||||||
|
log_err("%s\n", cell->error_message);
|
||||||
|
}
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
25
config.h
25
config.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* config.h
|
* config.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -24,6 +24,7 @@
|
|||||||
|
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
|
#define CONFIG_FILE_NAME "repmgr.conf"
|
||||||
|
|
||||||
typedef struct EventNotificationListCell
|
typedef struct EventNotificationListCell
|
||||||
{
|
{
|
||||||
@@ -67,7 +68,7 @@ typedef struct
|
|||||||
char ssh_options[QUERY_STR_LEN];
|
char ssh_options[QUERY_STR_LEN];
|
||||||
int master_response_timeout;
|
int master_response_timeout;
|
||||||
int reconnect_attempts;
|
int reconnect_attempts;
|
||||||
int reconnect_intvl;
|
int reconnect_interval;
|
||||||
char pg_bindir[MAXLEN];
|
char pg_bindir[MAXLEN];
|
||||||
char pg_ctl_options[MAXLEN];
|
char pg_ctl_options[MAXLEN];
|
||||||
char pg_basebackup_options[MAXLEN];
|
char pg_basebackup_options[MAXLEN];
|
||||||
@@ -82,11 +83,29 @@ typedef struct
|
|||||||
|
|
||||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||||
|
|
||||||
|
typedef struct ErrorListCell
|
||||||
|
{
|
||||||
|
struct ErrorListCell *next;
|
||||||
|
char *error_message;
|
||||||
|
} ErrorListCell;
|
||||||
|
|
||||||
bool load_config(const char *config_file, t_configuration_options *options, char *argv0);
|
typedef struct ErrorList
|
||||||
|
{
|
||||||
|
ErrorListCell *head;
|
||||||
|
ErrorListCell *tail;
|
||||||
|
} ErrorList;
|
||||||
|
|
||||||
|
void set_progname(const char *argv0);
|
||||||
|
const char * progname(void);
|
||||||
|
|
||||||
|
bool load_config(const char *config_file, bool verbose, t_configuration_options *options, char *argv0);
|
||||||
bool reload_config(t_configuration_options *orig_options);
|
bool reload_config(t_configuration_options *orig_options);
|
||||||
bool parse_config(t_configuration_options *options);
|
bool parse_config(t_configuration_options *options);
|
||||||
void parse_line(char *buff, char *name, char *value);
|
void parse_line(char *buff, char *name, char *value);
|
||||||
char *trim(char *s);
|
char *trim(char *s);
|
||||||
|
void error_list_append(ErrorList *error_list, char *error_message);
|
||||||
|
int repmgr_atoi(const char *s,
|
||||||
|
const char *config_item,
|
||||||
|
ErrorList *error_list);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
298
dbutils.c
298
dbutils.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* dbutils.c - Database connection/management functions
|
* dbutils.c - Database connection/management functions
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -87,6 +87,8 @@ begin_transaction(PGconn *conn)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "begin_transaction()\n");
|
||||||
|
|
||||||
res = PQexec(conn, "BEGIN");
|
res = PQexec(conn, "BEGIN");
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
@@ -109,6 +111,8 @@ commit_transaction(PGconn *conn)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "commit_transaction()\n");
|
||||||
|
|
||||||
res = PQexec(conn, "COMMIT");
|
res = PQexec(conn, "COMMIT");
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
@@ -131,6 +135,8 @@ rollback_transaction(PGconn *conn)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "rollback_transaction()\n");
|
||||||
|
|
||||||
res = PQexec(conn, "ROLLBACK");
|
res = PQexec(conn, "ROLLBACK");
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
@@ -158,7 +164,8 @@ check_cluster_schema(PGconn *conn)
|
|||||||
"SELECT 1 FROM pg_namespace WHERE nspname = '%s'",
|
"SELECT 1 FROM pg_namespace WHERE nspname = '%s'",
|
||||||
get_repmgr_schema());
|
get_repmgr_schema());
|
||||||
|
|
||||||
log_debug(_("check_cluster_schema(): %s\n"), sqlquery);
|
log_verbose(LOG_DEBUG, "check_cluster_schema(): %s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
@@ -188,17 +195,22 @@ is_standby(PGconn *conn)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
char *sqlquery = "SELECT pg_catalog.pg_is_in_recovery()";
|
||||||
|
|
||||||
res = PQexec(conn, "SELECT pg_is_in_recovery()");
|
log_verbose(LOG_DEBUG, "is_standby(): %s\n", sqlquery);
|
||||||
|
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Can't query server mode: %s"),
|
log_err(_("Unable to query server mode: %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
result = -1;
|
result = -1;
|
||||||
}
|
}
|
||||||
else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
|
else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
|
||||||
|
{
|
||||||
result = 1;
|
result = 1;
|
||||||
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return result;
|
return result;
|
||||||
@@ -285,6 +297,8 @@ get_master_node_id(PGconn *conn, char *cluster)
|
|||||||
get_repmgr_schema_quoted(conn),
|
get_repmgr_schema_quoted(conn),
|
||||||
cluster);
|
cluster);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "get_master_node_id():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
@@ -341,14 +355,17 @@ guc_set(PGconn *conn, const char *parameter, const char *op,
|
|||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
int retval = 1;
|
int retval = 1;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"SELECT true FROM pg_settings "
|
||||||
" WHERE name = '%s' AND setting %s '%s'",
|
" WHERE name = '%s' AND setting %s '%s'",
|
||||||
parameter, op, value);
|
parameter, op, value);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "guc_set():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("GUC setting check PQexec failed: %s"),
|
log_err(_("guc_set(): unable to execute query\n%s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
retval = -1;
|
retval = -1;
|
||||||
}
|
}
|
||||||
@@ -379,10 +396,12 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
|||||||
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
||||||
parameter, datatype, op, value, datatype);
|
parameter, datatype, op, value, datatype);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "guc_set_typed():n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("GUC setting check PQexec failed: %s"),
|
log_err(_("guc_set_typed(): unable to execute query\n%s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
retval = -1;
|
retval = -1;
|
||||||
}
|
}
|
||||||
@@ -403,15 +422,16 @@ get_cluster_size(PGconn *conn, char *size)
|
|||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
sqlquery_snprintf(
|
sqlquery_snprintf(sqlquery,
|
||||||
sqlquery,
|
"SELECT pg_catalog.pg_size_pretty(SUM(pg_catalog.pg_database_size(oid))::bigint) "
|
||||||
"SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) "
|
|
||||||
" FROM pg_database ");
|
" FROM pg_database ");
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "get_cluster_size():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("get_cluster_size(): PQexec failed: %s"),
|
log_err(_("get_cluster_size(): unable to execute query\n%s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
@@ -439,7 +459,7 @@ get_pg_setting(PGconn *conn, const char *setting, char *output)
|
|||||||
" FROM pg_settings WHERE name = '%s'",
|
" FROM pg_settings WHERE name = '%s'",
|
||||||
setting);
|
setting);
|
||||||
|
|
||||||
log_debug(_("get_pg_setting(): %s\n"), sqlquery);
|
log_verbose(LOG_DEBUG, "get_pg_setting(): %s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
@@ -461,13 +481,14 @@ get_pg_setting(PGconn *conn, const char *setting, char *output)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_err(_("unknown parameter: %s"), PQgetvalue(res, i, 0));
|
/* XXX highly unlikely this would ever happen */
|
||||||
|
log_err(_("get_pg_setting(): unknown parameter \"%s\""), PQgetvalue(res, i, 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (success == true)
|
if (success == true)
|
||||||
{
|
{
|
||||||
log_debug(_("get_pg_setting(): returned value is '%s'\n"), output);
|
log_debug(_("get_pg_setting(): returned value is \"%s\"\n"), output);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
@@ -512,13 +533,13 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
|||||||
cluster,
|
cluster,
|
||||||
node_id);
|
node_id);
|
||||||
|
|
||||||
log_debug("get_upstream_connection(): %s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "get_upstream_connection():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(standby_conn, sqlquery);
|
res = PQexec(standby_conn, sqlquery);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to get conninfo for upstream server: %s\n"),
|
log_err(_("unable to get conninfo for upstream server\n%s\n"),
|
||||||
PQerrorMessage(standby_conn));
|
PQerrorMessage(standby_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -538,7 +559,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
|||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
log_debug("conninfo is: '%s'\n", upstream_conninfo);
|
log_verbose(LOG_DEBUG, "get_upstream_connection(): conninfo is \"%s\"\n", upstream_conninfo);
|
||||||
upstream_conn = establish_db_connection(upstream_conninfo, false);
|
upstream_conn = establish_db_connection(upstream_conninfo, false);
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
@@ -553,24 +574,26 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* get a connection to master by reading repl_nodes, creating a connection
|
* Read the node list from the local node and attempt to connect to each node
|
||||||
* to each node (one at a time) and finding if it is a master or a standby
|
* in turn to definitely establish if it's the cluster primary.
|
||||||
*
|
*
|
||||||
* NB: If master_conninfo_out may be NULL. If it is non-null, it is assumed to
|
* The node list is returned in the order which makes it likely that the
|
||||||
* point to allocated memory of MAXCONNINFO in length, and the master server
|
* current primary will be returned first, reducing the number of speculative
|
||||||
* connection string is placed there.
|
* connections which need to be made to other nodes.
|
||||||
|
*
|
||||||
|
* If master_conninfo_out points to allocated memory of MAXCONNINFO in length,
|
||||||
|
* the primary server's conninfo string will be copied there.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PGconn *
|
PGconn *
|
||||||
get_master_connection(PGconn *standby_conn, char *cluster,
|
get_master_connection(PGconn *standby_conn, char *cluster,
|
||||||
int *master_id, char *master_conninfo_out)
|
int *master_id, char *master_conninfo_out)
|
||||||
{
|
{
|
||||||
PGconn *master_conn = NULL;
|
PGconn *remote_conn = NULL;
|
||||||
PGresult *res1;
|
PGresult *res;
|
||||||
PGresult *res2;
|
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
char master_conninfo_stack[MAXCONNINFO];
|
char remote_conninfo_stack[MAXCONNINFO];
|
||||||
char *master_conninfo = &*master_conninfo_stack;
|
char *remote_conninfo = &*remote_conninfo_stack;
|
||||||
|
|
||||||
int i,
|
int i,
|
||||||
node_id;
|
node_id;
|
||||||
@@ -581,59 +604,60 @@ get_master_connection(PGconn *standby_conn, char *cluster,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* find all nodes belonging to this cluster */
|
/* find all nodes belonging to this cluster */
|
||||||
log_info(_("finding node list for cluster '%s'\n"),
|
log_info(_("retrieving node list for cluster '%s'\n"),
|
||||||
cluster);
|
cluster);
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT id, conninfo "
|
" SELECT id, conninfo, "
|
||||||
|
" CASE WHEN type = 'master' THEN 1 ELSE 2 END AS type_priority"
|
||||||
" FROM %s.repl_nodes "
|
" FROM %s.repl_nodes "
|
||||||
" WHERE cluster = '%s' "
|
" WHERE cluster = '%s' "
|
||||||
" AND type != 'witness' ",
|
" AND type != 'witness' "
|
||||||
|
"ORDER BY active DESC, type_priority, priority, id",
|
||||||
get_repmgr_schema_quoted(standby_conn),
|
get_repmgr_schema_quoted(standby_conn),
|
||||||
cluster);
|
cluster);
|
||||||
|
|
||||||
res1 = PQexec(standby_conn, sqlquery);
|
log_verbose(LOG_DEBUG, "get_master_connection():\n%s\n", sqlquery);
|
||||||
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
|
||||||
|
res = PQexec(standby_conn, sqlquery);
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to retrieve node records: %s\n"),
|
log_err(_("unable to retrieve node records: %s\n"),
|
||||||
PQerrorMessage(standby_conn));
|
PQerrorMessage(standby_conn));
|
||||||
PQclear(res1);
|
PQclear(res);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < PQntuples(res1); i++)
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
{
|
{
|
||||||
/* initialize with the values of the current node being processed */
|
int is_node_standby;
|
||||||
node_id = atoi(PQgetvalue(res1, i, 0));
|
|
||||||
strncpy(master_conninfo, PQgetvalue(res1, i, 1), MAXCONNINFO);
|
|
||||||
log_info(_("checking role of cluster node '%i'\n"),
|
|
||||||
node_id);
|
|
||||||
master_conn = establish_db_connection(master_conninfo, false);
|
|
||||||
|
|
||||||
if (PQstatus(master_conn) != CONNECTION_OK)
|
/* initialize with the values of the current node being processed */
|
||||||
|
node_id = atoi(PQgetvalue(res, i, 0));
|
||||||
|
strncpy(remote_conninfo, PQgetvalue(res, i, 1), MAXCONNINFO);
|
||||||
|
log_verbose(LOG_INFO,
|
||||||
|
_("checking role of cluster node '%i'\n"),
|
||||||
|
node_id);
|
||||||
|
remote_conn = establish_db_connection(remote_conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(remote_conn) != CONNECTION_OK)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
is_node_standby = is_standby(remote_conn);
|
||||||
* Can't use the is_standby() function here because on error that
|
|
||||||
* function closes the connection passed and exits. This still needs
|
|
||||||
* to close master_conn first.
|
|
||||||
*/
|
|
||||||
res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");
|
|
||||||
|
|
||||||
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
|
if (is_node_standby == -1)
|
||||||
{
|
{
|
||||||
log_err(_("unable to retrieve recovery state from this node: %s\n"),
|
log_err(_("unable to retrieve recovery state from node %i:\n%s\n"),
|
||||||
PQerrorMessage(master_conn));
|
node_id,
|
||||||
PQclear(res2);
|
PQerrorMessage(remote_conn));
|
||||||
PQfinish(master_conn);
|
PQfinish(remote_conn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if false, this is the master */
|
/* if is_standby() returns 0, queried node is the master */
|
||||||
if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
|
if (is_node_standby == 0)
|
||||||
{
|
{
|
||||||
PQclear(res2);
|
PQclear(res);
|
||||||
PQclear(res1);
|
|
||||||
log_debug(_("get_master_connection(): current master node is %i\n"), node_id);
|
log_debug(_("get_master_connection(): current master node is %i\n"), node_id);
|
||||||
|
|
||||||
if (master_id != NULL)
|
if (master_id != NULL)
|
||||||
@@ -641,14 +665,12 @@ get_master_connection(PGconn *standby_conn, char *cluster,
|
|||||||
*master_id = node_id;
|
*master_id = node_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
return master_conn;
|
return remote_conn;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* if it is a standby, clear info */
|
|
||||||
PQclear(res2);
|
|
||||||
PQfinish(master_conn);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* if it is a standby, clear connection info and continue*/
|
||||||
|
PQfinish(remote_conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -659,7 +681,7 @@ get_master_connection(PGconn *standby_conn, char *cluster,
|
|||||||
* Probably we will need to check the error to know if we need to start
|
* Probably we will need to check the error to know if we need to start
|
||||||
* failover procedure or just fix some situation on the standby.
|
* failover procedure or just fix some situation on the standby.
|
||||||
*/
|
*/
|
||||||
PQclear(res1);
|
PQclear(res);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -687,7 +709,7 @@ wait_connection_availability(PGconn *conn, long long timeout)
|
|||||||
{
|
{
|
||||||
if (PQconsumeInput(conn) == 0)
|
if (PQconsumeInput(conn) == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("wait_connection_availability: could not receive data from connection. %s\n"),
|
log_warning(_("wait_connection_availability(): could not receive data from connection. %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -714,7 +736,7 @@ wait_connection_availability(PGconn *conn, long long timeout)
|
|||||||
if (select(sock, &read_set, NULL, NULL, &tmout) == -1)
|
if (select(sock, &read_set, NULL, NULL, &tmout) == -1)
|
||||||
{
|
{
|
||||||
log_warning(
|
log_warning(
|
||||||
_("wait_connection_availability: select() returned with error: %s"),
|
_("wait_connection_availability(): select() returned with error\n%s\n"),
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -730,7 +752,7 @@ wait_connection_availability(PGconn *conn, long long timeout)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_warning(_("wait_connection_availability: timeout reached"));
|
log_warning(_("wait_connection_availability(): timeout reached"));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -765,6 +787,12 @@ cancel_query(PGconn *conn, int timeout)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Return the repmgr schema as an unmodified string
|
||||||
|
* This is useful for displaying the schema name in log messages,
|
||||||
|
* however inclusion in SQL statements, get_repmgr_schema_quoted() should
|
||||||
|
* always be used.
|
||||||
|
*/
|
||||||
char *
|
char *
|
||||||
get_repmgr_schema(void)
|
get_repmgr_schema(void)
|
||||||
{
|
{
|
||||||
@@ -806,6 +834,8 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
|||||||
" WHERE slot_name = '%s' ",
|
" WHERE slot_name = '%s' ",
|
||||||
slot_name);
|
slot_name);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
@@ -826,7 +856,7 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
|||||||
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||||
{
|
{
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
log_debug(_("Replication slot '%s' exists but is inactive; reusing\n"),
|
log_debug("Replication slot '%s' exists but is inactive; reusing\n",
|
||||||
slot_name);
|
slot_name);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -842,6 +872,7 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
|||||||
slot_name);
|
slot_name);
|
||||||
|
|
||||||
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
|
||||||
|
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -857,6 +888,33 @@ create_replication_slot(PGconn *conn, char *slot_name)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
drop_replication_slot(PGconn *conn, char *slot_name)
|
||||||
|
{
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
PGresult *res;
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"SELECT pg_drop_replication_slot('%s')",
|
||||||
|
slot_name);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "drop_replication_slot():\n%s\n", sqlquery);
|
||||||
|
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_err(_("unable to drop replication slot \"%s\":\n %s\n"),
|
||||||
|
slot_name,
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "replication slot \"%s\" successfully dropped\n",
|
||||||
|
slot_name);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint)
|
start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint)
|
||||||
@@ -865,11 +923,11 @@ start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint)
|
|||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld', %s))",
|
"SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_start_backup('repmgr_standby_clone_%ld', %s))",
|
||||||
time(NULL),
|
time(NULL),
|
||||||
fast_checkpoint ? "TRUE" : "FALSE");
|
fast_checkpoint ? "TRUE" : "FALSE");
|
||||||
|
|
||||||
log_debug(_("standby clone: %s\n"), sqlquery);
|
log_verbose(LOG_DEBUG, "start_backup():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -884,7 +942,7 @@ start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint)
|
|||||||
char *first_wal_seg_pq = PQgetvalue(res, 0, 0);
|
char *first_wal_seg_pq = PQgetvalue(res, 0, 0);
|
||||||
size_t buf_sz = strlen(first_wal_seg_pq);
|
size_t buf_sz = strlen(first_wal_seg_pq);
|
||||||
|
|
||||||
first_wal_segment = malloc(buf_sz + 1);
|
first_wal_segment = pg_malloc0(buf_sz + 1);
|
||||||
xsnprintf(first_wal_segment, buf_sz + 1, "%s", first_wal_seg_pq);
|
xsnprintf(first_wal_segment, buf_sz + 1, "%s", first_wal_seg_pq);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -900,7 +958,7 @@ stop_backup(PGconn *conn, char *last_wal_segment)
|
|||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())");
|
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_stop_backup())");
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -915,7 +973,7 @@ stop_backup(PGconn *conn, char *last_wal_segment)
|
|||||||
char *last_wal_seg_pq = PQgetvalue(res, 0, 0);
|
char *last_wal_seg_pq = PQgetvalue(res, 0, 0);
|
||||||
size_t buf_sz = strlen(last_wal_seg_pq);
|
size_t buf_sz = strlen(last_wal_seg_pq);
|
||||||
|
|
||||||
last_wal_segment = malloc(buf_sz + 1);
|
last_wal_segment = pg_malloc0(buf_sz + 1);
|
||||||
xsnprintf(last_wal_segment, buf_sz + 1, "%s", last_wal_seg_pq);
|
xsnprintf(last_wal_segment, buf_sz + 1, "%s", last_wal_seg_pq);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -936,6 +994,8 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
|
|||||||
config_param,
|
config_param,
|
||||||
state ? "TRUE" : "FALSE");
|
state ? "TRUE" : "FALSE");
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "set_config_bool():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
@@ -967,11 +1027,13 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
|
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
|
||||||
log_debug("copy_configuration: %s\n", sqlquery);
|
|
||||||
|
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(witnessconn, sqlquery);
|
res = PQexec(witnessconn, sqlquery);
|
||||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Cannot clean node details in the witness, %s\n",
|
log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
|
||||||
PQerrorMessage(witnessconn));
|
PQerrorMessage(witnessconn));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -979,10 +1041,13 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
||||||
get_repmgr_schema_quoted(masterconn));
|
get_repmgr_schema_quoted(masterconn));
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(masterconn, sqlquery);
|
res = PQexec(masterconn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Can't get configuration from master: %s\n",
|
log_err("Unable to retrieve node records from master:\n%s\n",
|
||||||
PQerrorMessage(masterconn));
|
PQerrorMessage(masterconn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return false;
|
return false;
|
||||||
@@ -991,9 +1056,11 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
for (i = 0; i < PQntuples(res); i++)
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
{
|
{
|
||||||
bool node_record_created;
|
bool node_record_created;
|
||||||
char *witness = PQgetvalue(res, i, 4);
|
|
||||||
|
|
||||||
log_debug(_("copy_configuration(): %s\n"), witness);
|
log_verbose(LOG_DEBUG,
|
||||||
|
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
||||||
|
PQgetvalue(res, i, 4),
|
||||||
|
PQgetvalue(res, i, 0));
|
||||||
|
|
||||||
node_record_created = create_node_record(witnessconn,
|
node_record_created = create_node_record(witnessconn,
|
||||||
"copy_configuration",
|
"copy_configuration",
|
||||||
@@ -1013,7 +1080,9 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
|
|
||||||
if (node_record_created == false)
|
if (node_record_created == false)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Unable to copy node record to witness database: %s\n",
|
PQclear(res);
|
||||||
|
|
||||||
|
log_err("Unable to copy node record to witness database\n%s\n",
|
||||||
PQerrorMessage(witnessconn));
|
PQerrorMessage(witnessconn));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1069,6 +1138,7 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
|||||||
maxlen_snprintf(slot_name_buf, "%s", "NULL");
|
maxlen_snprintf(slot_name_buf, "%s", "NULL");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* XXX convert to placeholder query */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_nodes "
|
"INSERT INTO %s.repl_nodes "
|
||||||
" (id, type, upstream_node_id, cluster, "
|
" (id, type, upstream_node_id, cluster, "
|
||||||
@@ -1084,15 +1154,17 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
|||||||
slot_name_buf,
|
slot_name_buf,
|
||||||
priority);
|
priority);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
||||||
|
|
||||||
if (action != NULL)
|
if (action != NULL)
|
||||||
{
|
{
|
||||||
log_debug(_("%s: %s\n"), action, sqlquery);
|
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
||||||
}
|
}
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("Unable to create node record: %s\n"),
|
log_err(_("Unable to create node record\n%s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return false;
|
return false;
|
||||||
@@ -1115,15 +1187,18 @@ delete_node_record(PGconn *conn, int node, char *action)
|
|||||||
" WHERE id = %d",
|
" WHERE id = %d",
|
||||||
get_repmgr_schema_quoted(conn),
|
get_repmgr_schema_quoted(conn),
|
||||||
node);
|
node);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "delete_node_record(): %s\n", sqlquery);
|
||||||
|
|
||||||
if (action != NULL)
|
if (action != NULL)
|
||||||
{
|
{
|
||||||
log_debug(_("%s: %s\n"), action, sqlquery);
|
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
||||||
}
|
}
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("Unable to delete node record: %s\n"),
|
log_err(_("Unable to delete node record: %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return false;
|
return false;
|
||||||
@@ -1195,6 +1270,8 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
" RETURNING event_timestamp ",
|
" RETURNING event_timestamp ",
|
||||||
get_repmgr_schema_quoted(conn));
|
get_repmgr_schema_quoted(conn));
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "create_event_record():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexecParams(conn,
|
res = PQexecParams(conn,
|
||||||
sqlquery,
|
sqlquery,
|
||||||
4,
|
4,
|
||||||
@@ -1206,7 +1283,6 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
|
|
||||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
|
|
||||||
log_warning(_("Unable to create event record: %s\n"),
|
log_warning(_("Unable to create event record: %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
|
|
||||||
@@ -1217,7 +1293,7 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
{
|
{
|
||||||
/* Store timestamp to send to the notification command */
|
/* Store timestamp to send to the notification command */
|
||||||
strncpy(event_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
strncpy(event_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
||||||
log_debug(_("Event timestamp is: %s\n"), event_timestamp);
|
log_verbose(LOG_DEBUG, "create_event_record(): Event timestamp is \"%s\"\n", event_timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
@@ -1337,12 +1413,13 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
|
|
||||||
*dst_ptr = '\0';
|
*dst_ptr = '\0';
|
||||||
|
|
||||||
log_debug(_("Executing: %s\n"), parsed_command);
|
log_debug("create_event_record(): executing\n%s\n", parsed_command);
|
||||||
|
|
||||||
r = system(parsed_command);
|
r = system(parsed_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("Unable to execute event notification command\n"));
|
log_warning(_("Unable to execute event notification command\n"));
|
||||||
|
log_info(_("Parsed event notification command was:\n%s\n"), parsed_command);
|
||||||
success = false;
|
success = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1350,6 +1427,50 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update node record following change of status
|
||||||
|
* (e.g. inactive primary converted to standby)
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active)
|
||||||
|
{
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
" UPDATE %s.repl_nodes "
|
||||||
|
" SET type = '%s', "
|
||||||
|
" upstream_node_id = %i, "
|
||||||
|
" active = %s "
|
||||||
|
" WHERE cluster = '%s' "
|
||||||
|
" AND id = %i ",
|
||||||
|
get_repmgr_schema_quoted(conn),
|
||||||
|
type,
|
||||||
|
upstream_node_id,
|
||||||
|
active ? "TRUE" : "FALSE",
|
||||||
|
cluster_name,
|
||||||
|
this_node_id);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "update_node_record_status():\n%s\n", sqlquery);
|
||||||
|
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to update node record: %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id)
|
update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id)
|
||||||
{
|
{
|
||||||
@@ -1367,6 +1488,9 @@ update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_
|
|||||||
new_upstream_node_id,
|
new_upstream_node_id,
|
||||||
cluster_name,
|
cluster_name,
|
||||||
this_node_id);
|
this_node_id);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "update_node_record_set_upstream():\n%s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
@@ -1398,7 +1522,7 @@ get_node_record(PGconn *conn, char *cluster, int node_id)
|
|||||||
cluster,
|
cluster,
|
||||||
node_id);
|
node_id);
|
||||||
|
|
||||||
log_debug("get_node_record(): %s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery);
|
||||||
|
|
||||||
return PQexec(conn, sqlquery);
|
return PQexec(conn, sqlquery);
|
||||||
}
|
}
|
||||||
|
|||||||
45
dbutils.h
45
dbutils.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* dbutils.h
|
* dbutils.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -20,10 +20,51 @@
|
|||||||
#ifndef _REPMGR_DBUTILS_H_
|
#ifndef _REPMGR_DBUTILS_H_
|
||||||
#define _REPMGR_DBUTILS_H_
|
#define _REPMGR_DBUTILS_H_
|
||||||
|
|
||||||
|
#include "access/xlogdefs.h"
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
UNKNOWN = 0,
|
||||||
|
MASTER,
|
||||||
|
STANDBY,
|
||||||
|
WITNESS
|
||||||
|
} t_server_type;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Struct to store node information
|
||||||
|
*/
|
||||||
|
typedef struct s_node_info
|
||||||
|
{
|
||||||
|
int node_id;
|
||||||
|
int upstream_node_id;
|
||||||
|
t_server_type type;
|
||||||
|
char name[MAXLEN];
|
||||||
|
char conninfo_str[MAXLEN];
|
||||||
|
char slot_name[MAXLEN];
|
||||||
|
int priority;
|
||||||
|
bool active;
|
||||||
|
bool is_ready;
|
||||||
|
bool is_visible;
|
||||||
|
XLogRecPtr xlog_location;
|
||||||
|
} t_node_info;
|
||||||
|
|
||||||
|
|
||||||
|
#define T_NODE_INFO_INITIALIZER { \
|
||||||
|
NODE_NOT_FOUND, \
|
||||||
|
NO_UPSTREAM_NODE, \
|
||||||
|
UNKNOWN, \
|
||||||
|
"", \
|
||||||
|
"", \
|
||||||
|
"", \
|
||||||
|
DEFAULT_PRIORITY, \
|
||||||
|
true, \
|
||||||
|
false, \
|
||||||
|
false, \
|
||||||
|
InvalidXLogRecPtr \
|
||||||
|
}
|
||||||
|
|
||||||
PGconn *establish_db_connection(const char *conninfo,
|
PGconn *establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
@@ -58,6 +99,7 @@ bool cancel_query(PGconn *conn, int timeout);
|
|||||||
char *get_repmgr_schema(void);
|
char *get_repmgr_schema(void);
|
||||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
char *get_repmgr_schema_quoted(PGconn *conn);
|
||||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
bool create_replication_slot(PGconn *conn, char *slot_name);
|
||||||
|
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||||
|
|
||||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||||
@@ -66,6 +108,7 @@ bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_
|
|||||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||||
|
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||||
PGresult * get_node_record(PGconn *conn, char *cluster, int node_id);
|
PGresult * get_node_record(PGconn *conn, char *cluster, int node_id);
|
||||||
|
|
||||||
|
|||||||
2
debian/repmgr.repmgrd.init
vendored
2
debian/repmgr.repmgrd.init
vendored
@@ -59,7 +59,7 @@ do_stop()
|
|||||||
# 0 if daemon has been stopped
|
# 0 if daemon has been stopped
|
||||||
# 1 if daemon was already stopped
|
# 1 if daemon was already stopped
|
||||||
# other if daemon could not be stopped or a failure occurred
|
# other if daemon could not be stopped or a failure occurred
|
||||||
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN
|
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $REPMGRD_PIDFILE --name "$(basename $REPMGRD_BIN)"
|
||||||
}
|
}
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* errcode.h
|
* errcode.h
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -36,5 +36,6 @@
|
|||||||
#define ERR_SYS_FAILURE 13
|
#define ERR_SYS_FAILURE 13
|
||||||
#define ERR_BAD_BASEBACKUP 14
|
#define ERR_BAD_BASEBACKUP 14
|
||||||
#define ERR_INTERNAL 15
|
#define ERR_INTERNAL 15
|
||||||
|
#define ERR_MONITORING_FAIL 16
|
||||||
|
|
||||||
#endif /* _ERRCODE_H_ */
|
#endif /* _ERRCODE_H_ */
|
||||||
|
|||||||
130
log.c
130
log.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.c - Logging methods
|
* log.c - Logging methods
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This module is a set of methods for logging (currently only syslog)
|
* This module is a set of methods for logging (currently only syslog)
|
||||||
*
|
*
|
||||||
@@ -39,13 +39,37 @@
|
|||||||
|
|
||||||
/* #define REPMGR_DEBUG */
|
/* #define REPMGR_DEBUG */
|
||||||
|
|
||||||
|
static int detect_log_facility(const char *facility);
|
||||||
|
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
|
||||||
|
|
||||||
|
int log_type = REPMGR_STDERR;
|
||||||
|
int log_level = LOG_NOTICE;
|
||||||
|
int last_log_level = LOG_NOTICE;
|
||||||
|
int verbose_logging = false;
|
||||||
|
int terse_logging = false;
|
||||||
|
|
||||||
void
|
void
|
||||||
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list arglist;
|
||||||
|
|
||||||
|
va_start(arglist, fmt);
|
||||||
|
_stderr_log_with_level(level_name, level, fmt, arglist);
|
||||||
|
va_end(arglist);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
_stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
|
||||||
{
|
{
|
||||||
time_t t;
|
time_t t;
|
||||||
struct tm *tm;
|
struct tm *tm;
|
||||||
char buff[100];
|
char buff[100];
|
||||||
va_list ap;
|
|
||||||
|
/*
|
||||||
|
* Store the requested level so that if there's a subsequent
|
||||||
|
* log_hint(), we can suppress that if appropriate.
|
||||||
|
*/
|
||||||
|
last_log_level = level;
|
||||||
|
|
||||||
if (log_level >= level)
|
if (log_level >= level)
|
||||||
{
|
{
|
||||||
@@ -54,24 +78,74 @@ stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
|||||||
strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
||||||
fprintf(stderr, "%s [%s] ", buff, level_name);
|
fprintf(stderr, "%s [%s] ", buff, level_name);
|
||||||
|
|
||||||
va_start(ap, fmt);
|
|
||||||
vfprintf(stderr, fmt, ap);
|
vfprintf(stderr, fmt, ap);
|
||||||
va_end(ap);
|
|
||||||
|
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
log_hint(const char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list ap;
|
||||||
|
|
||||||
static int detect_log_level(const char *level);
|
if (terse_logging == false)
|
||||||
static int detect_log_facility(const char *facility);
|
{
|
||||||
|
va_start(ap, fmt);
|
||||||
|
_stderr_log_with_level("HINT", last_log_level, fmt, ap);
|
||||||
|
va_end(ap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
log_verbose(int level, const char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list ap;
|
||||||
|
|
||||||
|
va_start(ap, fmt);
|
||||||
|
|
||||||
|
if (verbose_logging == true)
|
||||||
|
{
|
||||||
|
switch(level)
|
||||||
|
{
|
||||||
|
case LOG_EMERG:
|
||||||
|
_stderr_log_with_level("EMERG", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
case LOG_ALERT:
|
||||||
|
_stderr_log_with_level("ALERT", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
case LOG_CRIT:
|
||||||
|
_stderr_log_with_level("CRIT", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
case LOG_ERR:
|
||||||
|
_stderr_log_with_level("ERR", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
case LOG_WARNING:
|
||||||
|
_stderr_log_with_level("WARNING", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
case LOG_NOTICE:
|
||||||
|
_stderr_log_with_level("NOTICE", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
case LOG_INFO:
|
||||||
|
_stderr_log_with_level("INFO", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
case LOG_DEBUG:
|
||||||
|
_stderr_log_with_level("DEBUG", level, fmt, ap);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
va_end(ap);
|
||||||
|
}
|
||||||
|
|
||||||
int log_type = REPMGR_STDERR;
|
|
||||||
int log_level = LOG_NOTICE;
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
logger_init(t_configuration_options * opts, const char *ident, const char *level, const char *facility)
|
logger_init(t_configuration_options * opts, const char *ident)
|
||||||
{
|
{
|
||||||
|
char *level = opts->loglevel;
|
||||||
|
char *facility = opts->logfacility;
|
||||||
|
|
||||||
int l;
|
int l;
|
||||||
int f;
|
int f;
|
||||||
|
|
||||||
@@ -95,10 +169,10 @@ logger_init(t_configuration_options * opts, const char *ident, const char *level
|
|||||||
printf("Assigned level for logger: %d\n", l);
|
printf("Assigned level for logger: %d\n", l);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (l > 0)
|
if (l >= 0)
|
||||||
log_level = l;
|
log_level = l;
|
||||||
else
|
else
|
||||||
stderr_log_warning(_("Cannot detect log level %s (use any of DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level);
|
stderr_log_warning(_("Invalid log level \"%s\" (available values: DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (facility && *facility)
|
if (facility && *facility)
|
||||||
@@ -174,9 +248,9 @@ logger_init(t_configuration_options * opts, const char *ident, const char *level
|
|||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
logger_shutdown(void)
|
logger_shutdown(void)
|
||||||
{
|
{
|
||||||
@@ -189,17 +263,32 @@ logger_shutdown(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set a minimum logging level. Intended for command line verbosity
|
* Indicate whether extra-verbose logging is required. This will
|
||||||
* options, which might increase requested logging over what's specified
|
* generate a lot of output, particularly debug logging, and should
|
||||||
* in the regular configuration file.
|
* not be permanently enabled in production.
|
||||||
|
*
|
||||||
|
* NOTE: in previous repmgr versions, this option forced the log
|
||||||
|
* level to INFO.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
logger_min_verbose(int minimum)
|
logger_set_verbose(void)
|
||||||
{
|
{
|
||||||
if (log_level < minimum)
|
verbose_logging = true;
|
||||||
log_level = minimum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Indicate whether some non-critical log messages can be omitted.
|
||||||
|
* Currently this includes warnings about irrelevant command line
|
||||||
|
* options and hints.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void logger_set_terse(void)
|
||||||
|
{
|
||||||
|
terse_logging = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
detect_log_level(const char *level)
|
detect_log_level(const char *level)
|
||||||
{
|
{
|
||||||
@@ -220,17 +309,16 @@ detect_log_level(const char *level)
|
|||||||
if (!strcmp(level, "EMERG"))
|
if (!strcmp(level, "EMERG"))
|
||||||
return LOG_EMERG;
|
return LOG_EMERG;
|
||||||
|
|
||||||
return 0;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
static int
|
||||||
detect_log_facility(const char *facility)
|
detect_log_facility(const char *facility)
|
||||||
{
|
{
|
||||||
int local = 0;
|
int local = 0;
|
||||||
|
|
||||||
if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6)
|
if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6)
|
||||||
{
|
{
|
||||||
|
|
||||||
local = atoi(&facility[5]);
|
local = atoi(&facility[5]);
|
||||||
|
|
||||||
switch (local)
|
switch (local)
|
||||||
|
|||||||
14
log.h
14
log.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.h
|
* log.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -112,13 +112,19 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
int detect_log_level(const char *level);
|
||||||
|
|
||||||
/* Logger initialisation and shutdown */
|
/* Logger initialisation and shutdown */
|
||||||
|
|
||||||
|
bool logger_init(t_configuration_options * opts, const char *ident);
|
||||||
|
|
||||||
bool logger_shutdown(void);
|
bool logger_shutdown(void);
|
||||||
|
|
||||||
bool logger_init(t_configuration_options * opts, const char *ident,
|
void logger_set_verbose(void);
|
||||||
const char *level, const char *facility);
|
void logger_set_terse(void);
|
||||||
|
|
||||||
void logger_min_verbose(int minimum);
|
void log_hint(const char *fmt, ...);
|
||||||
|
void log_verbose(int level, const char *fmt, ...);
|
||||||
|
|
||||||
extern int log_type;
|
extern int log_type;
|
||||||
extern int log_level;
|
extern int log_level;
|
||||||
|
|||||||
@@ -16,11 +16,15 @@ cluster=example_cluster
|
|||||||
# Node ID and name
|
# Node ID and name
|
||||||
# (Note: we recommend to avoid naming nodes after their initial
|
# (Note: we recommend to avoid naming nodes after their initial
|
||||||
# replication funcion, as this will cause confusion when e.g.
|
# replication funcion, as this will cause confusion when e.g.
|
||||||
# "standby2" is promoted to master)
|
# "standby2" is promoted to primary)
|
||||||
node=2
|
node=2 # a unique integer
|
||||||
node_name=node2
|
node_name=node2 # an arbitrary (but unique) string; we recommend using
|
||||||
|
# the server's hostname or another identifier unambiguously
|
||||||
|
# associated with the server to avoid confusion
|
||||||
|
|
||||||
# Database connection information
|
# Database connection information as a conninfo string
|
||||||
|
# This must be accessible to all servers in the cluster; for details see:
|
||||||
|
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
||||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
||||||
|
|
||||||
# Optional configuration items
|
# Optional configuration items
|
||||||
@@ -32,7 +36,7 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
|||||||
# when using cascading replication and a standby is to be connected to an
|
# when using cascading replication and a standby is to be connected to an
|
||||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
||||||
# must exist before the new standby can be registered. If a standby is
|
# must exist before the new standby can be registered. If a standby is
|
||||||
# to connect directly to a master node, this parameter is not required.
|
# to connect directly to a primary node, this parameter is not required.
|
||||||
#
|
#
|
||||||
# upstream_node=1
|
# upstream_node=1
|
||||||
|
|
||||||
@@ -40,7 +44,9 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
|||||||
# (default: 0)
|
# (default: 0)
|
||||||
#
|
#
|
||||||
# use_replication_slots=0
|
# use_replication_slots=0
|
||||||
|
#
|
||||||
|
# NOTE: 'max_replication_slots' should be configured for at least the
|
||||||
|
# number of standbys which will connect to the primary.
|
||||||
|
|
||||||
# Logging and monitoring settings
|
# Logging and monitoring settings
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
@@ -110,28 +116,29 @@ logfacility=STDERR
|
|||||||
#
|
#
|
||||||
# These settings are only applied when repmgrd is running.
|
# These settings are only applied when repmgrd is running.
|
||||||
|
|
||||||
# How many seconds we wait for master response before declaring master failure
|
# Number of seconds to wait for a response from the primary server before
|
||||||
|
# deciding it has failed
|
||||||
|
|
||||||
master_response_timeout=60
|
master_response_timeout=60
|
||||||
|
|
||||||
# How many time we try to reconnect to master before starting failover procedure
|
# Number of times to try and reconnect to the primary before starting
|
||||||
|
# the failover procedure
|
||||||
reconnect_attempts=6
|
reconnect_attempts=6
|
||||||
reconnect_interval=10
|
reconnect_interval=10
|
||||||
|
|
||||||
# Autofailover options
|
# Autofailover options
|
||||||
failover=automatic # one of 'automatic', 'manual'
|
failover=automatic # one of 'automatic', 'manual'
|
||||||
priority=100 # a value of zero or less prevents the node being promoted to master
|
priority=100 # a value of zero or less prevents the node being promoted to primary
|
||||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||||
|
|
||||||
# monitoring interval; default is 2s
|
# monitoring interval in seconds; default is 2
|
||||||
#
|
#
|
||||||
# monitor_interval_secs=2
|
# monitor_interval_secs=2
|
||||||
|
|
||||||
# change wait time for master; before we bail out and exit when the master
|
# change wait time for primary; before we bail out and exit when the primary
|
||||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
||||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||||
# default value is 300)
|
# default value is 300)
|
||||||
#
|
#
|
||||||
# retry_promote_interval_secs=300
|
# retry_promote_interval_secs=300
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
27
repmgr.h
27
repmgr.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.h
|
* repmgr.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -36,7 +36,6 @@
|
|||||||
#define MAXFILENAME 1024
|
#define MAXFILENAME 1024
|
||||||
#define ERRBUFF_SIZE 512
|
#define ERRBUFF_SIZE 512
|
||||||
|
|
||||||
#define DEFAULT_CONFIG_FILE "./repmgr.conf"
|
|
||||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||||
#define DEFAULT_DEST_DIR "."
|
#define DEFAULT_DEST_DIR "."
|
||||||
#define DEFAULT_MASTER_PORT "5432"
|
#define DEFAULT_MASTER_PORT "5432"
|
||||||
@@ -49,14 +48,7 @@
|
|||||||
#define AUTOMATIC_FAILOVER 1
|
#define AUTOMATIC_FAILOVER 1
|
||||||
#define NODE_NOT_FOUND -1
|
#define NODE_NOT_FOUND -1
|
||||||
#define NO_UPSTREAM_NODE -1
|
#define NO_UPSTREAM_NODE -1
|
||||||
|
#define UNKNOWN_NODE_ID -1
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
UNKNOWN = 0,
|
|
||||||
MASTER,
|
|
||||||
STANDBY,
|
|
||||||
WITNESS
|
|
||||||
} t_server_type;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -73,6 +65,7 @@ typedef struct
|
|||||||
char superuser[MAXLEN];
|
char superuser[MAXLEN];
|
||||||
char wal_keep_segments[MAXLEN];
|
char wal_keep_segments[MAXLEN];
|
||||||
bool verbose;
|
bool verbose;
|
||||||
|
bool terse;
|
||||||
bool force;
|
bool force;
|
||||||
bool wait_for_master;
|
bool wait_for_master;
|
||||||
bool ignore_rsync_warn;
|
bool ignore_rsync_warn;
|
||||||
@@ -82,6 +75,7 @@ typedef struct
|
|||||||
bool ignore_external_config_files;
|
bool ignore_external_config_files;
|
||||||
char masterport[MAXLEN];
|
char masterport[MAXLEN];
|
||||||
char localport[MAXLEN];
|
char localport[MAXLEN];
|
||||||
|
char loglevel[MAXLEN];
|
||||||
|
|
||||||
/* parameter used by CLUSTER CLEANUP */
|
/* parameter used by CLUSTER CLEANUP */
|
||||||
int keep_history;
|
int keep_history;
|
||||||
@@ -91,20 +85,9 @@ typedef struct
|
|||||||
char recovery_min_apply_delay[MAXLEN];
|
char recovery_min_apply_delay[MAXLEN];
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, "", "", 0, "", "" }
|
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "", "", "", 0, "", "" }
|
||||||
|
|
||||||
extern char repmgr_schema[MAXLEN];
|
extern char repmgr_schema[MAXLEN];
|
||||||
|
|
||||||
typedef struct ErrorListCell
|
|
||||||
{
|
|
||||||
struct ErrorListCell *next;
|
|
||||||
char *error_message;
|
|
||||||
} ErrorListCell;
|
|
||||||
|
|
||||||
typedef struct ErrorList
|
|
||||||
{
|
|
||||||
ErrorListCell *head;
|
|
||||||
ErrorListCell *tail;
|
|
||||||
} ErrorList;
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.sql
|
* repmgr.sql
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
292
repmgrd.c
292
repmgrd.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgrd.c - Replication manager daemon
|
* repmgrd.c - Replication manager daemon
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This module connects to the nodes of a replication cluster and monitors
|
* This module connects to the nodes of a replication cluster and monitors
|
||||||
* how far are they from master
|
* how far are they from master
|
||||||
@@ -41,22 +41,6 @@
|
|||||||
#include "access/xlogdefs.h"
|
#include "access/xlogdefs.h"
|
||||||
#include "pqexpbuffer.h"
|
#include "pqexpbuffer.h"
|
||||||
|
|
||||||
/*
|
|
||||||
* Struct to store node information
|
|
||||||
*/
|
|
||||||
typedef struct s_node_info
|
|
||||||
{
|
|
||||||
int node_id;
|
|
||||||
int upstream_node_id;
|
|
||||||
char conninfo_str[MAXLEN];
|
|
||||||
XLogRecPtr xlog_location;
|
|
||||||
t_server_type type;
|
|
||||||
bool is_ready;
|
|
||||||
bool is_visible;
|
|
||||||
char slot_name[MAXLEN];
|
|
||||||
bool active;
|
|
||||||
} t_node_info;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Local info */
|
/* Local info */
|
||||||
@@ -68,9 +52,7 @@ t_configuration_options master_options;
|
|||||||
|
|
||||||
PGconn *master_conn = NULL;
|
PGconn *master_conn = NULL;
|
||||||
|
|
||||||
const char *progname;
|
char *config_file = "";
|
||||||
|
|
||||||
char *config_file = DEFAULT_CONFIG_FILE;
|
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
bool monitoring_history = false;
|
bool monitoring_history = false;
|
||||||
t_node_info node_info;
|
t_node_info node_info;
|
||||||
@@ -81,7 +63,7 @@ char *pid_file = NULL;
|
|||||||
|
|
||||||
t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||||
|
|
||||||
static void help(const char *progname);
|
static void help(void);
|
||||||
static void usage(void);
|
static void usage(void);
|
||||||
static void check_cluster_configuration(PGconn *conn);
|
static void check_cluster_configuration(PGconn *conn);
|
||||||
static void check_node_configuration(void);
|
static void check_node_configuration(void);
|
||||||
@@ -89,7 +71,7 @@ static void check_node_configuration(void);
|
|||||||
static void standby_monitor(void);
|
static void standby_monitor(void);
|
||||||
static void witness_monitor(void);
|
static void witness_monitor(void);
|
||||||
static bool check_connection(PGconn **conn, const char *type, const char *conninfo);
|
static bool check_connection(PGconn **conn, const char *type, const char *conninfo);
|
||||||
static bool set_local_node_failed(void);
|
static bool set_local_node_status(void);
|
||||||
|
|
||||||
static void update_shared_memory(char *last_wal_standby_applied);
|
static void update_shared_memory(char *last_wal_standby_applied);
|
||||||
static void update_registration(void);
|
static void update_registration(void);
|
||||||
@@ -158,9 +140,10 @@ main(int argc, char **argv)
|
|||||||
FILE *fd;
|
FILE *fd;
|
||||||
|
|
||||||
int server_version_num = 0;
|
int server_version_num = 0;
|
||||||
progname = get_progname(argv[0]);
|
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "?Vf:v:mdp:", long_options, &optindex)) != -1)
|
set_progname(argv[0]);
|
||||||
|
|
||||||
|
while ((c = getopt_long(argc, argv, "?Vf:vmdp:", long_options, &optindex)) != -1)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
@@ -180,10 +163,10 @@ main(int argc, char **argv)
|
|||||||
pid_file = optarg;
|
pid_file = optarg;
|
||||||
break;
|
break;
|
||||||
case '?':
|
case '?':
|
||||||
help(progname);
|
help();
|
||||||
exit(SUCCESS);
|
exit(SUCCESS);
|
||||||
case 'V':
|
case 'V':
|
||||||
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
printf("%s %s (PostgreSQL %s)\n", progname(), REPMGR_VERSION, PG_VERSION);
|
||||||
exit(SUCCESS);
|
exit(SUCCESS);
|
||||||
default:
|
default:
|
||||||
usage();
|
usage();
|
||||||
@@ -200,7 +183,7 @@ main(int argc, char **argv)
|
|||||||
* which case we'll need to refactor parse_config() not to abort,
|
* which case we'll need to refactor parse_config() not to abort,
|
||||||
* and return the error message.
|
* and return the error message.
|
||||||
*/
|
*/
|
||||||
load_config(config_file, &local_options, argv[0]);
|
load_config(config_file, verbose, &local_options, argv[0]);
|
||||||
|
|
||||||
if (daemonize)
|
if (daemonize)
|
||||||
{
|
{
|
||||||
@@ -230,10 +213,9 @@ main(int argc, char **argv)
|
|||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
|
|
||||||
logger_init(&local_options, progname, local_options.loglevel,
|
logger_init(&local_options, progname());
|
||||||
local_options.logfacility);
|
|
||||||
if (verbose)
|
if (verbose)
|
||||||
logger_min_verbose(LOG_INFO);
|
logger_set_verbose();
|
||||||
|
|
||||||
if (log_type == REPMGR_SYSLOG)
|
if (log_type == REPMGR_SYSLOG)
|
||||||
{
|
{
|
||||||
@@ -247,6 +229,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Initialise the repmgr schema name */
|
/* Initialise the repmgr schema name */
|
||||||
|
/* XXX check this handles quoting properly */
|
||||||
maxlen_snprintf(repmgr_schema, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX,
|
maxlen_snprintf(repmgr_schema, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX,
|
||||||
local_options.cluster_name);
|
local_options.cluster_name);
|
||||||
|
|
||||||
@@ -264,7 +247,7 @@ main(int argc, char **argv)
|
|||||||
if (server_version_num > 0)
|
if (server_version_num > 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s requires PostgreSQL %s or later\n"),
|
log_err(_("%s requires PostgreSQL %s or later\n"),
|
||||||
progname,
|
progname(),
|
||||||
MIN_SUPPORTED_VERSION) ;
|
MIN_SUPPORTED_VERSION) ;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -282,7 +265,7 @@ main(int argc, char **argv)
|
|||||||
if (node_info.node_id == NODE_NOT_FOUND)
|
if (node_info.node_id == NODE_NOT_FOUND)
|
||||||
{
|
{
|
||||||
log_err(_("No metadata record found for this node - terminating\n"));
|
log_err(_("No metadata record found for this node - terminating\n"));
|
||||||
log_notice(_("HINT: was this node registered with 'repmgr (master|standby) register'?\n"));
|
log_hint(_("Check that 'repmgr (master|standby) register' was executed for this node\n"));
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -407,7 +390,7 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("unable to connect to master node '%s'"),
|
_("unable to connect to master node '%s'"),
|
||||||
local_options.cluster_name);
|
master_options.node_name);
|
||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
@@ -457,7 +440,7 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
log_debug("standby check loop...\n");
|
log_verbose(LOG_DEBUG, "standby check loop...\n");
|
||||||
|
|
||||||
if (node_info.type == WITNESS)
|
if (node_info.type == WITNESS)
|
||||||
{
|
{
|
||||||
@@ -467,6 +450,7 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
standby_monitor();
|
standby_monitor();
|
||||||
}
|
}
|
||||||
|
|
||||||
sleep(local_options.monitor_interval_secs);
|
sleep(local_options.monitor_interval_secs);
|
||||||
|
|
||||||
if (got_SIGHUP)
|
if (got_SIGHUP)
|
||||||
@@ -558,10 +542,10 @@ witness_monitor(void)
|
|||||||
{
|
{
|
||||||
log_warning(
|
log_warning(
|
||||||
_("unable to determine a valid master server; waiting %i seconds to retry...\n"),
|
_("unable to determine a valid master server; waiting %i seconds to retry...\n"),
|
||||||
local_options.reconnect_intvl
|
local_options.reconnect_interval
|
||||||
);
|
);
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
sleep(local_options.reconnect_intvl);
|
sleep(local_options.reconnect_interval);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -674,6 +658,7 @@ standby_monitor(void)
|
|||||||
char last_wal_standby_received[MAXLEN];
|
char last_wal_standby_received[MAXLEN];
|
||||||
char last_wal_standby_applied[MAXLEN];
|
char last_wal_standby_applied[MAXLEN];
|
||||||
char last_wal_standby_applied_timestamp[MAXLEN];
|
char last_wal_standby_applied_timestamp[MAXLEN];
|
||||||
|
bool last_wal_standby_received_gte_replayed;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
XLogRecPtr lsn_master;
|
XLogRecPtr lsn_master;
|
||||||
@@ -701,23 +686,16 @@ standby_monitor(void)
|
|||||||
{
|
{
|
||||||
PQExpBufferData errmsg;
|
PQExpBufferData errmsg;
|
||||||
|
|
||||||
set_local_node_failed();
|
set_local_node_status();
|
||||||
|
|
||||||
initPQExpBuffer(&errmsg);
|
initPQExpBuffer(&errmsg);
|
||||||
|
|
||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("failed to connect to local node, node marked as failed and terminating!"));
|
_("failed to connect to local node, node marked as failed!"));
|
||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(master_conn,
|
goto continue_monitoring_standby;
|
||||||
&local_options,
|
|
||||||
local_options.node,
|
|
||||||
"repmgrd_shutdown",
|
|
||||||
false,
|
|
||||||
errmsg.data);
|
|
||||||
|
|
||||||
terminate(ERR_DB_CON);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
upstream_conn = get_upstream_connection(my_local_conn,
|
upstream_conn = get_upstream_connection(my_local_conn,
|
||||||
@@ -738,7 +716,7 @@ standby_monitor(void)
|
|||||||
check_connection(&upstream_conn, type, upstream_conninfo);
|
check_connection(&upstream_conn, type, upstream_conninfo);
|
||||||
/*
|
/*
|
||||||
* This takes up to local_options.reconnect_attempts *
|
* This takes up to local_options.reconnect_attempts *
|
||||||
* local_options.reconnect_intvl seconds
|
* local_options.reconnect_interval seconds
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
@@ -846,6 +824,7 @@ standby_monitor(void)
|
|||||||
|
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
|
|
||||||
|
continue_monitoring_standby:
|
||||||
/* Check if we still are a standby, we could have been promoted */
|
/* Check if we still are a standby, we could have been promoted */
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@@ -861,10 +840,13 @@ standby_monitor(void)
|
|||||||
* will require manual resolution as there's no way of determing
|
* will require manual resolution as there's no way of determing
|
||||||
* which master is the correct one.
|
* which master is the correct one.
|
||||||
*
|
*
|
||||||
|
* We should log a message so the user knows of the situation at hand.
|
||||||
|
*
|
||||||
* XXX check if the original master is still active and display a
|
* XXX check if the original master is still active and display a
|
||||||
* warning
|
* warning
|
||||||
*/
|
*/
|
||||||
log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
|
log_err(_("It seems this server was promoted manually (not by repmgr) so you might by in the presence of a split-brain.\n"));
|
||||||
|
log_err(_("Check your cluster and manually fix any anomaly.\n"));
|
||||||
terminate(1);
|
terminate(1);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -874,8 +856,11 @@ standby_monitor(void)
|
|||||||
|
|
||||||
if (!check_connection(&my_local_conn, "standby", NULL))
|
if (!check_connection(&my_local_conn, "standby", NULL))
|
||||||
{
|
{
|
||||||
set_local_node_failed();
|
set_local_node_status();
|
||||||
terminate(0);
|
/*
|
||||||
|
* Let's continue checking, and if the postgres server on the
|
||||||
|
* standby comes back up, we will activate it again
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@@ -884,6 +869,13 @@ standby_monitor(void)
|
|||||||
|
|
||||||
if (did_retry)
|
if (did_retry)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* There's a possible situation where the standby went down for some reason
|
||||||
|
* (maintenance for example) and is now up and maybe connected once again to
|
||||||
|
* the stream. If we set the local standby node as failed and it's now running
|
||||||
|
* and receiving replication data, we should activate it again.
|
||||||
|
*/
|
||||||
|
set_local_node_status();
|
||||||
log_info(_("standby connection recovered!\n"));
|
log_info(_("standby connection recovered!\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -891,7 +883,6 @@ standby_monitor(void)
|
|||||||
if (!monitoring_history)
|
if (!monitoring_history)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If original master has gone away we'll need to get the new one
|
* If original master has gone away we'll need to get the new one
|
||||||
* from the upstream node to write monitoring information
|
* from the upstream node to write monitoring information
|
||||||
@@ -953,7 +944,8 @@ standby_monitor(void)
|
|||||||
/* Get local xlog info */
|
/* Get local xlog info */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
||||||
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp() ");
|
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), "
|
||||||
|
"pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()");
|
||||||
|
|
||||||
res = PQexec(my_local_conn, sqlquery);
|
res = PQexec(my_local_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -968,10 +960,30 @@ standby_monitor(void)
|
|||||||
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
||||||
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
||||||
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||||
|
last_wal_standby_received_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||||
|
? true
|
||||||
|
: false;
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In the unusual event of a standby becoming disconnected from the primary,
|
||||||
|
* while this repmgrd remains connected to the primary, subtracting
|
||||||
|
* "lsn_standby_applied" from "lsn_standby_received" and coercing to
|
||||||
|
* (long long unsigned int) will result in a meaningless, very large
|
||||||
|
* value which will overflow a BIGINT column and spew error messages into the
|
||||||
|
* PostgreSQL log. In the absence of a better strategy, skip attempting
|
||||||
|
* to insert a monitoring record.
|
||||||
|
*/
|
||||||
|
if (last_wal_standby_received_gte_replayed == false)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_WARNING,
|
||||||
|
"Invalid replication_lag value calculated - is this standby connected to its upstream?\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* Get master xlog info */
|
/* Get master xlog info */
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location()");
|
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");
|
||||||
|
|
||||||
res = PQexec(master_conn, sqlquery);
|
res = PQexec(master_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -1013,7 +1025,8 @@ standby_monitor(void)
|
|||||||
* Execute the query asynchronously, but don't check for a result. We will
|
* Execute the query asynchronously, but don't check for a result. We will
|
||||||
* check the result next time we pause for a monitor step.
|
* check the result next time we pause for a monitor step.
|
||||||
*/
|
*/
|
||||||
log_debug("standby_monitor: %s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "standby_monitor:() %s\n", sqlquery);
|
||||||
|
|
||||||
if (PQsendQuery(master_conn, sqlquery) == 0)
|
if (PQsendQuery(master_conn, sqlquery) == 0)
|
||||||
log_warning(_("query could not be sent to master. %s\n"),
|
log_warning(_("query could not be sent to master. %s\n"),
|
||||||
PQerrorMessage(master_conn));
|
PQerrorMessage(master_conn));
|
||||||
@@ -1055,10 +1068,10 @@ do_master_failover(void)
|
|||||||
t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
|
t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
|
||||||
|
|
||||||
/* Store details of the failed node here */
|
/* Store details of the failed node here */
|
||||||
t_node_info failed_master = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
t_node_info failed_master = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
/* Store details of the best candidate for promotion to master here */
|
/* Store details of the best candidate for promotion to master here */
|
||||||
t_node_info best_candidate = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
t_node_info best_candidate = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
/* get a list of standby nodes, including myself */
|
/* get a list of standby nodes, including myself */
|
||||||
sprintf(sqlquery,
|
sprintf(sqlquery,
|
||||||
@@ -1187,12 +1200,13 @@ do_master_failover(void)
|
|||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
|
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||||
res = PQexec(node_conn, sqlquery);
|
res = PQexec(node_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_info(_("unable to retrieve node's last standby location: %s\n"),
|
log_info(_("unable to retrieve node's last standby location: %s\n"),
|
||||||
PQerrorMessage(node_conn));
|
PQerrorMessage(node_conn));
|
||||||
|
|
||||||
log_debug(_("connection details: %s\n"), nodes[i].conninfo_str);
|
log_debug(_("connection details: %s\n"), nodes[i].conninfo_str);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
@@ -1218,7 +1232,7 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* last we get info about this node, and update shared memory */
|
/* last we get info about this node, and update shared memory */
|
||||||
sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
|
sprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||||
res = PQexec(my_local_conn, sqlquery);
|
res = PQexec(my_local_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
@@ -1336,6 +1350,9 @@ do_master_failover(void)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* If position is 0/0, keep checking */
|
/* If position is 0/0, keep checking */
|
||||||
|
/* XXX we should add a timeout here to prevent infinite looping
|
||||||
|
* if the other node's repmgrd is not up
|
||||||
|
*/
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1413,8 +1430,7 @@ do_master_failover(void)
|
|||||||
/* wait */
|
/* wait */
|
||||||
sleep(5);
|
sleep(5);
|
||||||
|
|
||||||
if (verbose)
|
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
||||||
log_info(_("this node is the best candidate to be the new master, promoting...\n"));
|
|
||||||
|
|
||||||
log_debug(_("promote command is: \"%s\"\n"),
|
log_debug(_("promote command is: \"%s\"\n"),
|
||||||
local_options.promote_command);
|
local_options.promote_command);
|
||||||
@@ -1463,10 +1479,8 @@ do_master_failover(void)
|
|||||||
/* wait */
|
/* wait */
|
||||||
sleep(10);
|
sleep(10);
|
||||||
|
|
||||||
if (verbose)
|
log_info(_("node %d is the best candidate for new master, attempting to follow...\n"),
|
||||||
log_info(_("node %d is the best candidate to be the new master, we should follow it...\n"),
|
|
||||||
best_candidate.node_id);
|
best_candidate.node_id);
|
||||||
log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The new master may some time to be promoted. The follow command
|
* The new master may some time to be promoted. The follow command
|
||||||
@@ -1477,73 +1491,46 @@ do_master_failover(void)
|
|||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* If 9.4 or later, and replication slots in use, we'll need to create a
|
|
||||||
* slot on the new master
|
|
||||||
*/
|
|
||||||
new_master_conn = establish_db_connection(best_candidate.conninfo_str, true);
|
|
||||||
|
|
||||||
if (local_options.use_replication_slots)
|
log_debug(_("executing follow command: \"%s\"\n"), local_options.follow_command);
|
||||||
{
|
|
||||||
if (create_replication_slot(new_master_conn, node_info.slot_name) == false)
|
|
||||||
{
|
|
||||||
|
|
||||||
|
r = system(local_options.follow_command);
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("Unable to create slot '%s' on the master node: %s"),
|
_("Unable to execute follow command:\n %s"),
|
||||||
node_info.slot_name,
|
local_options.follow_command);
|
||||||
PQerrorMessage(new_master_conn));
|
|
||||||
|
|
||||||
log_err("%s\n", event_details.data);
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
create_event_record(new_master_conn,
|
/* It won't be possible to write to the event notification
|
||||||
|
* table but we should be able to generate an external notification
|
||||||
|
* if required.
|
||||||
|
*/
|
||||||
|
create_event_record(NULL,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
"repmgrd_failover_follow",
|
"repmgrd_failover_follow",
|
||||||
false,
|
false,
|
||||||
event_details.data);
|
event_details.data);
|
||||||
|
|
||||||
PQfinish(new_master_conn);
|
|
||||||
terminate(ERR_DB_QUERY);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
r = system(local_options.follow_command);
|
|
||||||
if (r != 0)
|
|
||||||
{
|
|
||||||
log_err(_("follow command failed. You could check and try it manually.\n"));
|
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* and reconnect to the local database */
|
/* and reconnect to the local database */
|
||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
|
|
||||||
/* update node information to reflect new status */
|
|
||||||
if (update_node_record_set_upstream(new_master_conn, local_options.cluster_name, node_info.node_id, best_candidate.node_id) == false)
|
|
||||||
{
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("Unable to update node record for node %i (following new upstream node %i)"),
|
|
||||||
node_info.node_id,
|
|
||||||
best_candidate.node_id);
|
|
||||||
|
|
||||||
log_err("%s\n", event_details.data);
|
|
||||||
|
|
||||||
create_event_record(new_master_conn,
|
|
||||||
&local_options,
|
|
||||||
node_info.node_id,
|
|
||||||
"repmgrd_failover_follow",
|
|
||||||
false,
|
|
||||||
event_details.data);
|
|
||||||
|
|
||||||
terminate(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update internal record for this node*/
|
/* update internal record for this node*/
|
||||||
|
new_master_conn = establish_db_connection(best_candidate.conninfo_str, true);
|
||||||
|
|
||||||
node_info = get_node_info(new_master_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(new_master_conn, local_options.cluster_name, local_options.node);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("Node %i now following new upstream node %i"),
|
_("Node %i now following new upstream node %i"),
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
best_candidate.node_id);
|
best_candidate.node_id);
|
||||||
|
|
||||||
|
log_info("%s\n", event_details.data);
|
||||||
|
|
||||||
create_event_record(new_master_conn,
|
create_event_record(new_master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
@@ -1570,6 +1557,8 @@ do_master_failover(void)
|
|||||||
* It might be worth providing a selection of reconnection strategies
|
* It might be worth providing a selection of reconnection strategies
|
||||||
* as different behaviour might be desirable in different situations;
|
* as different behaviour might be desirable in different situations;
|
||||||
* or maybe the option not to reconnect might be required?
|
* or maybe the option not to reconnect might be required?
|
||||||
|
*
|
||||||
|
* XXX check this handles replication slots gracefully
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
do_upstream_standby_failover(t_node_info upstream_node)
|
do_upstream_standby_failover(t_node_info upstream_node)
|
||||||
@@ -1578,6 +1567,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
int upstream_node_id = node_info.upstream_node_id;
|
int upstream_node_id = node_info.upstream_node_id;
|
||||||
int r;
|
int r;
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
log_debug(_("do_upstream_standby_failover(): performing failover for node %i\n"),
|
log_debug(_("do_upstream_standby_failover(): performing failover for node %i\n"),
|
||||||
node_info.node_id);
|
node_info.node_id);
|
||||||
@@ -1647,26 +1637,65 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
sleep(local_options.reconnect_intvl);
|
sleep(local_options.reconnect_interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Close the connection to this server */
|
/* Close the connection to this server */
|
||||||
PQfinish(my_local_conn);
|
PQfinish(my_local_conn);
|
||||||
my_local_conn = NULL;
|
my_local_conn = NULL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
/* Follow new upstream */
|
/* Follow new upstream */
|
||||||
r = system(local_options.follow_command);
|
r = system(local_options.follow_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("follow command failed. You could check and try it manually.\n"));
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Unable to execute follow command:\n %s"),
|
||||||
|
local_options.follow_command);
|
||||||
|
|
||||||
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
|
/* It won't be possible to write to the event notification
|
||||||
|
* table but we should be able to generate an external notification
|
||||||
|
* if required.
|
||||||
|
*/
|
||||||
|
create_event_record(NULL,
|
||||||
|
&local_options,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_follow",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (update_node_record_set_upstream(master_conn, local_options.cluster_name, node_info.node_id, upstream_node_id) == false)
|
if (update_node_record_set_upstream(master_conn, local_options.cluster_name, node_info.node_id, upstream_node_id) == false)
|
||||||
{
|
{
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Unable to set node %i's new upstream ID to %i"),
|
||||||
|
node_info.node_id,
|
||||||
|
upstream_node_id);
|
||||||
|
create_event_record(NULL,
|
||||||
|
&local_options,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_follow",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Node %i is now following upstream node %i"),
|
||||||
|
node_info.node_id,
|
||||||
|
upstream_node_id);
|
||||||
|
|
||||||
|
create_event_record(NULL,
|
||||||
|
&local_options,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_follow",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -1681,7 +1710,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the node is still available if after
|
* Check if the node is still available if after
|
||||||
* local_options.reconnect_attempts * local_options.reconnect_intvl
|
* local_options.reconnect_attempts * local_options.reconnect_interval
|
||||||
* seconds of retries we cannot reconnect return false
|
* seconds of retries we cannot reconnect return false
|
||||||
*/
|
*/
|
||||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||||
@@ -1699,9 +1728,9 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
|||||||
{
|
{
|
||||||
log_warning(_("connection to %s has been lost, trying to recover... %i seconds before failover decision\n"),
|
log_warning(_("connection to %s has been lost, trying to recover... %i seconds before failover decision\n"),
|
||||||
type,
|
type,
|
||||||
(local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries)));
|
(local_options.reconnect_interval * (local_options.reconnect_attempts - connection_retries)));
|
||||||
/* wait local_options.reconnect_intvl seconds between retries */
|
/* wait local_options.reconnect_interval seconds between retries */
|
||||||
sleep(local_options.reconnect_intvl);
|
sleep(local_options.reconnect_interval);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -1728,7 +1757,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* set_local_node_failed()
|
* set_local_node_status()
|
||||||
*
|
*
|
||||||
* If failure of the local node is detected, attempt to connect
|
* If failure of the local node is detected, attempt to connect
|
||||||
* to the current master server (as stored in the global variable
|
* to the current master server (as stored in the global variable
|
||||||
@@ -1736,7 +1765,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
set_local_node_failed(void)
|
set_local_node_status(void)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
@@ -1745,7 +1774,7 @@ set_local_node_failed(void)
|
|||||||
|
|
||||||
if (!check_connection(&master_conn, "master", NULL))
|
if (!check_connection(&master_conn, "master", NULL))
|
||||||
{
|
{
|
||||||
log_err(_("set_local_node_failed(): Unable to connect to last known master node\n"));
|
log_err(_("set_local_node_status(): Unable to connect to last known master node\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1799,17 +1828,16 @@ set_local_node_failed(void)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Attempt to set own record as inactive
|
* Attempt to set the active record to the correct value.
|
||||||
|
* First
|
||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
|
||||||
"UPDATE %s.repl_nodes "
|
|
||||||
" SET active = FALSE "
|
|
||||||
" WHERE id = %i ",
|
|
||||||
get_repmgr_schema_quoted(master_conn),
|
|
||||||
node_info.node_id);
|
|
||||||
|
|
||||||
res = PQexec(master_conn, sqlquery);
|
if (!update_node_record_status(master_conn,
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
local_options.cluster_name,
|
||||||
|
node_info.node_id,
|
||||||
|
"standby",
|
||||||
|
node_info.upstream_node_id,
|
||||||
|
is_standby(my_local_conn)==1))
|
||||||
{
|
{
|
||||||
log_err(_("unable to set local node %i as inactive on master: %s\n"),
|
log_err(_("unable to set local node %i as inactive on master: %s\n"),
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
@@ -1834,7 +1862,7 @@ check_cluster_configuration(PGconn *conn)
|
|||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT oid FROM pg_class "
|
"SELECT oid FROM pg_class "
|
||||||
" WHERE oid = '%s.repl_nodes'::regclass ",
|
" WHERE oid = '%s.repl_nodes'::regclass ",
|
||||||
get_repmgr_schema());
|
get_repmgr_schema_quoted(master_conn));
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
@@ -1961,18 +1989,18 @@ lsn_to_xlogrecptr(char *lsn, bool *format_ok)
|
|||||||
void
|
void
|
||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
log_err(_("%s: Replicator manager daemon \n"), progname);
|
log_err(_("%s: Replicator manager daemon \n"), progname());
|
||||||
log_err(_("Try \"%s --help\" for more information.\n"), progname);
|
log_err(_("Try \"%s --help\" for more information.\n"), progname());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
help(const char *progname)
|
help(void)
|
||||||
{
|
{
|
||||||
printf(_("%s: replication management daemon for PostgreSQL\n"), progname);
|
printf(_("%s: replication management daemon for PostgreSQL\n"), progname());
|
||||||
printf(_("\n"));
|
printf(_("\n"));
|
||||||
printf(_("Usage:\n"));
|
printf(_("Usage:\n"));
|
||||||
printf(_(" %s [OPTIONS]\n"), progname);
|
printf(_(" %s [OPTIONS]\n"), progname());
|
||||||
printf(_("\n"));
|
printf(_("\n"));
|
||||||
printf(_("Options:\n"));
|
printf(_("Options:\n"));
|
||||||
printf(_(" -?, --help show this help, then exit\n"));
|
printf(_(" -?, --help show this help, then exit\n"));
|
||||||
@@ -1983,7 +2011,7 @@ help(const char *progname)
|
|||||||
printf(_(" -d, --daemonize detach process from foreground\n"));
|
printf(_(" -d, --daemonize detach process from foreground\n"));
|
||||||
printf(_(" -p, --pid-file=PATH write a PID file\n"));
|
printf(_(" -p, --pid-file=PATH write a PID file\n"));
|
||||||
printf(_("\n"));
|
printf(_("\n"));
|
||||||
printf(_("%s monitors a cluster of servers and optionally performs failover.\n"), progname);
|
printf(_("%s monitors a cluster of servers and optionally performs failover.\n"), progname());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2021,7 +2049,7 @@ terminate(int retval)
|
|||||||
unlink(pid_file);
|
unlink(pid_file);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_info(_("%s terminating...\n"), progname);
|
log_info(_("%s terminating...\n"), progname());
|
||||||
|
|
||||||
exit(retval);
|
exit(retval);
|
||||||
}
|
}
|
||||||
@@ -2228,7 +2256,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
t_node_info node_info = { NODE_NOT_FOUND, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
res = get_node_record(conn, cluster, node_id);
|
res = get_node_record(conn, cluster, node_id);
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#
|
#
|
||||||
# Makefile
|
# Makefile
|
||||||
#
|
#
|
||||||
# Copyright (c) 2ndQuadrant, 2010-2015
|
# Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
#
|
#
|
||||||
|
|
||||||
MODULE_big = repmgr_funcs
|
MODULE_big = repmgr_funcs
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr_function.sql
|
* repmgr_function.sql
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* uninstall_repmgr_funcs.sql
|
* uninstall_repmgr_funcs.sql
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2015
|
* Copyright (c) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.c
|
* strutil.c
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.h
|
* strutil.h
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* uninstall_repmgr.sql
|
* uninstall_repmgr.sql
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2015
|
* Copyright (C) 2ndQuadrant, 2010-2016
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user