mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
67 Commits
REL3_0_STA
...
REL2_0_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
48099e9c5f | ||
|
|
e7ca8c369f | ||
|
|
782fae6239 | ||
|
|
a492745db1 | ||
|
|
9ac066db51 | ||
|
|
20db2f52b1 | ||
|
|
341831ad69 | ||
|
|
e8bc5521a5 | ||
|
|
1d9aacfed9 | ||
|
|
e39ec70ef0 | ||
|
|
196585c78a | ||
|
|
79728ba6dd | ||
|
|
c4a47c467f | ||
|
|
36e5944b2c | ||
|
|
097bbdebfd | ||
|
|
4fa75afa26 | ||
|
|
0aae96008f | ||
|
|
2349e182d2 | ||
|
|
03a8f2eaba | ||
|
|
b6a263a40e | ||
|
|
81050899e8 | ||
|
|
51aa63c8f9 | ||
|
|
e53162deb8 | ||
|
|
6a8336b880 | ||
|
|
4a445e7f8a | ||
|
|
3c1d72a5ea | ||
|
|
d4b9a32a86 | ||
|
|
07a216ca25 | ||
|
|
d3c067f1bd | ||
|
|
e6caf11bf2 | ||
|
|
9909881d81 | ||
|
|
8073a294f0 | ||
|
|
bf5e0b9b48 | ||
|
|
2e9f4aa30f | ||
|
|
0dcacc3a70 | ||
|
|
65120c47cf | ||
|
|
f9397c0f06 | ||
|
|
af3c865b05 | ||
|
|
112a11a311 | ||
|
|
7b87b5eddd | ||
|
|
1aa36ca1c1 | ||
|
|
a7eff1f39e | ||
|
|
e64e230559 | ||
|
|
bba167db9e | ||
|
|
2676adcaed | ||
|
|
5a27d5e57b | ||
|
|
4071589ba5 | ||
|
|
6cb2376974 | ||
|
|
235c98a0b5 | ||
|
|
16da2f48c2 | ||
|
|
c23e5858f2 | ||
|
|
30ccee43d9 | ||
|
|
9357f89d12 | ||
|
|
48da11acfd | ||
|
|
07c54c296c | ||
|
|
8f0b9592e8 | ||
|
|
b35bf3f91d | ||
|
|
04c101c5f0 | ||
|
|
65989840d2 | ||
|
|
24bd4e7a3f | ||
|
|
1c67e105ff | ||
|
|
069f9ff2ed | ||
|
|
b8ade8e908 | ||
|
|
c0abb3be31 | ||
|
|
0a71123920 | ||
|
|
a72c2296e9 | ||
|
|
9c3d79147b |
@@ -1,29 +0,0 @@
|
|||||||
License and Contributions
|
|
||||||
=========================
|
|
||||||
|
|
||||||
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
|
||||||
Copyright 2010-2016, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
|
||||||
details.
|
|
||||||
|
|
||||||
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
|
||||||
|
|
||||||
Additional work has been sponsored by the 4CaaST project for cloud computing,
|
|
||||||
which has received funding from the European Union's Seventh Framework Programme
|
|
||||||
(FP7/2007-2013) under grant agreement 258862.
|
|
||||||
|
|
||||||
Contributions to `repmgr` are welcome, and will be listed in the file `CREDITS`.
|
|
||||||
2ndQuadrant Limited requires that any contributions provide a copyright
|
|
||||||
assignment and a disclaimer of any work-for-hire ownership claims from the
|
|
||||||
employer of the developer. This lets us make sure that all of the repmgr
|
|
||||||
distribution remains free code. Please contact info@2ndQuadrant.com for a
|
|
||||||
copy of the relevant Copyright Assignment Form.
|
|
||||||
|
|
||||||
Code style
|
|
||||||
----------
|
|
||||||
|
|
||||||
Code in repmgr is formatted to a consistent style using the following command:
|
|
||||||
|
|
||||||
astyle --style=ansi --indent=tab --suffix=none *.c *.h
|
|
||||||
|
|
||||||
Contributors should reformat their code similarly before submitting code to
|
|
||||||
the project, in order to minimize merge conflicts with other work.
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2010-2016, 2ndQuadrant Limited
|
Copyright (c) 2010-2014, 2ndQuadrant Limited
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
|||||||
155
FAQ.md
155
FAQ.md
@@ -1,155 +0,0 @@
|
|||||||
FAQ - Frequently Asked Questions about repmgr
|
|
||||||
=============================================
|
|
||||||
|
|
||||||
This FAQ applies to `repmgr` 3.0 and later.
|
|
||||||
|
|
||||||
General
|
|
||||||
-------
|
|
||||||
|
|
||||||
- What's the difference between the repmgr versions?
|
|
||||||
|
|
||||||
repmgr 3.x builds on the improved replication facilities added
|
|
||||||
in PostgreSQL 9.3, as well as improved automated failover support
|
|
||||||
via `repmgrd`, and is not compatible with PostgreSQL 9.2 and earlier.
|
|
||||||
|
|
||||||
repmgr 2.x supports PostgreSQL 9.0 onwards. While it is compatible
|
|
||||||
with PostgreSQL 9.3 and later, we recommend repmgr v3.
|
|
||||||
|
|
||||||
- What's the advantage of using replication slots?
|
|
||||||
|
|
||||||
Replication slots, introduced in PostgreSQL 9.4, ensure that the
|
|
||||||
master server will retain WAL files until they have been consumed
|
|
||||||
by all standby servers. This makes WAL file management much easier,
|
|
||||||
and if used `repmgr` will no longer insist on a fixed number (default: 5000)
|
|
||||||
of WAL files being preserved.
|
|
||||||
|
|
||||||
(However this does mean that if a standby is no longer connected to the
|
|
||||||
master, the master will retain WAL files indefinitely).
|
|
||||||
|
|
||||||
- How many replication slots should I define in `max_replication_slots`?
|
|
||||||
|
|
||||||
Normally at least the same number as the number of standbys which will connect
|
|
||||||
to the node. Note that changes to `max_replication_slots` require a server
|
|
||||||
restart to take effect, and as there is no particular penalty for unused
|
|
||||||
replication slots, setting a higher figure will make adding new nodes
|
|
||||||
easier.
|
|
||||||
|
|
||||||
- Does `repmgr` support hash indexes?
|
|
||||||
|
|
||||||
No. Hash indexes and replication do not mix well and their use is
|
|
||||||
explicitly discouraged; see:
|
|
||||||
http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
|
|
||||||
|
|
||||||
`repmgr`
|
|
||||||
--------
|
|
||||||
|
|
||||||
- When should I use the --rsync-only option?
|
|
||||||
|
|
||||||
By default, `repmgr` uses `pg_basebackup` to clone a standby from
|
|
||||||
a master. However, `pg_basebackup` copies the entire data directory, which
|
|
||||||
can take some time depending on installation size. If you have an
|
|
||||||
existing but "stale" standby, `repmgr` can use `rsync` instead,
|
|
||||||
which means only changed or added files need to be copied.
|
|
||||||
|
|
||||||
- Can I register an existing master/standby?
|
|
||||||
|
|
||||||
Yes, this is no problem.
|
|
||||||
|
|
||||||
- How can a failed master be re-added as a standby?
|
|
||||||
|
|
||||||
This is a two-stage process. First, the failed master's data directory
|
|
||||||
must be re-synced with the current master; secondly the failed master
|
|
||||||
needs to be re-registered as a standby. The section "Converting a failed
|
|
||||||
master to a standby" in the `README.md` file contains more detailed
|
|
||||||
information on this process.
|
|
||||||
|
|
||||||
- Is there an easy way to check my master server is correctly configured
|
|
||||||
for use with `repmgr`?
|
|
||||||
|
|
||||||
Yes - execute `repmgr` with the `--check-upstream-config` option, and it
|
|
||||||
will let you know which items in `postgresql.conf` need to be modified.
|
|
||||||
|
|
||||||
- Even though I specified custom `rsync` options, `repmgr` appends
|
|
||||||
the `--checksum` - why?
|
|
||||||
|
|
||||||
When syncing a stale data directory from an active server, it's
|
|
||||||
essential that `rsync` compares the content of files rather than
|
|
||||||
just timestamp and size, to ensure that all changed files are
|
|
||||||
copied and prevent corruption.
|
|
||||||
|
|
||||||
- When cloning a standby, how can I prevent `repmgr` from copying
|
|
||||||
`postgresql.conf` and `pg_hba.conf` from the PostgreSQL configuration
|
|
||||||
directory in `/etc`?
|
|
||||||
|
|
||||||
Use the command line option `--ignore-external-config-files`
|
|
||||||
|
|
||||||
- How can I prevent `repmgr` from copying local configuration files
|
|
||||||
in the data directory?
|
|
||||||
|
|
||||||
If you're updating an existing but stale data directory which
|
|
||||||
contains e.g. configuration files you don't want to be overwritten
|
|
||||||
with the same file from the master, specify the files in the
|
|
||||||
`rsync_options` configuration option, e.g.
|
|
||||||
|
|
||||||
rsync_options=--exclude=postgresql.local.conf
|
|
||||||
|
|
||||||
This option is only available when using the `--rsync-only` option.
|
|
||||||
|
|
||||||
- How can I make the witness server use a particular port?
|
|
||||||
|
|
||||||
By default the witness server is configured to use port 5499; this
|
|
||||||
is intended to support running the witness server as a separate
|
|
||||||
instance on a normal node server, rather than on its own dedicated server.
|
|
||||||
|
|
||||||
To specify a different port for the witness server, supply the port number
|
|
||||||
in the `conninfo` string in `repmgr.conf`
|
|
||||||
(repmgr 3.0.1 and earlier: use the `-l/--local-port` option)
|
|
||||||
|
|
||||||
- Do I need to include `shared_preload_libraries = 'repmgr_funcs'`
|
|
||||||
in `postgresql.conf` if I'm not using `repmgrd`?
|
|
||||||
|
|
||||||
No, the `repmgr_funcs` library is only needed when running `repmgrd`.
|
|
||||||
If you later decide to run `repmgrd`, you just need to add
|
|
||||||
`shared_preload_libraries = 'repmgr_funcs'` and restart PostgreSQL.
|
|
||||||
|
|
||||||
- I've provided replication permission for the `repmgr` user in `pg_hba.conf`
|
|
||||||
but `repmgr`/`repmgrd` complains it can't connect to the server... Why?
|
|
||||||
|
|
||||||
`repmgr`/`repmgrd` need to be able to connect to the repmgr database
|
|
||||||
with a normal connection to query metadata. The `replication` connection
|
|
||||||
permission is for PostgreSQL's streaming replication and doesn't
|
|
||||||
necessarily need to be the `repmgr` user.
|
|
||||||
|
|
||||||
|
|
||||||
`repmgrd`
|
|
||||||
---------
|
|
||||||
|
|
||||||
- Do I need a witness server?
|
|
||||||
|
|
||||||
Not necessarily. However if you have an uneven number of nodes spread
|
|
||||||
over more than one network segment, a witness server will enable
|
|
||||||
better handling of a 'split brain' situation by providing a "casting
|
|
||||||
vote" on the preferred network segment.
|
|
||||||
|
|
||||||
- How can I prevent a node from ever being promoted to master?
|
|
||||||
|
|
||||||
In `repmgr.conf`, set its priority to a value of 0 or less.
|
|
||||||
|
|
||||||
- Does `repmgrd` support delayed standbys?
|
|
||||||
|
|
||||||
`repmgrd` can monitor delayed standbys - those set up with
|
|
||||||
`recovery_min_apply_delay` set to a non-zero value in `recovery.conf` -
|
|
||||||
but as it's not currently possible to directly examine the value
|
|
||||||
applied to the standby, `repmgrd` may not be able to properly evaluate
|
|
||||||
the node as a promotion candidate.
|
|
||||||
|
|
||||||
We recommend that delayed standbys are explicitly excluded from promotion
|
|
||||||
by setting `priority` to 0 in `repmgr.conf`.
|
|
||||||
|
|
||||||
Note that after registering a delayed standby, `repmgrd` will only start
|
|
||||||
once the metadata added in the master node has been replicated.
|
|
||||||
|
|
||||||
- How can I get `repmgrd` to rotate its logfile?
|
|
||||||
|
|
||||||
Configure your system's `logrotate` service to do this; see example
|
|
||||||
in README.md
|
|
||||||
68
HISTORY
68
HISTORY
@@ -1,63 +1,5 @@
|
|||||||
3.0.4 2016-01-
|
|
||||||
Remove requirement for 'archive_mode' to be enabled (Ian)
|
|
||||||
|
|
||||||
3.0.3 2016-01-04
|
|
||||||
Create replication slot if required before base backup is run (Abhijit)
|
|
||||||
standby clone: when using rsync, clean up "pg_replslot" directory (Ian)
|
|
||||||
Improve --help output (Ian)
|
|
||||||
Improve config file parsing (Ian)
|
|
||||||
Various logging output improvements, including explicit HINTS (Ian)
|
|
||||||
Add --log-level to explicitly set log level on command line (Ian)
|
|
||||||
Repurpose --verbose to display extra log output (Ian)
|
|
||||||
Add --terse to hide hints and other non-critical output (Ian)
|
|
||||||
Reference internal functions with explicit catalog path (Ian)
|
|
||||||
When following a new primary, have repmgr (not repmgrd) create the new slot (Ian)
|
|
||||||
Add /etc/repmgr.conf as a default configuration file location (Ian)
|
|
||||||
Prevent repmgrd's -v/--verbose option expecting a parameter (Ian)
|
|
||||||
Prevent invalid replication_lag values being written to the monitoring table (Ian)
|
|
||||||
Improve repmgrd behaviour when monitored standby node is temporarily
|
|
||||||
unavailable (Martín)
|
|
||||||
|
|
||||||
3.0.2 2015-10-02
|
|
||||||
Improve handling of --help/--version options; and improve help output (Ian)
|
|
||||||
Improve handling of situation where logfile can't be opened (Ian)
|
|
||||||
Always pass -D/--pgdata option to pg_basebackup (Ian)
|
|
||||||
Bugfix: standby clone --force does not empty pg_xlog (Gianni)
|
|
||||||
Bugfix: autofailover with reconnect_attempts > 1 (Gianni)
|
|
||||||
Bugfix: ignore comments after values (soxwellfb)
|
|
||||||
Bugfix: handle string values in 'node' parameter correctly (Gregory Duchatelet)
|
|
||||||
Allow repmgr to be compiled with a newer libpq (Marco)
|
|
||||||
Bugfix: call update_node_record_set_upstream() for STANDBY FOLLOW (Tomas)
|
|
||||||
Update `repmgr --help` output (per Github report from renard)
|
|
||||||
Update tablespace remapping in --rsync-only mode for 9.5 and later (Ian)
|
|
||||||
Deprecate `-l/--local-port` option - the port can be extracted
|
|
||||||
from the conninfo string in repmgr.conf (Ian)
|
|
||||||
Add STANDBY UNREGISTER (Vik Fearing)
|
|
||||||
Don't fail with error when registering master if schema already defined (Ian)
|
|
||||||
Fixes to whitespace handling when parsing config file (Ian)
|
|
||||||
|
|
||||||
3.0.1 2015-04-16
|
|
||||||
Prevent repmgrd from looping infinitely if node was not registered (Ian)
|
|
||||||
When promoting a standby, have repmgr (not repmgrd) handle metadata updates (Ian)
|
|
||||||
Re-use replication slot if it already exists (Ian)
|
|
||||||
Prevent a test SSH connection being made when not needed (Ian)
|
|
||||||
Correct monitoring table column names (Ian)
|
|
||||||
|
|
||||||
3.0 2015-03-27
|
|
||||||
Require PostgreSQL 9.3 or later (Ian)
|
|
||||||
Use `pg_basebackup` by default (instead of `rsync`) to clone standby servers (Ian)
|
|
||||||
Use `pg_ctl promote` to promote a standby to primary
|
|
||||||
Enable tablespace remapping using `pg_basebackup` (in PostgreSQL 9.3 with `rsync`) (Ian)
|
|
||||||
Support cascaded standbys (Ian)
|
|
||||||
"pg_bindir" no longer required as a configuration parameter (Ian)
|
|
||||||
Enable replication slots to be used (PostgreSQL 9.4 and later) (Ian)
|
|
||||||
Command line option "--check-upstream-config" (Ian)
|
|
||||||
Add event logging table and option to execute an external program when an event occurs (Ian)
|
|
||||||
General usability and logging message improvements (Ian)
|
|
||||||
Code consolidation and cleanup (Ian)
|
|
||||||
|
|
||||||
2.0.3 2015-04-16
|
2.0.3 2015-04-16
|
||||||
Add -S/--superuser option for witness database creation Ian)
|
Add -S/--superuser option for witness database creation (Ian)
|
||||||
Add -c/--fast-checkpoint option for cloning (Christoph)
|
Add -c/--fast-checkpoint option for cloning (Christoph)
|
||||||
Add option "--initdb-no-pwprompt" (Ian)
|
Add option "--initdb-no-pwprompt" (Ian)
|
||||||
|
|
||||||
@@ -103,15 +45,15 @@
|
|||||||
Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime)
|
Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime)
|
||||||
Add tunables to specify number of retries to reconnect to master and the time between them (Jaime)
|
Add tunables to specify number of retries to reconnect to master and the time between them (Jaime)
|
||||||
|
|
||||||
1.2.0 2012-07-27
|
1.2.0 2012-07-27
|
||||||
Test ssh connection before trying to rsync (Cédric)
|
Test ssh connection before trying to rsync (Cédric)
|
||||||
Add CLUSTER SHOW command (Carlo)
|
Add CLUSTER SHOW command (Carlo)
|
||||||
Add CLUSTER CLEANUP command (Jaime)
|
Add CLUSTER CLEANUP command (Jaime)
|
||||||
Add function write_primary_conninfo (Marco)
|
Add function write_primary_conninfo (Marco)
|
||||||
Teach repmgr how to get tablespace's location in different pg version (Jaime)
|
Teach repmgr how to get tablespace's location in different pg version (Jaime)
|
||||||
Improve version message (Carlo)
|
Improve version message (Carlo)
|
||||||
|
|
||||||
1.1.1 2012-04-18
|
1.1.1 2012-04-18
|
||||||
Add --ignore-rsync-warning (Cédric)
|
Add --ignore-rsync-warning (Cédric)
|
||||||
Add strnlen for compatibility with OS X (Greg)
|
Add strnlen for compatibility with OS X (Greg)
|
||||||
Improve performance of the repl_status view (Jaime)
|
Improve performance of the repl_status view (Jaime)
|
||||||
|
|||||||
2
Makefile
2
Makefile
@@ -1,6 +1,6 @@
|
|||||||
#
|
#
|
||||||
# Makefile
|
# Makefile
|
||||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
# Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
|
|
||||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||||
|
|||||||
127
PACKAGES.md
127
PACKAGES.md
@@ -1,127 +0,0 @@
|
|||||||
Packaging
|
|
||||||
=========
|
|
||||||
|
|
||||||
Notes on RedHat Linux, Fedora, and CentOS Builds
|
|
||||||
------------------------------------------------
|
|
||||||
|
|
||||||
The RPM packages of PostgreSQL put `pg_config` into the `postgresql-devel`
|
|
||||||
package, not the main server one. And if you have a RPM install of PostgreSQL
|
|
||||||
9.0, the entire PostgreSQL binary directory will not be in your PATH by default
|
|
||||||
either. Individual utilities are made available via the `alternatives`
|
|
||||||
mechanism, but not all commands will be wrapped that way. The files installed
|
|
||||||
by repmgr will certainly not be in the default PATH for the postgres user
|
|
||||||
on such a system. They will instead be in /usr/pgsql-9.0/bin/ on this
|
|
||||||
type of system.
|
|
||||||
|
|
||||||
When building repmgr against a RPM packaged build, you may discover that some
|
|
||||||
development packages are needed as well. The following build errors can
|
|
||||||
occur:
|
|
||||||
|
|
||||||
/usr/bin/ld: cannot find -lxslt
|
|
||||||
/usr/bin/ld: cannot find -lpam
|
|
||||||
|
|
||||||
Install the following packages to correct those:
|
|
||||||
|
|
||||||
|
|
||||||
yum install libxslt-devel
|
|
||||||
yum install pam-devel
|
|
||||||
|
|
||||||
If building repmgr as a regular user, then doing the install into the system
|
|
||||||
directories using sudo, the syntax is hard. `pg_config` won't be in root's
|
|
||||||
path either. The following recipe should work:
|
|
||||||
|
|
||||||
sudo PATH="/usr/pgsql-9.0/bin:$PATH" make USE_PGXS=1 install
|
|
||||||
|
|
||||||
|
|
||||||
Issues with 32 and 64 bit RPMs
|
|
||||||
------------------------------
|
|
||||||
|
|
||||||
If when building, you receive a series of errors of this form:
|
|
||||||
|
|
||||||
/usr/bin/ld: skipping incompatible /usr/pgsql-9.0/lib/libpq.so when searching for -lpq
|
|
||||||
|
|
||||||
This is likely because you have both the 32 and 64 bit versions of the
|
|
||||||
`postgresql90-devel` package installed. You can check that like this:
|
|
||||||
|
|
||||||
rpm -qa --queryformat '%{NAME}\t%{ARCH}\n' | grep postgresql90-devel
|
|
||||||
|
|
||||||
And if two packages appear, one for i386 and one for x86_64, that's not supposed
|
|
||||||
to be allowed.
|
|
||||||
|
|
||||||
This can happen when using the PGDG repo to install that package;
|
|
||||||
here is an example session demonstrating the problem case appearing:
|
|
||||||
|
|
||||||
|
|
||||||
# yum install postgresql-devel
|
|
||||||
..
|
|
||||||
Setting up Install Process
|
|
||||||
Resolving Dependencies
|
|
||||||
--> Running transaction check
|
|
||||||
---> Package postgresql90-devel.i386 0:9.0.2-2PGDG.rhel5 set to be updated
|
|
||||||
---> Package postgresql90-devel.x86_64 0:9.0.2-2PGDG.rhel5 set to be updated
|
|
||||||
--> Finished Dependency Resolution
|
|
||||||
|
|
||||||
Dependencies Resolved
|
|
||||||
|
|
||||||
=========================================================================
|
|
||||||
Package Arch Version Repository Size
|
|
||||||
=========================================================================
|
|
||||||
Installing:
|
|
||||||
postgresql90-devel i386 9.0.2-2PGDG.rhel5 pgdg90 1.5 M
|
|
||||||
postgresql90-devel x86_64 9.0.2-2PGDG.rhel5 pgdg90 1.6 M
|
|
||||||
|
|
||||||
|
|
||||||
Note how both the i386 and x86_64 platform architectures are selected for
|
|
||||||
installation. Your main PostgreSQL package will only be compatible with one of
|
|
||||||
those, and if the repmgr build finds the wrong postgresql90-devel these
|
|
||||||
"skipping incompatible" messages appear.
|
|
||||||
|
|
||||||
In this case, you can temporarily remove both packages, then just install the
|
|
||||||
correct one for your architecture. Example:
|
|
||||||
|
|
||||||
rpm -e postgresql90-devel --allmatches
|
|
||||||
yum install postgresql90-devel-9.0.2-2PGDG.rhel5.x86_64
|
|
||||||
|
|
||||||
Just deleting the package from the wrong platform instead might not leave behind
|
|
||||||
the correct files, due to the way in which these accidentally happen to interact.
|
|
||||||
If you already tried to build repmgr before doing this, you'll need to do:
|
|
||||||
|
|
||||||
make USE_PGXS=1 clean
|
|
||||||
|
|
||||||
to get rid of leftover files from the wrong architecture.
|
|
||||||
|
|
||||||
Notes on Ubuntu, Debian or other Debian-based Builds
|
|
||||||
----------------------------------------------------
|
|
||||||
|
|
||||||
The Debian packages of PostgreSQL put `pg_config` into the development package
|
|
||||||
called `postgresql-server-dev-$version`.
|
|
||||||
|
|
||||||
When building repmgr against a Debian packages build, you may discover that some
|
|
||||||
development packages are needed as well. You will need the following development
|
|
||||||
packages installed:
|
|
||||||
|
|
||||||
sudo apt-get install libxslt-dev libxml2-dev libpam-dev libedit-dev
|
|
||||||
|
|
||||||
If you're using Debian packages for PostgreSQL and are building repmgr with the
|
|
||||||
USE_PGXS option you also need to install the corresponding development package:
|
|
||||||
|
|
||||||
sudo apt-get install postgresql-server-dev-9.0
|
|
||||||
|
|
||||||
If you build and install repmgr manually it will not be on the system path. The
|
|
||||||
binaries will be installed in /usr/lib/postgresql/$version/bin/ which is not on
|
|
||||||
the default path. The reason behind this is that Ubuntu/Debian systems manage
|
|
||||||
multiple installed versions of PostgreSQL on the same system through a wrapper
|
|
||||||
called pg_wrapper and repmgr is not (yet) known to this wrapper.
|
|
||||||
|
|
||||||
You can solve this in many different ways, the most Debian-like is to make an
|
|
||||||
alternate for repmgr and repmgrd:
|
|
||||||
|
|
||||||
sudo update-alternatives --install /usr/bin/repmgr repmgr /usr/lib/postgresql/9.0/bin/repmgr 10
|
|
||||||
sudo update-alternatives --install /usr/bin/repmgrd repmgrd /usr/lib/postgresql/9.0/bin/repmgrd 10
|
|
||||||
|
|
||||||
You can also make a deb package of repmgr using:
|
|
||||||
|
|
||||||
make USE_PGXS=1 deb
|
|
||||||
|
|
||||||
This will build a Debian package one level up from where you build, normally the
|
|
||||||
same directory that you have your repmgr/ directory in.
|
|
||||||
294
QUICKSTART.md
294
QUICKSTART.md
@@ -1,12 +1,135 @@
|
|||||||
repmgr quickstart guide
|
repmgr: Quickstart guide
|
||||||
=======================
|
========================
|
||||||
|
|
||||||
This quickstart guide provides some annotated examples on basic
|
`repmgr` is an open-source tool suite for managing replication and failover
|
||||||
`repmgr` setup. It assumes you are familiar with PostgreSQL replication
|
among multiple PostgreSQL server nodes. It enhances PostgreSQL's built-in
|
||||||
concepts setup and Linux/UNIX system administration.
|
hot-standby capabilities with a set of administration tools for monitoring
|
||||||
|
replication, setting up standby servers and performing failover/switchover
|
||||||
|
operations.
|
||||||
|
|
||||||
|
This quickstart guide assumes you are familiar with PostgreSQL replication
|
||||||
|
setup and Linux/UNIX system administration. For a more detailed tutorial
|
||||||
|
covering setup on a variety of different systems, see the README.rst file.
|
||||||
|
|
||||||
|
Conceptual Overview
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
`repmgr` provides two binaries:
|
||||||
|
|
||||||
|
- `repmgr`: a command-line client to manage replication and `repmgr` configuration
|
||||||
|
- `repmgrd`: an optional daemon process which runs on standby nodes to monitor
|
||||||
|
replication and node status
|
||||||
|
|
||||||
|
Each PostgreSQL node requires a `repmgr.conf` configuration file; additionally
|
||||||
|
it must be "registered" using the `repmgr` command-line client. `repmgr` stores
|
||||||
|
information about managed nodes in a custom schema on the node's current master
|
||||||
|
database.
|
||||||
|
|
||||||
|
|
||||||
|
Requirements
|
||||||
|
------------
|
||||||
|
|
||||||
|
`repmgr` works with PostgreSQL 9.0 and later. All server nodes must be running the
|
||||||
|
same PostgreSQL major version, and preferably should be running the same minor
|
||||||
|
version.
|
||||||
|
|
||||||
|
`repmgr` will work on any Linux or UNIX-like environment capable of running
|
||||||
|
PostgreSQL. `rsync` must also be installed.
|
||||||
|
|
||||||
|
|
||||||
|
Installation
|
||||||
|
------------
|
||||||
|
|
||||||
|
`repmgr` must be installed on each PostgreSQL server node.
|
||||||
|
|
||||||
|
* Packages
|
||||||
|
- RPM packages for RedHat-based distributions are available from PGDG
|
||||||
|
- Debian/Ubuntu provide .deb packages.
|
||||||
|
|
||||||
|
It is also possible to build .deb packages directly from the `repmgr` source;
|
||||||
|
see README.rst for further details.
|
||||||
|
|
||||||
|
* Source installation
|
||||||
|
- `repmgr` source code is hosted at github (https://github.com/2ndQuadrant/repmgr);
|
||||||
|
tar.gz files can be downloaded from https://github.com/2ndQuadrant/repmgr/releases .
|
||||||
|
|
||||||
|
`repmgr` can be built easily using PGXS:
|
||||||
|
|
||||||
|
sudo make USE_PGXS=1 install
|
||||||
|
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
-------------
|
||||||
|
|
||||||
|
### Server configuration
|
||||||
|
|
||||||
|
Password-less SSH logins must be enabled for the database system user (typically `postgres`)
|
||||||
|
between all server nodes to enable `repmgr` to copy required files.
|
||||||
|
|
||||||
|
### PostgreSQL configuration
|
||||||
|
|
||||||
|
The master PostgreSQL node needs to be configured for replication with the
|
||||||
|
following settings:
|
||||||
|
|
||||||
|
wal_level = 'hot_standby' # minimal, archive, hot_standby, or logical
|
||||||
|
archive_mode = on # allows archiving to be done
|
||||||
|
archive_command = 'cd .' # command to use to archive a logfile segment
|
||||||
|
max_wal_senders = 10 # max number of walsender processes
|
||||||
|
wal_keep_segments = 5000 # in logfile segments, 16MB each; 0 disables
|
||||||
|
hot_standby = on # "on" allows queries during recovery
|
||||||
|
|
||||||
|
Note that `repmgr` expects a default of 5000 wal_keep_segments, although this
|
||||||
|
value can be overridden when executing the `repmgr` client.
|
||||||
|
|
||||||
|
Additionally, `repmgr` requires a dedicated PostgreSQL superuser account
|
||||||
|
and a database in which to store monitoring and replication data. The `repmgr`
|
||||||
|
user account will also be used for replication connections from the standby,
|
||||||
|
so a separate replication user with the `REPLICATION` privilege is not required.
|
||||||
|
The database can in principle be any database, including the default `postgres`
|
||||||
|
one, however it's probably advisable to create a dedicated database for `repmgr`
|
||||||
|
usage.
|
||||||
|
|
||||||
|
|
||||||
|
### repmgr configuration
|
||||||
|
|
||||||
|
Each PostgreSQL node requires a `repmgr.conf` configuration file containing
|
||||||
|
identification and database connection information:
|
||||||
|
|
||||||
|
cluster=test
|
||||||
|
node=1
|
||||||
|
node_name=node1
|
||||||
|
conninfo='host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||||
|
pg_bindir=/path/to/postgres/bin
|
||||||
|
|
||||||
|
* `cluster`: common name for the replication cluster; this must be the same on all nodes
|
||||||
|
* `node`: a unique, arbitrary integer identifier
|
||||||
|
* `node_name`: a unique, human-readable name
|
||||||
|
* `conninfo`: a standard conninfo string enabling repmgr to connect to the
|
||||||
|
control database; user and name must be the same on all nodes, while other
|
||||||
|
parameters such as port may differ. The `host` parameter *must* be a hostname
|
||||||
|
resolvable by all nodes on the cluster.
|
||||||
|
* `pg_bindir`: (optional) location of PostgreSQL binaries, if not in the default $PATH
|
||||||
|
|
||||||
|
Note that the configuration file should *not* be stored inside the PostgreSQL
|
||||||
|
data directory. The configuration file can be specified with the
|
||||||
|
`-f, --config-file=PATH` option and can have any arbitrary name. If no
|
||||||
|
configuration file is specified, `repmgr` will search for `repmgr.conf`
|
||||||
|
in the current working directory.
|
||||||
|
|
||||||
|
Each node configuration needs to be registered with `repmgr`, either using the
|
||||||
|
`repmgr` command line tool, or the `repmgrd` daemon; for details see below. Details
|
||||||
|
about each node are inserted into the `repmgr` database (for details see below).
|
||||||
|
|
||||||
|
|
||||||
|
Replication setup and monitoring
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
For the purposes of this guide, we'll assume the database user will be
|
For the purposes of this guide, we'll assume the database user will be
|
||||||
`repmgr_usr` and the database will be `repmgr_db`.
|
`repmgr_usr` and the database will be `repmgr_db`, and that the following
|
||||||
|
environment variables are set on each node:
|
||||||
|
|
||||||
|
- $HOME: the PostgreSQL system user's home directory
|
||||||
|
- $PGDATA: the PostgreSQL data directory
|
||||||
|
|
||||||
|
|
||||||
Master setup
|
Master setup
|
||||||
@@ -21,8 +144,7 @@ Master setup
|
|||||||
CREATE DATABASE repmgr_db OWNER repmgr_usr;
|
CREATE DATABASE repmgr_db OWNER repmgr_usr;
|
||||||
```
|
```
|
||||||
|
|
||||||
- configure `postgresql.conf` for replication (see README.md for sample
|
- configure `postgresql.conf` for replication (see above)
|
||||||
settings)
|
|
||||||
|
|
||||||
- update `pg_hba.conf`, e.g.:
|
- update `pg_hba.conf`, e.g.:
|
||||||
|
|
||||||
@@ -32,10 +154,9 @@ Master setup
|
|||||||
```
|
```
|
||||||
|
|
||||||
Restart the PostgreSQL server after making these changes.
|
Restart the PostgreSQL server after making these changes.
|
||||||
|
|
||||||
2. Create the `repmgr` configuration file:
|
2. Create the `repmgr` configuration file:
|
||||||
|
|
||||||
$ cat /path/to/repmgr/node1/repmgr.conf
|
$ cat $HOME/repmgr/repmgr.conf
|
||||||
cluster=test
|
cluster=test
|
||||||
node=1
|
node=1
|
||||||
node_name=node1
|
node_name=node1
|
||||||
@@ -47,35 +168,36 @@ Master setup
|
|||||||
|
|
||||||
3. Register the master node with `repmgr`:
|
3. Register the master node with `repmgr`:
|
||||||
|
|
||||||
$ repmgr -f /path/to/repmgr/node1/repmgr.conf --verbose master register
|
$ repmgr -f $HOME/repmgr/repmgr.conf --verbose master register
|
||||||
[2015-03-03 17:45:53] [INFO] repmgr connecting to master database
|
[2014-07-04 10:43:42] [INFO] repmgr mgr connecting to master database
|
||||||
[2015-03-03 17:45:53] [INFO] repmgr connected to master, checking its state
|
[2014-07-04 10:43:42] [INFO] repmgr connected to master, checking its state
|
||||||
[2015-03-03 17:45:53] [INFO] master register: creating database objects inside the repmgr_test schema
|
[2014-07-04 10:43:42] [INFO] master register: creating database objects inside the repmgr_test schema
|
||||||
[2015-03-03 17:45:53] [NOTICE] Master node correctly registered for cluster test with id 1 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
[2014-07-04 10:43:43] [NOTICE] Master node correctly registered for cluster test with id 1 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||||
|
|
||||||
Standby setup
|
|
||||||
-------------
|
|
||||||
|
|
||||||
1. Use `repmgr standby clone` to clone a standby from the master:
|
Slave/standby setup
|
||||||
|
-------------------
|
||||||
|
|
||||||
repmgr -D /path/to/standby/data -d repmgr_db -U repmgr_usr --verbose standby clone 192.168.1.2
|
1. Use `repmgr` to clone the master:
|
||||||
[2015-03-03 18:18:21] [NOTICE] No configuration file provided and default file './repmgr.conf' not found - continuing with default values
|
|
||||||
[2015-03-03 18:18:21] [NOTICE] repmgr Destination directory ' /path/to/standby/data' provided
|
$ repmgr -D $PGDATA -d repmgr_db -U repmgr_usr -R postgres --verbose standby clone 192.168.1.2
|
||||||
[2015-03-03 18:18:21] [INFO] repmgr connecting to upstream node
|
Opening configuration file: ./repmgr.conf
|
||||||
[2015-03-03 18:18:21] [INFO] repmgr connected to upstream node, checking its state
|
[2014-07-04 10:49:00] [ERROR] Did not find the configuration file './repmgr.conf', continuing
|
||||||
[2015-03-03 18:18:21] [INFO] Successfully connected to upstream node. Current installation size is 27 MB
|
[2014-07-04 10:49:00] [INFO] repmgr connecting to master database
|
||||||
[2015-03-03 18:18:21] [NOTICE] Starting backup...
|
[2014-07-04 10:49:00] [INFO] repmgr connected to master, checking its state
|
||||||
[2015-03-03 18:18:21] [INFO] creating directory " /path/to/standby/data"...
|
[2014-07-04 10:49:00] [INFO] Successfully connected to primary. Current installation size is 1807 MB
|
||||||
[2015-03-03 18:18:21] [INFO] Executing: 'pg_basebackup -l "repmgr base backup" -h localhost -p 9595 -U repmgr_usr -D /path/to/standby/data '
|
[2014-07-04 10:49:00] [NOTICE] Starting backup...
|
||||||
|
[2014-07-04 10:49:00] [INFO] creating directory "/path/to/data/"...
|
||||||
|
(...)
|
||||||
|
[2014-07-04 10:53:19] [NOTICE] Finishing backup...
|
||||||
NOTICE: pg_stop_backup complete, all required WAL segments have been archived
|
NOTICE: pg_stop_backup complete, all required WAL segments have been archived
|
||||||
[2015-03-03 18:18:23] [NOTICE] repmgr standby clone (using pg_basebackup) complete
|
[2014-07-04 10:53:21] [INFO] repmgr requires primary to keep WAL files 0000000100000000000000AD until at least 0000000100000000000000AD
|
||||||
[2015-03-03 18:18:23] [NOTICE] HINT: You can now start your postgresql server
|
[2014-07-04 10:53:21] [NOTICE] repmgr standby clone complete
|
||||||
[2015-03-03 18:18:23] [NOTICE] for example : pg_ctl -D /path/to/standby/data start
|
[2014-07-04 10:53:21] [NOTICE] HINT: You can now start your postgresql server
|
||||||
|
[2014-07-04 10:53:21] [NOTICE] for example : /etc/init.d/postgresql start
|
||||||
|
|
||||||
Note that the `repmgr.conf` file is not required when cloning a standby.
|
-R is the database system user on the master node. At this point it does not matter
|
||||||
However we recommend providing a valid `repmgr.conf` if you wish to use
|
if the `repmgr.conf` file is not found.
|
||||||
replication slots, or want `repmgr` to log the clone event to the
|
|
||||||
`repl_events` table.
|
|
||||||
|
|
||||||
This will clone the PostgreSQL database files from the master, including its
|
This will clone the PostgreSQL database files from the master, including its
|
||||||
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
|
||||||
@@ -86,33 +208,97 @@ Standby setup
|
|||||||
|
|
||||||
3. Create the `repmgr` configuration file:
|
3. Create the `repmgr` configuration file:
|
||||||
|
|
||||||
$ cat /path/node2/repmgr/repmgr.conf
|
$ cat $HOME/repmgr/repmgr.conf
|
||||||
cluster=test
|
cluster=test
|
||||||
node=2
|
node=2
|
||||||
node_name=node2
|
node_name=node2
|
||||||
conninfo='host=repmgr_node2 user=repmgr_usr dbname=repmgr_db'
|
conninfo='host=repmgr_node2 user=repmgr_usr dbname=repmgr_db'
|
||||||
pg_bindir=/path/to/postgres/bin
|
pg_bindir=/path/to/postgres/bin
|
||||||
|
|
||||||
4. Register the standby node with `repmgr`:
|
4. Register the standby node with `repmgr`:
|
||||||
|
|
||||||
$ repmgr -f /path/to/repmgr/node2/repmgr.conf --verbose standby register
|
$ repmgr -f $HOME/repmgr/repmgr.conf --verbose standby register
|
||||||
[2015-03-03 18:24:34] [NOTICE] Opening configuration file: /path/to/repmgr/node2/repmgr.conf
|
Opening configuration file: /path/to/repmgr/repmgr.conf
|
||||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to standby database
|
[2014-07-04 11:48:13] [INFO] repmgr connecting to standby database
|
||||||
[2015-03-03 18:24:34] [INFO] repmgr connecting to master database
|
[2014-07-04 11:48:13] [INFO] repmgr connected to standby, checking its state
|
||||||
[2015-03-03 18:24:34] [INFO] finding node list for cluster 'test'
|
[2014-07-04 11:48:13] [INFO] repmgr connecting to master database
|
||||||
[2015-03-03 18:24:34] [INFO] checking role of cluster node '1'
|
[2014-07-04 11:48:13] [INFO] finding node list for cluster 'test'
|
||||||
[2015-03-03 18:24:34] [INFO] repmgr connected to master, checking its state
|
[2014-07-04 11:48:13] [INFO] checking role of cluster node 'host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby
|
[2014-07-04 11:48:13] [INFO] repmgr connected to master, checking its state
|
||||||
[2015-03-03 18:24:34] [INFO] repmgr registering the standby complete
|
[2014-07-04 11:48:13] [INFO] repmgr registering the standby
|
||||||
[2015-03-03 18:24:34] [NOTICE] Standby node correctly registered for cluster test with id 2 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
[2014-07-04 11:48:13] [INFO] repmgr registering the standby complete
|
||||||
|
[2014-07-04 11:48:13] [NOTICE] Standby node correctly registered for cluster test with id 2 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
|
||||||
|
|
||||||
|
Monitoring
|
||||||
|
----------
|
||||||
|
|
||||||
|
`repmgrd` is a management and monitoring daemon which runs on standby nodes
|
||||||
|
and which can automate remote actions. It can be started simply with e.g.:
|
||||||
|
|
||||||
|
repmgrd -f $HOME/repmgr/repmgr.conf --verbose > $HOME/repmgr/repmgr.log 2>&1
|
||||||
|
|
||||||
|
or alternatively:
|
||||||
|
|
||||||
|
repmgrd -f $HOME/repmgr/repmgr.conf --verbose --monitoring-history > $HOME/repmgr/repmgrd.log 2>&1
|
||||||
|
|
||||||
|
which will track advance or lag of the replication in every standby in the
|
||||||
|
`repl_monitor` table.
|
||||||
|
|
||||||
|
Example log output:
|
||||||
|
|
||||||
|
[2014-07-04 11:55:17] [INFO] repmgrd Connecting to database 'host=localhost user=repmgr_usr dbname=repmgr_db'
|
||||||
|
[2014-07-04 11:55:17] [INFO] repmgrd Connected to database, checking its state
|
||||||
|
[2014-07-04 11:55:17] [INFO] repmgrd Connecting to primary for cluster 'test'
|
||||||
|
[2014-07-04 11:55:17] [INFO] finding node list for cluster 'test'
|
||||||
|
[2014-07-04 11:55:17] [INFO] checking role of cluster node 'host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
|
||||||
|
[2014-07-04 11:55:17] [INFO] repmgrd Checking cluster configuration with schema 'repmgr_test'
|
||||||
|
[2014-07-04 11:55:17] [INFO] repmgrd Checking node 2 in cluster 'test'
|
||||||
|
[2014-07-04 11:55:17] [INFO] Reloading configuration file and updating repmgr tables
|
||||||
|
[2014-07-04 11:55:17] [INFO] repmgrd Starting continuous standby node monitoring
|
||||||
|
|
||||||
|
|
||||||
This concludes the basic `repmgr` setup of master and standby. The records
|
Failover
|
||||||
created in the `repl_nodes` table should look something like this:
|
--------
|
||||||
|
|
||||||
|
To promote a standby to master, on the standby execute e.g.:
|
||||||
|
|
||||||
|
repmgr -f $HOME/repmgr/repmgr.conf --verbose standby promote
|
||||||
|
|
||||||
|
`repmgr` will attempt to connect to the current master to verify that it
|
||||||
|
is not available (if it is, `repmgr` will not promote the standby).
|
||||||
|
|
||||||
|
Other standby servers need to be told to follow the new master with:
|
||||||
|
|
||||||
|
repmgr -f $HOME/repmgr/repmgr.conf --verbose standby follow
|
||||||
|
|
||||||
|
See file `autofailover_quick_setup.rst` for details on setting up
|
||||||
|
automated failover.
|
||||||
|
|
||||||
|
|
||||||
|
repmgr database schema
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
`repmgr` creates a small schema for its own use in the database specified in
|
||||||
|
each node's conninfo configuration parameter. This database can in principle
|
||||||
|
be any database. The schema name is the global `cluster` name prefixed
|
||||||
|
with `repmgr_`, so for the example setup above the schema name is
|
||||||
|
`repmgr_test`.
|
||||||
|
|
||||||
|
The schema contains two tables:
|
||||||
|
|
||||||
|
* `repl_nodes`
|
||||||
|
stores information about all registered servers in the cluster
|
||||||
|
* `repl_monitor`
|
||||||
|
stores monitoring information about each node
|
||||||
|
|
||||||
|
and one view, `repl_status`, which summarizes the latest monitoring information
|
||||||
|
for each node.
|
||||||
|
|
||||||
|
|
||||||
|
Further reading
|
||||||
|
---------------
|
||||||
|
|
||||||
|
* http://blog.2ndquadrant.com/announcing-repmgr-2-0/
|
||||||
|
* http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
||||||
|
* http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
|
||||||
|
|
||||||
repmgr_db=# SELECT * from repmgr_test.repl_nodes;
|
|
||||||
id | type | upstream_node_id | cluster | name | conninfo | slot_name | priority | active
|
|
||||||
----+---------+------------------+---------+-------+----------------------------------------------------+-----------+----------+--------
|
|
||||||
1 | primary | | test | node1 | host=repmgr_node1 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
|
||||||
2 | standby | 1 | test | node2 | host=repmgr_node2 user=repmgr_usr dbname=repmgr_db | | 0 | t
|
|
||||||
(2 rows)
|
|
||||||
|
|||||||
666
README.md
666
README.md
@@ -1,666 +0,0 @@
|
|||||||
repmgr: Replication Manager for PostgreSQL
|
|
||||||
==========================================
|
|
||||||
|
|
||||||
`repmgr` is an open-source tool to manage replication and failover
|
|
||||||
between multiple PostgreSQL servers. It enhances PostgreSQL's built-in
|
|
||||||
hot-standby capabilities with tools to set up standby servers, monitor
|
|
||||||
replication, and perform administrative tasks such as failover or manual
|
|
||||||
switchover operations.
|
|
||||||
|
|
||||||
This document covers `repmgr 3`, which supports PostgreSQL 9.3 and later.
|
|
||||||
This version can use `pg_basebackup` to clone standby servers, supports
|
|
||||||
replication slots and cascading replication, doesn't require a restart
|
|
||||||
after promotion, and has many usability improvements.
|
|
||||||
|
|
||||||
Please continue to use `repmgr 2` with PostgreSQL 9.2 and earlier.
|
|
||||||
For a list of changes since `repmgr 2` and instructions on upgrading to
|
|
||||||
`repmgr 3`, see the "Upgrading from repmgr 2" section below.
|
|
||||||
|
|
||||||
For a list of frequently asked questions about `repmgr`, please refer
|
|
||||||
to the file `FAQ.md`.
|
|
||||||
|
|
||||||
Overview
|
|
||||||
--------
|
|
||||||
|
|
||||||
The `repmgr` command-line tool is used to perform administrative tasks,
|
|
||||||
and the `repmgrd` daemon is used to optionally monitor replication and
|
|
||||||
manage automatic failover.
|
|
||||||
|
|
||||||
To get started, each PostgreSQL node in your cluster must have a
|
|
||||||
`repmgr.conf` file. The current master node must be registered using
|
|
||||||
`repmgr master register`. Existing standby servers can be registered
|
|
||||||
using `repmgr standby register`. A new standby server can be created
|
|
||||||
using `repmgr standby clone` followed by `repmgr standby register`.
|
|
||||||
|
|
||||||
See the `QUICKSTART.md` file for examples of how to use these commands.
|
|
||||||
|
|
||||||
Once the cluster is in operation, run `repmgr cluster show` to see the
|
|
||||||
status of the registered primary and standby nodes. Any standby can be
|
|
||||||
manually promoted using `repmgr standby promote`. Other standby nodes
|
|
||||||
can be told to follow the new master using `repmgr standby follow`. We
|
|
||||||
show examples of these commands below.
|
|
||||||
|
|
||||||
Next, for detailed monitoring, you must run `repmgrd` (with the same
|
|
||||||
configuration file) on all your nodes. Replication status information is
|
|
||||||
stored in a custom schema along with information about registered nodes.
|
|
||||||
You also need `repmgrd` to configure automatic failover in your cluster.
|
|
||||||
|
|
||||||
See the `FAILOVER.rst` file for an explanation of how to set up
|
|
||||||
automatic failover.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
`repmgr` is developed and tested on Linux and OS X, but it should work
|
|
||||||
on any UNIX-like system which PostgreSQL itself supports.
|
|
||||||
|
|
||||||
All nodes must be running the same major version of PostgreSQL, and we
|
|
||||||
recommend that they also run the same minor version. This version of
|
|
||||||
`repmgr` (v3) supports PostgreSQL 9.3 and later.
|
|
||||||
|
|
||||||
Earlier versions of `repmgr` needed password-less SSH access between
|
|
||||||
nodes in order to clone standby servers using `rsync`. `repmgr 3` can
|
|
||||||
use `pg_basebackup` instead in most circumstances; ssh is not required.
|
|
||||||
|
|
||||||
You will need to use rsync only if your PostgreSQL configuration files
|
|
||||||
are outside your data directory (as on Debian) and you wish these to
|
|
||||||
be copied by `repmgr`. See the `SSH-RSYNC.md` file for details on
|
|
||||||
configuring password-less SSH between your nodes.
|
|
||||||
|
|
||||||
Installation
|
|
||||||
------------
|
|
||||||
|
|
||||||
`repmgr` must be installed on each PostgreSQL server node.
|
|
||||||
|
|
||||||
* Packages
|
|
||||||
- PGDG publishes RPM packages for RedHat-based distributions
|
|
||||||
- Debian/Ubuntu provide .deb packages.
|
|
||||||
- See `PACKAGES.md` for details on building .deb and .rpm packages
|
|
||||||
from the `repmgr` source code.
|
|
||||||
|
|
||||||
* Source installation
|
|
||||||
- `git clone https://github.com/2ndQuadrant/repmgr`
|
|
||||||
- Or download tar.gz files from
|
|
||||||
https://github.com/2ndQuadrant/repmgr/releases
|
|
||||||
- To install from source, run `sudo make USE_PGXS=1 install`
|
|
||||||
|
|
||||||
After installation, you should be able to run `repmgr --version` and
|
|
||||||
`repmgrd --version`. These binaries should be installed in the same
|
|
||||||
directory as other PostgreSQL binaries, such as `psql`.
|
|
||||||
|
|
||||||
Configuration
|
|
||||||
-------------
|
|
||||||
|
|
||||||
### Server configuration
|
|
||||||
|
|
||||||
By default, `repmgr` uses PostgreSQL's built-in replication protocol to
|
|
||||||
clone a primary and create a standby server. If your configuration files
|
|
||||||
live outside your data directory, however, you will still need to set up
|
|
||||||
password-less SSH so that rsync can be used. See the `SSH-RSYNC.md` file
|
|
||||||
for details.
|
|
||||||
|
|
||||||
### PostgreSQL configuration
|
|
||||||
|
|
||||||
The primary server needs to be configured for replication with settings
|
|
||||||
like the following in `postgresql.conf`:
|
|
||||||
|
|
||||||
# Allow read-only queries on standby servers. The number of WAL
|
|
||||||
# senders should be larger than the number of standby servers.
|
|
||||||
|
|
||||||
hot_standby = on
|
|
||||||
wal_level = 'hot_standby'
|
|
||||||
max_wal_senders = 10
|
|
||||||
|
|
||||||
# How much WAL to retain on the primary to allow a temporarily
|
|
||||||
# disconnected standby to catch up again. The larger this is, the
|
|
||||||
# longer the standby can be disconnected. This is needed only in
|
|
||||||
# 9.3; from 9.4, replication slots can be used instead (see below).
|
|
||||||
|
|
||||||
wal_keep_segments = 5000
|
|
||||||
|
|
||||||
# Enable archiving, but leave it unconfigured (so that it can be
|
|
||||||
# configured without a restart later). Recommended, not required.
|
|
||||||
|
|
||||||
archive_mode = on
|
|
||||||
archive_command = 'cd .'
|
|
||||||
|
|
||||||
# If you plan to use repmgrd, ensure that shared_preload_libraries
|
|
||||||
# is configured to load 'repmgr_funcs'
|
|
||||||
|
|
||||||
shared_preload_libraries = 'repmgr_funcs'
|
|
||||||
|
|
||||||
PostgreSQL 9.4 makes it possible to use replication slots, which means
|
|
||||||
the value of `wal_keep_segments` need no longer be set. See section
|
|
||||||
"Replication slots" below for more details.
|
|
||||||
|
|
||||||
With PostgreSQL 9.3, `repmgr` expects `wal_keep_segments` to be set to
|
|
||||||
at least 5000 (= 80GB of WAL) by default, though this can be overridden
|
|
||||||
with the `-w N` argument.
|
|
||||||
|
|
||||||
A dedicated PostgreSQL superuser account and a database in which to
|
|
||||||
store monitoring and replication data are required. Create them by
|
|
||||||
running the following commands:
|
|
||||||
|
|
||||||
createuser -s repmgr
|
|
||||||
createdb repmgr -O repmgr
|
|
||||||
|
|
||||||
We recommend using the name `repmgr` for both user and database, but you
|
|
||||||
can use whatever name you like (and you need to set the names you chose
|
|
||||||
in the `conninfo` string in `repmgr.conf`; see below). We also recommend
|
|
||||||
that you set the `repmgr` user's search path to include the `repmgr` schema
|
|
||||||
for convenience when querying the metadata tables and views.
|
|
||||||
|
|
||||||
The `repmgr` application will create its metadata schema in the `repmgr`
|
|
||||||
database when the master server is registered.
|
|
||||||
|
|
||||||
### repmgr configuration
|
|
||||||
|
|
||||||
Create a `repmgr.conf` file on each server. Here's a minimal sample:
|
|
||||||
|
|
||||||
cluster=test
|
|
||||||
node=1
|
|
||||||
node_name=node1
|
|
||||||
conninfo='host=repmgr_node1 user=repmgr dbname=repmgr'
|
|
||||||
|
|
||||||
The `cluster` name must be the same on all nodes. The `node` (an
|
|
||||||
integer) and `node_name` must be unique to each node.
|
|
||||||
|
|
||||||
The `conninfo` string must point to repmgr's database *on this node*.
|
|
||||||
The host must be an IP or a name that all the nodes in the cluster can
|
|
||||||
resolve (not `localhost`!). All nodes must use the same username and
|
|
||||||
database name, but other parameters, such as the port, can vary between
|
|
||||||
nodes.
|
|
||||||
|
|
||||||
Your `repmgr.conf` should not be stored inside the PostgreSQL data
|
|
||||||
directory. We recommend `/etc/repmgr/repmgr.conf`, but you can place it
|
|
||||||
anywhere and use the `-f /path/to/repmgr.conf` option to tell `repmgr`
|
|
||||||
where it is. If not specified, `repmgr` will search for `repmgr.conf` in
|
|
||||||
the current working directory.
|
|
||||||
|
|
||||||
If your PostgreSQL binaries (`pg_ctl`, `pg_basebackup`) are not in your
|
|
||||||
`PATH`, you can specify an alternate location in `repmgr.conf`:
|
|
||||||
|
|
||||||
pg_bindir=/path/to/postgres/bin
|
|
||||||
|
|
||||||
See `repmgr.conf.sample` for an example configuration file with all
|
|
||||||
available configuration settings annotated.
|
|
||||||
|
|
||||||
### Starting up
|
|
||||||
|
|
||||||
The master node must be registered first using `repmgr master register`,
|
|
||||||
and standby servers must be registered using `repmgr standby register`;
|
|
||||||
this inserts details about each node into the control database. Use
|
|
||||||
`repmgr cluster show` to see the result.
|
|
||||||
|
|
||||||
See the `QUICKSTART.md` file for examples of how to use these commands.
|
|
||||||
|
|
||||||
Failover
|
|
||||||
--------
|
|
||||||
|
|
||||||
To promote a standby to master, on the standby execute e.g.:
|
|
||||||
|
|
||||||
repmgr -f /etc/repmgr/repmgr.conf --verbose standby promote
|
|
||||||
|
|
||||||
`repmgr` will attempt to connect to the current master to verify that it
|
|
||||||
is not available (if it is, `repmgr` will not promote the standby).
|
|
||||||
|
|
||||||
Other standby servers need to be told to follow the new master with e.g.:
|
|
||||||
|
|
||||||
repmgr -f /etc/repmgr/repmgr.conf --verbose standby follow
|
|
||||||
|
|
||||||
See file `FAILOVER.rst` for details on setting up automated failover.
|
|
||||||
|
|
||||||
|
|
||||||
Converting a failed master to a standby
|
|
||||||
---------------------------------------
|
|
||||||
|
|
||||||
Often it's desirable to bring a failed master back into replication
|
|
||||||
as a standby. First, ensure that the master's PostgreSQL server is
|
|
||||||
no longer running; then use `repmgr standby clone` to re-sync its
|
|
||||||
data directory with the current master, e.g.:
|
|
||||||
|
|
||||||
repmgr -f /etc/repmgr/repmgr.conf \
|
|
||||||
--force --rsync-only \
|
|
||||||
-h node2 -d repmgr -U repmgr --verbose \
|
|
||||||
standby clone
|
|
||||||
|
|
||||||
Here it's essential to use the command line options `--force`, to
|
|
||||||
ensure `repmgr` will re-use the existing data directory, and
|
|
||||||
`--rsync-only`, which causes `repmgr` to use `rsync` rather than
|
|
||||||
`pg_basebackup`, as the latter can only be used to clone a fresh
|
|
||||||
standby.
|
|
||||||
|
|
||||||
The node can then be restarted.
|
|
||||||
|
|
||||||
The node will then need to be re-registered with `repmgr`; again
|
|
||||||
the `--force` option is required to update the existing record:
|
|
||||||
|
|
||||||
repmgr -f /etc/repmgr/repmgr.conf \
|
|
||||||
--force \
|
|
||||||
standby register
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Replication management with repmgrd
|
|
||||||
-----------------------------------
|
|
||||||
|
|
||||||
`repmgrd` is a management and monitoring daemon which runs on standby nodes
|
|
||||||
and which can automate actions such as failover and updating standbys to
|
|
||||||
follow the new master. `repmgrd` can be started simply with e.g.:
|
|
||||||
|
|
||||||
repmgrd -f /etc/repmgr/repmgr.conf --verbose > $HOME/repmgr/repmgr.log 2>&1
|
|
||||||
|
|
||||||
or alternatively:
|
|
||||||
|
|
||||||
repmgrd -f /etc/repmgr/repmgr.conf --verbose --monitoring-history > $HOME/repmgr/repmgrd.log 2>&1
|
|
||||||
|
|
||||||
which will track replication advance or lag on all registered standbys.
|
|
||||||
|
|
||||||
For permanent operation, we recommend using the options `-d/--daemonize` to
|
|
||||||
detach the `repmgrd` process, and `-p/--pid-file` to write the process PID
|
|
||||||
to a file.
|
|
||||||
|
|
||||||
Example log output (at default log level):
|
|
||||||
|
|
||||||
[2015-03-11 13:15:40] [INFO] checking cluster configuration with schema 'repmgr_test'
|
|
||||||
[2015-03-11 13:15:40] [INFO] checking node 2 in cluster 'test'
|
|
||||||
[2015-03-11 13:15:40] [INFO] reloading configuration file and updating repmgr tables
|
|
||||||
[2015-03-11 13:15:40] [INFO] starting continuous standby node monitoring
|
|
||||||
|
|
||||||
Note that currently `repmgrd` does not provide logfile rotation. To ensure
|
|
||||||
the current logfile does not grow indefinitely, configure your system's `logrotate`
|
|
||||||
to do this. Sample configuration to rotate logfiles weekly with retention
|
|
||||||
for up to 52 weeks and rotation forced if a file grows beyond 100MB:
|
|
||||||
|
|
||||||
/var/log/postgresql/repmgr-9.4.log {
|
|
||||||
missingok
|
|
||||||
compress
|
|
||||||
rotate 52
|
|
||||||
maxsize 100M
|
|
||||||
weekly
|
|
||||||
create 0600 postgres postgres
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Witness server
|
|
||||||
--------------
|
|
||||||
|
|
||||||
In a situation caused e.g. by a network interruption between two
|
|
||||||
data centres, it's important to avoid a "split-brain" situation where
|
|
||||||
both sides of the network assume they are the active segment and the
|
|
||||||
side without an active master unilaterally promotes one of its standbys.
|
|
||||||
|
|
||||||
To prevent this situation happening, it's essential to ensure that one
|
|
||||||
network segment has a "voting majority", so other segments will know
|
|
||||||
they're in the minority and not attempt to promote a new master. Where
|
|
||||||
an odd number of servers exists, this is not an issue. However, if each
|
|
||||||
network has an even number of nodes, it's necessary to provide some way
|
|
||||||
of ensuring a majority, which is where the witness server becomes useful.
|
|
||||||
|
|
||||||
This is not a fully-fledged standby node and is not integrated into
|
|
||||||
replication, but it effectively represents the "casting vote" when
|
|
||||||
deciding which network segment has a majority. A witness server can
|
|
||||||
be set up using `repmgr witness create` (see below for details) and
|
|
||||||
can run on a dedicated server or an existing node. Note that it only
|
|
||||||
makes sense to create a witness server in conjunction with running
|
|
||||||
`repmgrd`; the witness server will require its own `repmgrd` instance.
|
|
||||||
|
|
||||||
|
|
||||||
Monitoring
|
|
||||||
----------
|
|
||||||
|
|
||||||
When `repmgrd` is running with the option `-m/--monitoring-history`, it will
|
|
||||||
constantly write node status information to the `repl_monitor` table, which can
|
|
||||||
be queried easily using the view `repl_status`:
|
|
||||||
|
|
||||||
repmgr=# SELECT * FROM repmgr_test.repl_status;
|
|
||||||
-[ RECORD 1 ]-------------+-----------------------------
|
|
||||||
primary_node | 1
|
|
||||||
standby_node | 2
|
|
||||||
standby_name | node2
|
|
||||||
node_type | standby
|
|
||||||
active | t
|
|
||||||
last_monitor_time | 2015-03-11 14:02:34.51713+09
|
|
||||||
last_wal_primary_location | 0/3012AF0
|
|
||||||
last_wal_standby_location | 0/3012AF0
|
|
||||||
replication_lag | 0 bytes
|
|
||||||
replication_time_lag | 00:00:03.463085
|
|
||||||
apply_lag | 0 bytes
|
|
||||||
communication_time_lag | 00:00:00.955385
|
|
||||||
|
|
||||||
|
|
||||||
Event logging and notifications
|
|
||||||
-------------------------------
|
|
||||||
|
|
||||||
To help understand what significant events (e.g. failure of a node) happened
|
|
||||||
when and for what reason, `repmgr` logs such events into the `repl_events`
|
|
||||||
table, e.g.:
|
|
||||||
|
|
||||||
repmgr_db=# SELECT * from repmgr_test.repl_events ;
|
|
||||||
node_id | event | successful | event_timestamp | details
|
|
||||||
---------+------------------+------------+-------------------------------+-----------------------------------------------------------------------------------
|
|
||||||
1 | master_register | t | 2015-03-16 17:36:21.711796+09 |
|
|
||||||
2 | standby_clone | t | 2015-03-16 17:36:31.286934+09 | Cloned from host 'localhost', port 5500; backup method: pg_basebackup; --force: N
|
|
||||||
2 | standby_register | t | 2015-03-16 17:36:32.391567+09 |
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
|
|
||||||
Additionally `repmgr` can execute an external program each time an event is
|
|
||||||
logged. This program is defined with the configuration variable
|
|
||||||
`event_notification_command`; the command string can contain the following
|
|
||||||
placeholders, which will be replaced with the same content which is
|
|
||||||
written to the `repl_events` table:
|
|
||||||
|
|
||||||
%n - node id
|
|
||||||
%e - event type
|
|
||||||
%s - success (1 or 0)
|
|
||||||
%t - timestamp
|
|
||||||
%d - description
|
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||
event_notification_command=/path/to/some-script %n %e %s "%t" "%d"
|
|
||||||
|
|
||||||
By default the program defined with `event_notification_command` will be
|
|
||||||
executed for every event; to restrict execution to certain events, list
|
|
||||||
these in the parameter `event_notifications`:
|
|
||||||
|
|
||||||
event_notifications=master_register,standby_register
|
|
||||||
|
|
||||||
The following event types currently exist:
|
|
||||||
|
|
||||||
master_register
|
|
||||||
standby_register
|
|
||||||
standby_unregister
|
|
||||||
standby_clone
|
|
||||||
standby_promote
|
|
||||||
witness_create
|
|
||||||
repmgrd_start
|
|
||||||
repmgrd_monitor
|
|
||||||
repmgrd_failover_promote
|
|
||||||
repmgrd_failover_follow
|
|
||||||
|
|
||||||
|
|
||||||
Cascading replication
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
Cascading replication - where a standby can connect to an upstream node and not
|
|
||||||
the master server itself - was introduced in PostgreSQL 9.2. `repmgr` and
|
|
||||||
`repmgrd` support cascading replication by keeping track of the relationship
|
|
||||||
between standby servers - each node record is stored with the node id of its
|
|
||||||
upstream ("parent") server (except of course the master server).
|
|
||||||
|
|
||||||
In a failover situation where the master node fails and a top-level standby
|
|
||||||
is promoted, a standby connected to another standby will not be affected
|
|
||||||
and continue working as normal (even if the upstream standby it's connected
|
|
||||||
to becomes the master node). If however the node's direct upstream fails,
|
|
||||||
the "cascaded standby" will attempt to reconnect to that node's parent.
|
|
||||||
|
|
||||||
To configure standby servers for cascading replication, add the parameter
|
|
||||||
`upstream_node` to `repmgr.conf` and set it to the id of the node it should
|
|
||||||
connect to, e.g.:
|
|
||||||
|
|
||||||
cluster=test
|
|
||||||
node=2
|
|
||||||
node_name=node2
|
|
||||||
upstream_node=1
|
|
||||||
|
|
||||||
Replication slots
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
Replication slots were introduced with PostgreSQL 9.4 and enable standbys to
|
|
||||||
notify the master of their WAL consumption, ensuring that the master will
|
|
||||||
not remove any WAL files until they have been received by all standbys.
|
|
||||||
This mitigates the requirement to manage WAL file retention using
|
|
||||||
`wal_keep_segments` etc., with the caveat that if a standby fails, no WAL
|
|
||||||
files will be removed until the standby's replication slot is deleted.
|
|
||||||
|
|
||||||
To enable replication slots, set the boolean parameter `use_replication_slots`
|
|
||||||
in `repmgr.conf`:
|
|
||||||
|
|
||||||
use_replication_slots=1
|
|
||||||
|
|
||||||
`repmgr` will automatically generate an appropriate slot name, which is
|
|
||||||
stored in the `repl_nodes` table.
|
|
||||||
|
|
||||||
Note that `repmgr` will fail with an error if this option is specified when
|
|
||||||
working with PostgreSQL 9.3.
|
|
||||||
|
|
||||||
Be aware that when initially cloning a standby, you will need to ensure
|
|
||||||
that all required WAL files remain available while the cloning is taking
|
|
||||||
place. If using the default `pg_basebackup` method, we recommend setting
|
|
||||||
`pg_basebackup`'s `--xlog-method` parameter to `stream` like this:
|
|
||||||
|
|
||||||
pg_basebackup_options='--xlog-method=stream'
|
|
||||||
|
|
||||||
See the `pg_basebackup` documentation [*] for details. Otherwise you'll need
|
|
||||||
to set `wal_keep_segments` to an appropriately high value.
|
|
||||||
|
|
||||||
[*] http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
|
|
||||||
|
|
||||||
Further reading:
|
|
||||||
* http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
|
|
||||||
* http://blog.2ndquadrant.com/postgresql-9-4-slots/
|
|
||||||
|
|
||||||
Upgrading from repmgr 2
|
|
||||||
-----------------------
|
|
||||||
|
|
||||||
`repmgr 3` is largely compatible with `repmgr 2`; the only step required
|
|
||||||
to upgrade is to update the `repl_nodes` table to the definition needed
|
|
||||||
by `repmgr 3`. See the file `sql/repmgr2_repmgr3.sql` for details on how
|
|
||||||
to do this.
|
|
||||||
|
|
||||||
`repmgrd` must *not* be running while `repl_nodes` is being updated.
|
|
||||||
|
|
||||||
Existing `repmgr.conf` files can be retained as-is.
|
|
||||||
|
|
||||||
---------------------------------------
|
|
||||||
|
|
||||||
Reference
|
|
||||||
---------
|
|
||||||
|
|
||||||
### repmgr command reference
|
|
||||||
|
|
||||||
Not all of these commands need the ``repmgr.conf`` file, but they need to be able to
|
|
||||||
connect to the remote and local databases.
|
|
||||||
|
|
||||||
You can teach it which is the remote database by using the -h parameter or
|
|
||||||
as a last parameter in standby clone and standby follow. If you need to specify
|
|
||||||
a port different than the default 5432 you can specify a -p parameter.
|
|
||||||
Standby is always considered as localhost and a second -p parameter will indicate
|
|
||||||
its port if it is different from the default one.
|
|
||||||
|
|
||||||
* `master register`
|
|
||||||
|
|
||||||
Registers a master in a cluster. This command needs to be executed before any
|
|
||||||
standby nodes are registered.
|
|
||||||
|
|
||||||
`primary register` can be used as an alias for `master register`.
|
|
||||||
|
|
||||||
* `standby register`
|
|
||||||
|
|
||||||
Registers a standby with `repmgr`. This command needs to be executed to enable
|
|
||||||
promote/follow operations and to allow `repmgrd` to work with the node.
|
|
||||||
An existing standby can be registered using this command.
|
|
||||||
|
|
||||||
* `standby unregister`
|
|
||||||
|
|
||||||
Unregisters a standby with `repmgr`. This command does not affect the actual
|
|
||||||
replication.
|
|
||||||
|
|
||||||
* `standby clone [node to be cloned]`
|
|
||||||
|
|
||||||
Clones a new standby node from the data directory of the master (or
|
|
||||||
an upstream cascading standby) using `pg_basebackup` or `rsync`.
|
|
||||||
Additionally it will create the `recovery.conf` file required to
|
|
||||||
start the server as a standby. This command does not require
|
|
||||||
`repmgr.conf` to be provided, but does require connection details
|
|
||||||
of the master or upstream server as command line parameters.
|
|
||||||
|
|
||||||
Provide the `-D/--data-dir` option to specify the destination data
|
|
||||||
directory; if not, the same directory path as on the source server
|
|
||||||
will be used. By default, `pg_basebackup` will be used to copy data
|
|
||||||
from the master or upstream node but this can only be used for
|
|
||||||
bootstrapping new installations. To update an existing but 'stale'
|
|
||||||
data directory (for example belonging to a failed master), `rsync`
|
|
||||||
must be used by specifying `--rsync-only`. In this case,
|
|
||||||
password-less SSH connections between servers are required.
|
|
||||||
|
|
||||||
* `standby promote`
|
|
||||||
|
|
||||||
Promotes a standby to a master if the current master has failed. This
|
|
||||||
command requires a valid `repmgr.conf` file for the standby, either
|
|
||||||
specified explicitly with `-f/--config-file` or located in the current
|
|
||||||
working directory; no additional arguments are required.
|
|
||||||
|
|
||||||
If the standby promotion succeeds, the server will not need to be
|
|
||||||
restarted. However any other standbys will need to follow the new server,
|
|
||||||
by using `standby follow` (see below); if `repmgrd` is active, it will
|
|
||||||
handle this.
|
|
||||||
|
|
||||||
This command will not function if the current master is still running.
|
|
||||||
|
|
||||||
* `witness create`
|
|
||||||
|
|
||||||
Creates a witness server as a separate PostgreSQL instance. This instance
|
|
||||||
can be on a separate server or a server running an existing node. The
|
|
||||||
witness server contains a copy of the repmgr metadata tables but will not
|
|
||||||
be set up as a standby; instead it will update its metadata copy each
|
|
||||||
time a failover occurs.
|
|
||||||
|
|
||||||
Note that it only makes sense to create a witness server if `repmgrd`
|
|
||||||
is in use; see section "witness server" above.
|
|
||||||
|
|
||||||
By default the witness server will use port 5499 to facilitate easier setup
|
|
||||||
on a server running an existing node.
|
|
||||||
|
|
||||||
* `standby follow`
|
|
||||||
|
|
||||||
Attaches the standby to a new master. This command requires a valid
|
|
||||||
`repmgr.conf` file for the standby, either specified explicitly with
|
|
||||||
`-f/--config-file` or located in the current working directory; no
|
|
||||||
additional arguments are required.
|
|
||||||
|
|
||||||
This command will force a restart of the standby server. It can only be used
|
|
||||||
to attach a standby to a new master node.
|
|
||||||
|
|
||||||
* `cluster show`
|
|
||||||
|
|
||||||
Displays information about each node in the replication cluster. This
|
|
||||||
command polls each registered server and shows its role (master / standby /
|
|
||||||
witness) or "FAILED" if the node doesn't respond. It polls each server
|
|
||||||
directly and can be run on any node in the cluster; this is also useful
|
|
||||||
when analyzing connectivity from a particular node.
|
|
||||||
|
|
||||||
This command requires a valid `repmgr.conf` file for the node on which it is
|
|
||||||
executed, either specified explicitly with `-f/--config-file` or located in
|
|
||||||
the current working directory; no additional arguments are required.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||
repmgr -f /path/to/repmgr.conf cluster show
|
|
||||||
Role | Connection String
|
|
||||||
* master | host=node1 dbname=repmgr user=repmgr
|
|
||||||
standby | host=node2 dbname=repmgr user=repmgr
|
|
||||||
standby | host=node3 dbname=repmgr user=repmgr
|
|
||||||
|
|
||||||
|
|
||||||
* `cluster cleanup`
|
|
||||||
|
|
||||||
Purges monitoring history from the `repl_monitor` table to prevent excessive
|
|
||||||
table growth. Use the `-k/--keep-history` option to specify the number of days of
|
|
||||||
monitoring history to retain. This command can be used manually or as a
|
|
||||||
cronjob.
|
|
||||||
|
|
||||||
This command requires a valid `repmgr.conf` file for the node on which it is
|
|
||||||
executed, either specified explicitly with `-f/--config-file` or located in
|
|
||||||
the current working directory; no additional arguments are required.
|
|
||||||
|
|
||||||
### repmgr configuration file
|
|
||||||
|
|
||||||
See `repmgr.conf.sample` for an example configuration file with available
|
|
||||||
configuration settings annotated.
|
|
||||||
|
|
||||||
### repmgr database schema
|
|
||||||
|
|
||||||
`repmgr` creates a small schema for its own use in the database specified in
|
|
||||||
each node's `conninfo` configuration parameter. This database can in principle
|
|
||||||
be any database. The schema name is the global `cluster` name prefixed
|
|
||||||
with `repmgr_`, so for the example setup above the schema name is
|
|
||||||
`repmgr_test`.
|
|
||||||
|
|
||||||
The schema contains two tables:
|
|
||||||
|
|
||||||
* `repl_nodes`
|
|
||||||
stores information about all registered servers in the cluster
|
|
||||||
* `repl_monitor`
|
|
||||||
stores monitoring information about each node (generated by `repmgrd` with
|
|
||||||
`-m/--monitoring-history` option enabled)
|
|
||||||
|
|
||||||
and one view:
|
|
||||||
* `repl_status`
|
|
||||||
summarizes the latest monitoring information for each node (generated by `repmgrd` with
|
|
||||||
`-m/--monitoring-history` option enabled)
|
|
||||||
|
|
||||||
### Error codes
|
|
||||||
|
|
||||||
`repmgr` or `repmgrd` will return one of the following error codes on program
|
|
||||||
exit:
|
|
||||||
|
|
||||||
* SUCCESS (0) Program ran successfully.
|
|
||||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
|
||||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
|
||||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
|
||||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
|
||||||
* ERR_DB_QUERY (7) Error while executing a database query
|
|
||||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
|
||||||
* ERR_BAD_PASSWORD (9) Password used to connect to a database was rejected
|
|
||||||
* ERR_STR_OVERFLOW (10) String overflow error
|
|
||||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
|
||||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
|
||||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
|
||||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
|
||||||
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
|
||||||
|
|
||||||
Support and Assistance
|
|
||||||
----------------------
|
|
||||||
|
|
||||||
2ndQuadrant provides 24x7 production support for repmgr, including
|
|
||||||
configuration assistance, installation verification and training for
|
|
||||||
running a robust replication cluster. For further details see:
|
|
||||||
|
|
||||||
* http://2ndquadrant.com/en/support/
|
|
||||||
|
|
||||||
There is a mailing list/forum to discuss contributions or issues
|
|
||||||
http://groups.google.com/group/repmgr
|
|
||||||
|
|
||||||
The IRC channel #repmgr is registered with freenode.
|
|
||||||
|
|
||||||
Further information is available at http://www.repmgr.org/
|
|
||||||
|
|
||||||
We'd love to hear from you about how you use repmgr. Case studies and
|
|
||||||
news are always welcome. Send us an email at info@2ndQuadrant.com, or
|
|
||||||
send a postcard to
|
|
||||||
|
|
||||||
repmgr
|
|
||||||
c/o 2ndQuadrant
|
|
||||||
7200 The Quorum
|
|
||||||
Oxford Business Park North
|
|
||||||
Oxford
|
|
||||||
OX4 2JZ
|
|
||||||
United Kingdom
|
|
||||||
|
|
||||||
Thanks from the repmgr core team.
|
|
||||||
|
|
||||||
* Ian Barwick
|
|
||||||
* Jaime Casanova
|
|
||||||
* Abhijit Menon-Sen
|
|
||||||
* Simon Riggs
|
|
||||||
* Cedric Villemain
|
|
||||||
|
|
||||||
Further reading
|
|
||||||
---------------
|
|
||||||
|
|
||||||
* http://blog.2ndquadrant.com/announcing-repmgr-2-0/
|
|
||||||
* http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
|
||||||
* http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
|
|
||||||
1144
README.rst
Normal file
1144
README.rst
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,21 +1,18 @@
|
|||||||
Summary: repmgr
|
Summary: repmgr
|
||||||
Name: repmgr
|
Name: repmgr
|
||||||
Version: 3.0
|
Version: 2.0
|
||||||
Release: 1
|
Release: 2
|
||||||
License: GPLv3
|
License: GPLv3
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
URL: http://repmgr.org
|
URL: http://repmgr.org
|
||||||
Packager: Ian Barwick <ian@2ndquadrant.com>
|
Packager: Nathan Van Overloop <nathan.van.overloop@nexperteam.be>
|
||||||
Vendor: 2ndQuadrant Limited
|
Vendor: 2ndQuadrant Limited
|
||||||
Distribution: centos
|
Distribution: centos
|
||||||
Source0: %{name}-%{version}.tar.gz
|
Source0: %{name}-%{version}.tar.gz
|
||||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
||||||
|
|
||||||
%description
|
%description
|
||||||
repmgr is a utility suite which greatly simplifies
|
repmgr for centos6
|
||||||
the process of setting up and managing replication
|
|
||||||
using streaming replication within a cluster of
|
|
||||||
PostgreSQL servers.
|
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup
|
%setup
|
||||||
@@ -53,8 +50,6 @@ export PATH=$PATH:/usr/pgsql-9.3/bin/
|
|||||||
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
|
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Tue Mar 10 2015 Ian Barwick <ian@2ndquadrant.com>
|
|
||||||
- build for repmgr 3.0
|
|
||||||
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
|
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
|
||||||
- fix witness creation to create db and user if needed
|
- fix witness creation to create db and user if needed
|
||||||
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
|
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
|
||||||
@@ -12,7 +12,7 @@ REPMGRD_ENABLED=no
|
|||||||
#REPMGRD_USER=postgres
|
#REPMGRD_USER=postgres
|
||||||
|
|
||||||
# repmgrd binary
|
# repmgrd binary
|
||||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
#REPMGRD_BIN=/usr/bin/repmgr
|
||||||
|
|
||||||
# pid file
|
# pid file
|
||||||
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
|
||||||
|
|||||||
36
SSH-RSYNC.md
36
SSH-RSYNC.md
@@ -1,36 +0,0 @@
|
|||||||
Set up trusted copy between postgres accounts
|
|
||||||
---------------------------------------------
|
|
||||||
|
|
||||||
If you need to use `rsync` to clone standby servers, the `postgres` account
|
|
||||||
on your primary and standby servers must be each able to access the other
|
|
||||||
using SSH without a password.
|
|
||||||
|
|
||||||
First generate an ssh key, using an empty passphrase, and copy the resulting
|
|
||||||
keys and a matching authorization file to a privileged user account on the other
|
|
||||||
system:
|
|
||||||
|
|
||||||
[postgres@node1]$ ssh-keygen -t rsa
|
|
||||||
Generating public/private rsa key pair.
|
|
||||||
Enter file in which to save the key (/var/lib/pgsql/.ssh/id_rsa):
|
|
||||||
Enter passphrase (empty for no passphrase):
|
|
||||||
Enter same passphrase again:
|
|
||||||
Your identification has been saved in /var/lib/pgsql/.ssh/id_rsa.
|
|
||||||
Your public key has been saved in /var/lib/pgsql/.ssh/id_rsa.pub.
|
|
||||||
The key fingerprint is:
|
|
||||||
aa:bb:cc:dd:ee:ff:aa:11:22:33:44:55:66:77:88:99 postgres@db1.domain.com
|
|
||||||
[postgres@node1]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
|
|
||||||
[postgres@node1]$ chmod go-rwx ~/.ssh/*
|
|
||||||
[postgres@node1]$ cd ~/.ssh
|
|
||||||
[postgres@node1]$ scp id_rsa.pub id_rsa authorized_keys user@node2:
|
|
||||||
|
|
||||||
Login as a user on the other system, and install the files into the `postgres`
|
|
||||||
user's account:
|
|
||||||
|
|
||||||
[user@node2 ~]$ sudo chown postgres.postgres authorized_keys id_rsa.pub id_rsa
|
|
||||||
[user@node2 ~]$ sudo mkdir -p ~postgres/.ssh
|
|
||||||
[user@node2 ~]$ sudo chown postgres.postgres ~postgres/.ssh
|
|
||||||
[user@node2 ~]$ sudo mv authorized_keys id_rsa.pub id_rsa ~postgres/.ssh
|
|
||||||
[user@node2 ~]$ sudo chmod -R go-rwx ~postgres/.ssh
|
|
||||||
|
|
||||||
Now test that ssh in both directions works. You may have to accept some new
|
|
||||||
known hosts in the process.
|
|
||||||
61
TODO
61
TODO
@@ -5,15 +5,10 @@ Known issues in repmgr
|
|||||||
the database server using the ``pg_ctl`` command may accidentally
|
the database server using the ``pg_ctl`` command may accidentally
|
||||||
terminate after their associated ssh session ends.
|
terminate after their associated ssh session ends.
|
||||||
|
|
||||||
* PGPASSFILE may not be passed to pg_basebackup
|
|
||||||
|
|
||||||
|
|
||||||
Planned feature improvements
|
Planned feature improvements
|
||||||
============================
|
============================
|
||||||
|
|
||||||
* Use 'primary' instead of 'master' in documentation and log output
|
* Timeline increases when promoting a standby
|
||||||
for consistency with PostgreSQL documentation. See also commit
|
|
||||||
870b0a53b627eeb9aca1fc14cbafe25b5beafe12.
|
|
||||||
|
|
||||||
* A better check which standby did receive most of the data
|
* A better check which standby did receive most of the data
|
||||||
|
|
||||||
@@ -21,57 +16,3 @@ Planned feature improvements
|
|||||||
algorithm
|
algorithm
|
||||||
|
|
||||||
* include support for delayed standbys
|
* include support for delayed standbys
|
||||||
|
|
||||||
* Create the repmgr user/database on "master register".
|
|
||||||
|
|
||||||
* Use pg_basebackup for the data directory, and ALSO rsync for the
|
|
||||||
configuration files.
|
|
||||||
|
|
||||||
* If no configuration file supplied, search in sensible default locations
|
|
||||||
(currently: current directory and `pg_config --sysconfdir`); if
|
|
||||||
possible this should include the location provided by the package,
|
|
||||||
if installed.
|
|
||||||
|
|
||||||
* repmgrd: if connection to the upstream node fails on startup, optionally
|
|
||||||
retry for a certain period before giving up; this will cover cases when
|
|
||||||
e.g. primary and standby are both starting up, and the standby comes up
|
|
||||||
before the primary. See github issue #80.
|
|
||||||
|
|
||||||
* make old master node ID available for event notification commands
|
|
||||||
(See github issue #80).
|
|
||||||
|
|
||||||
* Have pg_basebackup use replication slots, if and when support for
|
|
||||||
this is added; see:
|
|
||||||
http://www.postgresql.org/message-id/555DD2B2.7020000@gmx.net
|
|
||||||
|
|
||||||
* use "primary/standby" terminology in place of "master/slave" for consistency
|
|
||||||
with main PostgreSQL usage
|
|
||||||
|
|
||||||
* repmgr standby clone: possibility to use barman instead of performing a new base backup
|
|
||||||
|
|
||||||
* possibility to transform a failed master into a new standby with pg_rewind
|
|
||||||
|
|
||||||
* "repmgr standby switchover" to promote a standby in a controlled manner
|
|
||||||
and convert the existing primary into a standby
|
|
||||||
|
|
||||||
* make repmgrd more robust
|
|
||||||
|
|
||||||
* repmgr: when cloning a standby using pg_basebackup and replication slots are
|
|
||||||
requested, activate the replication slot using pg_receivexlog to negate the
|
|
||||||
need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).
|
|
||||||
|
|
||||||
Usability improvements
|
|
||||||
======================
|
|
||||||
|
|
||||||
* repmgr: add interrupt handler, so that if the program is interrupted
|
|
||||||
while running a backup, an attempt can be made to execute pg_stop_backup()
|
|
||||||
on the primary, to prevent an orphaned backup state existing.
|
|
||||||
|
|
||||||
* repmgr: when unregistering a node, delete any entries in the repl_monitoring
|
|
||||||
table.
|
|
||||||
|
|
||||||
* repmgr: for "standby unregister", accept connection parameters for the
|
|
||||||
primary and perform metadata updates (and slot removal) directly on
|
|
||||||
the primary, to allow a shutdown standby to be unregistered
|
|
||||||
(currently the standby must still be running, which means the replication
|
|
||||||
slot can't be dropped).
|
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
====================================================
|
=====================================================
|
||||||
PostgreSQL Automatic Failover - User Documentation
|
PostgreSQL Automatic Fail-Over - User Documentation
|
||||||
====================================================
|
=====================================================
|
||||||
|
|
||||||
Automatic Failover
|
Automatic Failover
|
||||||
==================
|
==================
|
||||||
|
|
||||||
repmgr allows for automatic failover when it detects the failure of the master node.
|
repmgr allows setups for automatic failover when it detects the failure of the master node.
|
||||||
Following is a quick setup for this.
|
Following is a quick setup for this.
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
@@ -14,33 +14,34 @@ Installation
|
|||||||
For convenience, we define:
|
For convenience, we define:
|
||||||
|
|
||||||
**node1**
|
**node1**
|
||||||
is the fully qualified domain name of the Master server, IP 192.168.1.10
|
is the hostname fully qualified of the Master server, IP 192.168.1.10
|
||||||
**node2**
|
**node2**
|
||||||
is the fully qualified domain name of the Standby server, IP 192.168.1.11
|
is the hostname fully qualified of the Standby server, IP 192.168.1.11
|
||||||
**witness**
|
**witness**
|
||||||
is the fully qualified domain name of the server used as a witness, IP 192.168.1.12
|
is the hostname fully qualified of the server used for witness, IP 192.168.1.12
|
||||||
|
|
||||||
**Note:** We don't recommend using names with the status of a server like «masterserver»,
|
**Note:** It is not recommended to use a name defining the status of a server like «masterserver»,
|
||||||
because it would be confusing once a failover takes place and the Master is
|
this is a name leading to confusion once a failover take place and the Master is
|
||||||
now on the «standbyserver».
|
now on the «standbyserver».
|
||||||
|
|
||||||
Summary
|
Summary
|
||||||
-------
|
-------
|
||||||
|
|
||||||
2 PostgreSQL servers are involved in the replication. Automatic failover needs
|
2 PostgreSQL servers are involved in the replication. Automatic fail-over need
|
||||||
a vote to decide what server it should promote, so an odd number is required.
|
to vote to decide what server it should promote, thus an odd number is required
|
||||||
A witness-repmgrd is installed in a third server where it uses a PostgreSQL
|
and a witness-repmgrd is installed in a third server where it uses a PostgreSQL
|
||||||
cluster to communicate with other repmgrd daemons.
|
cluster to communicate with other repmgrd daemons.
|
||||||
|
|
||||||
1. Install PostgreSQL in all the servers involved (including the witness server)
|
1. Install PostgreSQL in all the servers involved (including the server used for
|
||||||
|
witness)
|
||||||
|
|
||||||
2. Install repmgr in all the servers involved (including the witness server)
|
2. Install repmgr in all the servers involved (including the server used for witness)
|
||||||
|
|
||||||
3. Configure the Master PostgreSQL
|
3. Configure the Master PostgreSQL
|
||||||
|
|
||||||
4. Clone the Master to the Standby using "repmgr standby clone" command
|
4. Clone the Master to the Standby using "repmgr standby clone" command
|
||||||
|
|
||||||
5. Configure repmgr in all the servers involved (including the witness server)
|
5. Configure repmgr in all the servers involved (including the server used for witness)
|
||||||
|
|
||||||
6. Register Master and Standby nodes
|
6. Register Master and Standby nodes
|
||||||
|
|
||||||
@@ -65,14 +66,14 @@ Install repmgr following the steps in the README file.
|
|||||||
Configure PostreSQL
|
Configure PostreSQL
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Log in to node1.
|
Log in node1.
|
||||||
|
|
||||||
Edit the file postgresql.conf and modify the parameters::
|
Edit the file postgresql.conf and modify the parameters::
|
||||||
|
|
||||||
listen_addresses='*'
|
listen_addresses='*'
|
||||||
wal_level = 'hot_standby'
|
wal_level = 'hot_standby'
|
||||||
archive_mode = on
|
archive_mode = on
|
||||||
archive_command = 'cd .' # we can also use exit 0, anything that
|
archive_command = 'cd .' # we can also use exit 0, anything that
|
||||||
# just does nothing
|
# just does nothing
|
||||||
max_wal_senders = 10
|
max_wal_senders = 10
|
||||||
wal_keep_segments = 5000 # 80 GB required on pg_xlog
|
wal_keep_segments = 5000 # 80 GB required on pg_xlog
|
||||||
@@ -93,6 +94,7 @@ Create the user and database to manage replication::
|
|||||||
su - postgres
|
su - postgres
|
||||||
createuser -s repmgr
|
createuser -s repmgr
|
||||||
createdb -O repmgr repmgr
|
createdb -O repmgr repmgr
|
||||||
|
psql -f /usr/share/postgresql/9.0/contrib/repmgr_funcs.sql repmgr
|
||||||
|
|
||||||
Restart the PostgreSQL server::
|
Restart the PostgreSQL server::
|
||||||
|
|
||||||
@@ -115,9 +117,9 @@ Create the ssh-key for the postgres user and copy it to other servers::
|
|||||||
Clone Master
|
Clone Master
|
||||||
------------
|
------------
|
||||||
|
|
||||||
Log in to node2.
|
Log in node2.
|
||||||
|
|
||||||
Clone node1 (the current Master)::
|
Clone the node1 (the current Master)::
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgr -d repmgr -U repmgr -h node1 standby clone
|
repmgr -d repmgr -U repmgr -h node1 standby clone
|
||||||
@@ -131,7 +133,7 @@ And check everything is fine in the server log.
|
|||||||
Configure repmgr
|
Configure repmgr
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
Log in to each server and configure repmgr by editing the file
|
Log in each server and configure repmgr by editing the file
|
||||||
/etc/repmgr/repmgr.conf::
|
/etc/repmgr/repmgr.conf::
|
||||||
|
|
||||||
cluster=my_cluster
|
cluster=my_cluster
|
||||||
@@ -152,13 +154,13 @@ Log in to each server and configure repmgr by editing the file
|
|||||||
**node_name**
|
**node_name**
|
||||||
is an identifier for every node.
|
is an identifier for every node.
|
||||||
**conninfo**
|
**conninfo**
|
||||||
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration you need to add a 'port=5499' to the conninfo.
|
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration it is needed to add a 'port=5499' to the conninfo.
|
||||||
**master_response_timeout**
|
**master_response_timeout**
|
||||||
is the maximum amount of time we are going to wait before deciding the master has died and start the failover procedure.
|
is the maximum amount of time we are going to wait before deciding the master has died and start failover procedure.
|
||||||
**reconnect_attempts**
|
**reconnect_attempts**
|
||||||
is the number of times we will try to reconnect to master after a failure has been detected and before start the failover procedure.
|
is the number of times we will try to reconnect to master after a failure has been detected and before start failover procedure.
|
||||||
**reconnect_interval**
|
**reconnect_interval**
|
||||||
is the amount of time between retries to reconnect to master after a failure has been detected and before start the failover procedure.
|
is the amount of time between retries to reconnect to master after a failure has been detected and before start failover procedure.
|
||||||
**failover**
|
**failover**
|
||||||
configure behavior: *manual* or *automatic*.
|
configure behavior: *manual* or *automatic*.
|
||||||
**promote_command**
|
**promote_command**
|
||||||
@@ -169,16 +171,14 @@ Log in to each server and configure repmgr by editing the file
|
|||||||
Register Master and Standby
|
Register Master and Standby
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
Log in to node1.
|
Log in node1.
|
||||||
|
|
||||||
Register the node as master::
|
Register the node as Master::
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgr -f /etc/repmgr/repmgr.conf master register
|
repmgr -f /etc/repmgr/repmgr.conf master register
|
||||||
|
|
||||||
This will also create the repmgr schema and functions.
|
Log in node2. Register it as a standby::
|
||||||
|
|
||||||
Log in to node2. Register it as a standby::
|
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgr -f /etc/repmgr/repmgr.conf standby register
|
repmgr -f /etc/repmgr/repmgr.conf standby register
|
||||||
@@ -186,51 +186,38 @@ Log in to node2. Register it as a standby::
|
|||||||
Initialize witness server
|
Initialize witness server
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
Log in to witness.
|
Log in witness.
|
||||||
|
|
||||||
Initialize the witness server::
|
Initialize the witness server::
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create
|
repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create
|
||||||
|
|
||||||
The witness server needs the following information from the command
|
It needs information to connect to the master to copy the configuration of the cluster, also it needs to know where it should initialize its own $PGDATA.
|
||||||
line:
|
As part of the process it also asks for the superuser password so it can connect when needed.
|
||||||
|
|
||||||
* Connection details for the current master, to copy the cluster
|
|
||||||
configuration.
|
|
||||||
* A location for initializing its own $PGDATA.
|
|
||||||
|
|
||||||
repmgr will also ask for the superuser password on the witness database so
|
|
||||||
it can reconnect when needed (the command line option --initdb-no-pwprompt
|
|
||||||
will set up a password-less superuser).
|
|
||||||
|
|
||||||
By default the witness server will listen on port 5499; this value can be
|
|
||||||
overridden by explicitly providing the port number in the conninfo string
|
|
||||||
in repmgr.conf. (Note that it is also possible to specify the port number
|
|
||||||
with the -l/--local-port option, however this option is now deprecated and
|
|
||||||
will be overridden by a port setting in the conninfo string).
|
|
||||||
|
|
||||||
Start the repmgrd daemons
|
Start the repmgrd daemons
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
Log in to node2 and witness::
|
Log in node2 and witness.
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgrd -f /etc/repmgr/repmgr.conf --daemonize -> /var/log/postgresql/repmgr.log 2>&1
|
repmgrd -f /etc/repmgr/repmgr.conf > /var/log/postgresql/repmgr.log 2>&1
|
||||||
|
|
||||||
**Note:** The Master does not need a repmgrd daemon.
|
**Note:** The Master does not need a repmgrd daemon.
|
||||||
|
|
||||||
|
|
||||||
Suspend Automatic behavior
|
Suspend Automatic behavior
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
Edit the repmgr.conf of the node to remove from automatic processing and change::
|
Edit the repmgr.conf of the node to remove from automatic processing and change::
|
||||||
|
|
||||||
failover=manual
|
failover=manual
|
||||||
|
|
||||||
Then, signal repmgrd daemon::
|
Then, signal repmgrd daemon::
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
kill -HUP $(pidof repmgrd)
|
kill -HUP `pidof repmgrd`
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
=====
|
=====
|
||||||
66
check_dir.c
66
check_dir.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* check_dir.c - Directories management functions
|
* check_dir.c - Directories management functions
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -23,19 +23,15 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ftw.h>
|
|
||||||
|
|
||||||
/* NB: postgres_fe must be included BEFORE check_dir */
|
/* NB: postgres_fe must be included BEFORE check_dir */
|
||||||
#include <libpq-fe.h>
|
#include <libpq-fe.h>
|
||||||
#include <postgres_fe.h>
|
#include <postgres_fe.h>
|
||||||
|
|
||||||
#include "check_dir.h"
|
#include "check_dir.h"
|
||||||
|
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
|
||||||
static bool _create_pg_dir(char *dir, bool force, bool for_witness);
|
|
||||||
static int unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* make sure the directory either doesn't exist or is empty
|
* make sure the directory either doesn't exist or is empty
|
||||||
* we use this function to check the new data directory and
|
* we use this function to check the new data directory and
|
||||||
@@ -103,7 +99,7 @@ create_dir(char *dir)
|
|||||||
if (mkdir_p(dir, 0700) == 0)
|
if (mkdir_p(dir, 0700) == 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
log_err(_("unable to create directory \"%s\": %s\n"),
|
log_err(_("Could not create directory \"%s\": %s\n"),
|
||||||
dir, strerror(errno));
|
dir, strerror(errno));
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@@ -248,19 +244,6 @@ is_pg_dir(char *dir)
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
create_pg_dir(char *dir, bool force)
|
create_pg_dir(char *dir, bool force)
|
||||||
{
|
|
||||||
return _create_pg_dir(dir, force, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
create_witness_pg_dir(char *dir, bool force)
|
|
||||||
{
|
|
||||||
return _create_pg_dir(dir, force, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static bool
|
|
||||||
_create_pg_dir(char *dir, bool force, bool for_witness)
|
|
||||||
{
|
{
|
||||||
bool pg_dir = false;
|
bool pg_dir = false;
|
||||||
|
|
||||||
@@ -273,7 +256,7 @@ _create_pg_dir(char *dir, bool force, bool for_witness)
|
|||||||
|
|
||||||
if (!create_dir(dir))
|
if (!create_dir(dir))
|
||||||
{
|
{
|
||||||
log_err(_("unable to create directory \"%s\"...\n"),
|
log_err(_("couldn't create directory \"%s\"...\n"),
|
||||||
dir);
|
dir);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -285,7 +268,7 @@ _create_pg_dir(char *dir, bool force, bool for_witness)
|
|||||||
|
|
||||||
if (!set_dir_permissions(dir))
|
if (!set_dir_permissions(dir))
|
||||||
{
|
{
|
||||||
log_err(_("unable to change permissions of directory \"%s\": %s\n"),
|
log_err(_("could not change permissions of directory \"%s\": %s\n"),
|
||||||
dir, strerror(errno));
|
dir, strerror(errno));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -297,33 +280,21 @@ _create_pg_dir(char *dir, bool force, bool for_witness)
|
|||||||
|
|
||||||
pg_dir = is_pg_dir(dir);
|
pg_dir = is_pg_dir(dir);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* we use force to reduce the time needed to restore a node which
|
||||||
|
* turn async after a failover or anything else
|
||||||
|
*/
|
||||||
if (pg_dir && force)
|
if (pg_dir && force)
|
||||||
{
|
{
|
||||||
|
|
||||||
/*
|
|
||||||
* The witness server does not store any data other than a copy of the
|
|
||||||
* repmgr metadata, so in --force mode we can simply overwrite the
|
|
||||||
* directory.
|
|
||||||
*
|
|
||||||
* For non-witness servers, we'll leave the data in place, both to reduce
|
|
||||||
* the risk of unintentional data loss and to make it possible for the
|
|
||||||
* data directory to be brought up-to-date with rsync.
|
|
||||||
*/
|
|
||||||
if (for_witness)
|
|
||||||
{
|
|
||||||
log_notice(_("deleting existing data directory \"%s\"\n"), dir);
|
|
||||||
nftw(dir, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
|
||||||
}
|
|
||||||
/* Let it continue */
|
/* Let it continue */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (pg_dir && !force)
|
else if (pg_dir && !force)
|
||||||
{
|
{
|
||||||
log_hint(_("This looks like a PostgreSQL directory.\n"
|
log_warning(_("\nThis looks like a PostgreSQL directory.\n"
|
||||||
"If you are sure you want to clone here, "
|
"If you are sure you want to clone here, "
|
||||||
"please check there is no PostgreSQL server "
|
"please check there is no PostgreSQL server "
|
||||||
"running and use the -F/--force option\n"));
|
"running and use the --force option\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -336,14 +307,3 @@ _create_pg_dir(char *dir, bool force, bool for_witness)
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
|
|
||||||
{
|
|
||||||
int rv = remove(fpath);
|
|
||||||
|
|
||||||
if (rv)
|
|
||||||
perror(fpath);
|
|
||||||
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* check_dir.h
|
* check_dir.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -26,6 +26,5 @@ bool create_dir(char *dir);
|
|||||||
bool set_dir_permissions(char *dir);
|
bool set_dir_permissions(char *dir);
|
||||||
bool is_pg_dir(char *dir);
|
bool is_pg_dir(char *dir);
|
||||||
bool create_pg_dir(char *dir, bool force);
|
bool create_pg_dir(char *dir, bool force);
|
||||||
bool create_witness_pg_dir(char *dir, bool force);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
68
config.h
68
config.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* config.h
|
* config.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -20,42 +20,13 @@
|
|||||||
#ifndef _REPMGR_CONFIG_H_
|
#ifndef _REPMGR_CONFIG_H_
|
||||||
#define _REPMGR_CONFIG_H_
|
#define _REPMGR_CONFIG_H_
|
||||||
|
|
||||||
#include "postgres_fe.h"
|
#include "repmgr.h"
|
||||||
|
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
#define CONFIG_FILE_NAME "repmgr.conf"
|
|
||||||
|
|
||||||
typedef struct EventNotificationListCell
|
|
||||||
{
|
|
||||||
struct EventNotificationListCell *next;
|
|
||||||
char event_type[MAXLEN];
|
|
||||||
} EventNotificationListCell;
|
|
||||||
|
|
||||||
typedef struct EventNotificationList
|
|
||||||
{
|
|
||||||
EventNotificationListCell *head;
|
|
||||||
EventNotificationListCell *tail;
|
|
||||||
} EventNotificationList;
|
|
||||||
|
|
||||||
typedef struct TablespaceListCell
|
|
||||||
{
|
|
||||||
struct TablespaceListCell *next;
|
|
||||||
char old_dir[MAXPGPATH];
|
|
||||||
char new_dir[MAXPGPATH];
|
|
||||||
} TablespaceListCell;
|
|
||||||
|
|
||||||
typedef struct TablespaceList
|
|
||||||
{
|
|
||||||
TablespaceListCell *head;
|
|
||||||
TablespaceListCell *tail;
|
|
||||||
} TablespaceList;
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
char cluster_name[MAXLEN];
|
char cluster_name[MAXLEN];
|
||||||
int node;
|
int node;
|
||||||
int upstream_node;
|
|
||||||
char conninfo[MAXLEN];
|
char conninfo[MAXLEN];
|
||||||
int failover;
|
int failover;
|
||||||
int priority;
|
int priority;
|
||||||
@@ -68,44 +39,19 @@ typedef struct
|
|||||||
char ssh_options[QUERY_STR_LEN];
|
char ssh_options[QUERY_STR_LEN];
|
||||||
int master_response_timeout;
|
int master_response_timeout;
|
||||||
int reconnect_attempts;
|
int reconnect_attempts;
|
||||||
int reconnect_interval;
|
int reconnect_intvl;
|
||||||
char pg_bindir[MAXLEN];
|
char pg_bindir[MAXLEN];
|
||||||
char pg_ctl_options[MAXLEN];
|
char pgctl_options[MAXLEN];
|
||||||
char pg_basebackup_options[MAXLEN];
|
|
||||||
char logfile[MAXLEN];
|
char logfile[MAXLEN];
|
||||||
int monitor_interval_secs;
|
int monitor_interval_secs;
|
||||||
int retry_promote_interval_secs;
|
int retry_promote_interval_secs;
|
||||||
int use_replication_slots;
|
|
||||||
char event_notification_command[MAXLEN];
|
|
||||||
EventNotificationList event_notifications;
|
|
||||||
TablespaceList tablespace_mapping;
|
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", 0, 0 }
|
||||||
|
|
||||||
typedef struct ErrorListCell
|
void parse_config(const char *config_file, t_configuration_options * options);
|
||||||
{
|
|
||||||
struct ErrorListCell *next;
|
|
||||||
char *error_message;
|
|
||||||
} ErrorListCell;
|
|
||||||
|
|
||||||
typedef struct ErrorList
|
|
||||||
{
|
|
||||||
ErrorListCell *head;
|
|
||||||
ErrorListCell *tail;
|
|
||||||
} ErrorList;
|
|
||||||
|
|
||||||
void set_progname(const char *argv0);
|
|
||||||
const char * progname(void);
|
|
||||||
|
|
||||||
bool load_config(const char *config_file, bool verbose, t_configuration_options *options, char *argv0);
|
|
||||||
bool reload_config(t_configuration_options *orig_options);
|
|
||||||
bool parse_config(t_configuration_options *options);
|
|
||||||
void parse_line(char *buff, char *name, char *value);
|
void parse_line(char *buff, char *name, char *value);
|
||||||
char *trim(char *s);
|
char *trim(char *s);
|
||||||
void error_list_append(ErrorList *error_list, char *error_message);
|
bool reload_config(char *config_file, t_configuration_options * orig_options);
|
||||||
int repmgr_atoi(const char *s,
|
|
||||||
const char *config_item,
|
|
||||||
ErrorList *error_list);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
85
dbutils.h
85
dbutils.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* dbutils.h
|
* dbutils.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -20,96 +20,27 @@
|
|||||||
#ifndef _REPMGR_DBUTILS_H_
|
#ifndef _REPMGR_DBUTILS_H_
|
||||||
#define _REPMGR_DBUTILS_H_
|
#define _REPMGR_DBUTILS_H_
|
||||||
|
|
||||||
#include "access/xlogdefs.h"
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
UNKNOWN = 0,
|
|
||||||
MASTER,
|
|
||||||
STANDBY,
|
|
||||||
WITNESS
|
|
||||||
} t_server_type;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Struct to store node information
|
|
||||||
*/
|
|
||||||
typedef struct s_node_info
|
|
||||||
{
|
|
||||||
int node_id;
|
|
||||||
int upstream_node_id;
|
|
||||||
t_server_type type;
|
|
||||||
char name[MAXLEN];
|
|
||||||
char conninfo_str[MAXLEN];
|
|
||||||
char slot_name[MAXLEN];
|
|
||||||
int priority;
|
|
||||||
bool active;
|
|
||||||
bool is_ready;
|
|
||||||
bool is_visible;
|
|
||||||
XLogRecPtr xlog_location;
|
|
||||||
} t_node_info;
|
|
||||||
|
|
||||||
|
|
||||||
#define T_NODE_INFO_INITIALIZER { \
|
|
||||||
NODE_NOT_FOUND, \
|
|
||||||
NO_UPSTREAM_NODE, \
|
|
||||||
UNKNOWN, \
|
|
||||||
"", \
|
|
||||||
"", \
|
|
||||||
"", \
|
|
||||||
DEFAULT_PRIORITY, \
|
|
||||||
true, \
|
|
||||||
false, \
|
|
||||||
false, \
|
|
||||||
InvalidXLogRecPtr \
|
|
||||||
}
|
|
||||||
|
|
||||||
PGconn *establish_db_connection(const char *conninfo,
|
PGconn *establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
PGconn *establish_db_connection_by_params(const char *keywords[],
|
PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||||
const char *values[],
|
const char *values[],
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
bool begin_transaction(PGconn *conn);
|
|
||||||
bool commit_transaction(PGconn *conn);
|
|
||||||
bool rollback_transaction(PGconn *conn);
|
|
||||||
bool check_cluster_schema(PGconn *conn);
|
|
||||||
int is_standby(PGconn *conn);
|
int is_standby(PGconn *conn);
|
||||||
|
int is_witness(PGconn *conn, char *schema, char *cluster, int node_id);
|
||||||
bool is_pgup(PGconn *conn, int timeout);
|
bool is_pgup(PGconn *conn, int timeout);
|
||||||
int get_master_node_id(PGconn *conn, char *cluster);
|
char *pg_version(PGconn *conn, char *major_version);
|
||||||
int get_server_version(PGconn *conn, char *server_version);
|
int guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||||
bool get_cluster_size(PGconn *conn, char *size);
|
const char *value);
|
||||||
bool get_pg_setting(PGconn *conn, const char *setting, char *output);
|
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||||
|
|
||||||
int guc_set(PGconn *conn, const char *parameter, const char *op,
|
|
||||||
const char *value);
|
|
||||||
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
|
||||||
const char *value, const char *datatype);
|
const char *value, const char *datatype);
|
||||||
|
|
||||||
PGconn *get_upstream_connection(PGconn *standby_conn, char *cluster,
|
const char *get_cluster_size(PGconn *conn);
|
||||||
int node_id,
|
PGconn *get_master_connection(PGconn *standby_conn, char *schema, char *cluster,
|
||||||
int *upstream_node_id_ptr,
|
|
||||||
char *upstream_conninfo_out);
|
|
||||||
PGconn *get_master_connection(PGconn *standby_conn, char *cluster,
|
|
||||||
int *master_id, char *master_conninfo_out);
|
int *master_id, char *master_conninfo_out);
|
||||||
|
|
||||||
int wait_connection_availability(PGconn *conn, long long timeout);
|
int wait_connection_availability(PGconn *conn, long long timeout);
|
||||||
bool cancel_query(PGconn *conn, int timeout);
|
bool cancel_query(PGconn *conn, int timeout);
|
||||||
char *get_repmgr_schema(void);
|
|
||||||
char *get_repmgr_schema_quoted(PGconn *conn);
|
|
||||||
bool create_replication_slot(PGconn *conn, char *slot_name);
|
|
||||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
|
||||||
|
|
||||||
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
|
||||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
|
||||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
|
||||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
|
||||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
|
||||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
|
||||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
|
||||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
|
||||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
|
||||||
PGresult * get_node_record(PGconn *conn, char *cluster, int node_id);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
4
debian/repmgr.repmgrd.default
vendored
4
debian/repmgr.repmgrd.default
vendored
@@ -12,7 +12,7 @@ REPMGRD_ENABLED=no
|
|||||||
#REPMGRD_USER=postgres
|
#REPMGRD_USER=postgres
|
||||||
|
|
||||||
# repmgrd binary
|
# repmgrd binary
|
||||||
#REPMGRD_BIN=/usr/bin/repmgrd
|
#REPMGR_BIN=/usr/bin/repmgr
|
||||||
|
|
||||||
# pid file
|
# pid file
|
||||||
#REPMGRD_PIDFILE=/var/run/repmgrd.pid
|
#REPMGR_PIDFILE=/var/run/repmgrd.pid
|
||||||
|
|||||||
2
debian/repmgr.repmgrd.init
vendored
2
debian/repmgr.repmgrd.init
vendored
@@ -59,7 +59,7 @@ do_stop()
|
|||||||
# 0 if daemon has been stopped
|
# 0 if daemon has been stopped
|
||||||
# 1 if daemon was already stopped
|
# 1 if daemon was already stopped
|
||||||
# other if daemon could not be stopped or a failure occurred
|
# other if daemon could not be stopped or a failure occurred
|
||||||
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $REPMGRD_PIDFILE --name "$(basename $REPMGRD_BIN)"
|
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $REPMGRD_PIDFILE --exec $REPMGRD_BIN
|
||||||
}
|
}
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* errcode.h
|
* errcode.h
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -25,7 +25,9 @@
|
|||||||
#define SUCCESS 0
|
#define SUCCESS 0
|
||||||
#define ERR_BAD_CONFIG 1
|
#define ERR_BAD_CONFIG 1
|
||||||
#define ERR_BAD_RSYNC 2
|
#define ERR_BAD_RSYNC 2
|
||||||
|
#define ERR_STOP_BACKUP 3
|
||||||
#define ERR_NO_RESTART 4
|
#define ERR_NO_RESTART 4
|
||||||
|
#define ERR_NEEDS_XLOG 5
|
||||||
#define ERR_DB_CON 6
|
#define ERR_DB_CON 6
|
||||||
#define ERR_DB_QUERY 7
|
#define ERR_DB_QUERY 7
|
||||||
#define ERR_PROMOTED 8
|
#define ERR_PROMOTED 8
|
||||||
@@ -34,8 +36,5 @@
|
|||||||
#define ERR_FAILOVER_FAIL 11
|
#define ERR_FAILOVER_FAIL 11
|
||||||
#define ERR_BAD_SSH 12
|
#define ERR_BAD_SSH 12
|
||||||
#define ERR_SYS_FAILURE 13
|
#define ERR_SYS_FAILURE 13
|
||||||
#define ERR_BAD_BASEBACKUP 14
|
|
||||||
#define ERR_INTERNAL 15
|
|
||||||
#define ERR_MONITORING_FAIL 16
|
|
||||||
|
|
||||||
#endif /* _ERRCODE_H_ */
|
#endif /* _ERRCODE_H_ */
|
||||||
|
|||||||
154
log.c
154
log.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.c - Logging methods
|
* log.c - Logging methods
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This module is a set of methods for logging (currently only syslog)
|
* This module is a set of methods for logging (currently only syslog)
|
||||||
*
|
*
|
||||||
@@ -39,37 +39,13 @@
|
|||||||
|
|
||||||
/* #define REPMGR_DEBUG */
|
/* #define REPMGR_DEBUG */
|
||||||
|
|
||||||
static int detect_log_facility(const char *facility);
|
|
||||||
static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
|
|
||||||
|
|
||||||
int log_type = REPMGR_STDERR;
|
|
||||||
int log_level = LOG_NOTICE;
|
|
||||||
int last_log_level = LOG_NOTICE;
|
|
||||||
int verbose_logging = false;
|
|
||||||
int terse_logging = false;
|
|
||||||
|
|
||||||
void
|
void
|
||||||
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
|
||||||
{
|
|
||||||
va_list arglist;
|
|
||||||
|
|
||||||
va_start(arglist, fmt);
|
|
||||||
_stderr_log_with_level(level_name, level, fmt, arglist);
|
|
||||||
va_end(arglist);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
_stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
|
|
||||||
{
|
{
|
||||||
time_t t;
|
time_t t;
|
||||||
struct tm *tm;
|
struct tm *tm;
|
||||||
char buff[100];
|
char buff[100];
|
||||||
|
va_list ap;
|
||||||
/*
|
|
||||||
* Store the requested level so that if there's a subsequent
|
|
||||||
* log_hint(), we can suppress that if appropriate.
|
|
||||||
*/
|
|
||||||
last_log_level = level;
|
|
||||||
|
|
||||||
if (log_level >= level)
|
if (log_level >= level)
|
||||||
{
|
{
|
||||||
@@ -78,74 +54,24 @@ _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_li
|
|||||||
strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
||||||
fprintf(stderr, "%s [%s] ", buff, level_name);
|
fprintf(stderr, "%s [%s] ", buff, level_name);
|
||||||
|
|
||||||
|
va_start(ap, fmt);
|
||||||
vfprintf(stderr, fmt, ap);
|
vfprintf(stderr, fmt, ap);
|
||||||
|
va_end(ap);
|
||||||
|
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
log_hint(const char *fmt, ...)
|
|
||||||
{
|
|
||||||
va_list ap;
|
|
||||||
|
|
||||||
if (terse_logging == false)
|
static int detect_log_level(const char *level);
|
||||||
{
|
static int detect_log_facility(const char *facility);
|
||||||
va_start(ap, fmt);
|
|
||||||
_stderr_log_with_level("HINT", last_log_level, fmt, ap);
|
|
||||||
va_end(ap);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
log_verbose(int level, const char *fmt, ...)
|
|
||||||
{
|
|
||||||
va_list ap;
|
|
||||||
|
|
||||||
va_start(ap, fmt);
|
|
||||||
|
|
||||||
if (verbose_logging == true)
|
|
||||||
{
|
|
||||||
switch(level)
|
|
||||||
{
|
|
||||||
case LOG_EMERG:
|
|
||||||
_stderr_log_with_level("EMERG", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
case LOG_ALERT:
|
|
||||||
_stderr_log_with_level("ALERT", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
case LOG_CRIT:
|
|
||||||
_stderr_log_with_level("CRIT", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
case LOG_ERR:
|
|
||||||
_stderr_log_with_level("ERR", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
case LOG_WARNING:
|
|
||||||
_stderr_log_with_level("WARNING", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
case LOG_NOTICE:
|
|
||||||
_stderr_log_with_level("NOTICE", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
case LOG_INFO:
|
|
||||||
_stderr_log_with_level("INFO", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
case LOG_DEBUG:
|
|
||||||
_stderr_log_with_level("DEBUG", level, fmt, ap);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
va_end(ap);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
int log_type = REPMGR_STDERR;
|
||||||
|
int log_level = LOG_NOTICE;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
logger_init(t_configuration_options * opts, const char *ident)
|
logger_init(t_configuration_options * opts, const char *ident, const char *level, const char *facility)
|
||||||
{
|
{
|
||||||
char *level = opts->loglevel;
|
|
||||||
char *facility = opts->logfacility;
|
|
||||||
|
|
||||||
int l;
|
int l;
|
||||||
int f;
|
int f;
|
||||||
|
|
||||||
@@ -169,10 +95,10 @@ logger_init(t_configuration_options * opts, const char *ident)
|
|||||||
printf("Assigned level for logger: %d\n", l);
|
printf("Assigned level for logger: %d\n", l);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (l >= 0)
|
if (l > 0)
|
||||||
log_level = l;
|
log_level = l;
|
||||||
else
|
else
|
||||||
stderr_log_warning(_("Invalid log level \"%s\" (available values: DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level);
|
stderr_log_warning(_("Cannot detect log level %s (use any of DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (facility && *facility)
|
if (facility && *facility)
|
||||||
@@ -218,38 +144,18 @@ logger_init(t_configuration_options * opts, const char *ident)
|
|||||||
{
|
{
|
||||||
FILE *fd;
|
FILE *fd;
|
||||||
|
|
||||||
/* Check if we can write to the specified file before redirecting
|
|
||||||
* stderr - if freopen() fails, stderr output will vanish into
|
|
||||||
* the ether and the user won't know what's going on.
|
|
||||||
*/
|
|
||||||
|
|
||||||
fd = fopen(opts->logfile, "a");
|
|
||||||
if (fd == NULL)
|
|
||||||
{
|
|
||||||
stderr_log_err(_("Unable to open specified logfile '%s' for writing: %s\n"), opts->logfile, strerror(errno));
|
|
||||||
stderr_log_err(_("Terminating\n"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
fclose(fd);
|
|
||||||
|
|
||||||
stderr_log_notice(_("Redirecting logging output to '%s'\n"), opts->logfile);
|
|
||||||
fd = freopen(opts->logfile, "a", stderr);
|
fd = freopen(opts->logfile, "a", stderr);
|
||||||
|
|
||||||
/* It's possible freopen() may still fail due to e.g. a race condition;
|
|
||||||
as it's not feasible to restore stderr after a failed freopen(),
|
|
||||||
we'll write to stdout as a last resort.
|
|
||||||
*/
|
|
||||||
if (fd == NULL)
|
if (fd == NULL)
|
||||||
{
|
{
|
||||||
printf(_("Unable to open specified logfile %s for writing: %s\n"), opts->logfile, strerror(errno));
|
fprintf(stderr, "error reopening stderr to '%s': %s",
|
||||||
printf(_("Terminating\n"));
|
opts->logfile, strerror(errno));
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
logger_shutdown(void)
|
logger_shutdown(void)
|
||||||
@@ -263,32 +169,17 @@ logger_shutdown(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Indicate whether extra-verbose logging is required. This will
|
* Set a minimum logging level. Intended for command line verbosity
|
||||||
* generate a lot of output, particularly debug logging, and should
|
* options, which might increase requested logging over what's specified
|
||||||
* not be permanently enabled in production.
|
* in the regular configuration file.
|
||||||
*
|
|
||||||
* NOTE: in previous repmgr versions, this option forced the log
|
|
||||||
* level to INFO.
|
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
logger_set_verbose(void)
|
logger_min_verbose(int minimum)
|
||||||
{
|
{
|
||||||
verbose_logging = true;
|
if (log_level < minimum)
|
||||||
|
log_level = minimum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Indicate whether some non-critical log messages can be omitted.
|
|
||||||
* Currently this includes warnings about irrelevant command line
|
|
||||||
* options and hints.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void logger_set_terse(void)
|
|
||||||
{
|
|
||||||
terse_logging = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
detect_log_level(const char *level)
|
detect_log_level(const char *level)
|
||||||
{
|
{
|
||||||
@@ -309,16 +200,17 @@ detect_log_level(const char *level)
|
|||||||
if (!strcmp(level, "EMERG"))
|
if (!strcmp(level, "EMERG"))
|
||||||
return LOG_EMERG;
|
return LOG_EMERG;
|
||||||
|
|
||||||
return -1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
int
|
||||||
detect_log_facility(const char *facility)
|
detect_log_facility(const char *facility)
|
||||||
{
|
{
|
||||||
int local = 0;
|
int local = 0;
|
||||||
|
|
||||||
if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6)
|
if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6)
|
||||||
{
|
{
|
||||||
|
|
||||||
local = atoi(&facility[5]);
|
local = atoi(&facility[5]);
|
||||||
|
|
||||||
switch (local)
|
switch (local)
|
||||||
|
|||||||
20
log.h
20
log.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.h
|
* log.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -25,9 +25,15 @@
|
|||||||
#define REPMGR_SYSLOG 1
|
#define REPMGR_SYSLOG 1
|
||||||
#define REPMGR_STDERR 2
|
#define REPMGR_STDERR 2
|
||||||
|
|
||||||
|
#if (PG_VERSION_NUM >= 90100)
|
||||||
void
|
void
|
||||||
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||||
|
#else
|
||||||
|
void
|
||||||
|
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
|
||||||
|
__attribute__((format(printf, 3, 4)));
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Standard error logging */
|
/* Standard error logging */
|
||||||
#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
|
#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
|
||||||
@@ -112,19 +118,13 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int detect_log_level(const char *level);
|
|
||||||
|
|
||||||
/* Logger initialisation and shutdown */
|
/* Logger initialisation and shutdown */
|
||||||
|
|
||||||
bool logger_init(t_configuration_options * opts, const char *ident);
|
|
||||||
|
|
||||||
bool logger_shutdown(void);
|
bool logger_shutdown(void);
|
||||||
|
|
||||||
void logger_set_verbose(void);
|
bool logger_init(t_configuration_options * opts, const char *ident,
|
||||||
void logger_set_terse(void);
|
const char *level, const char *facility);
|
||||||
|
|
||||||
void log_hint(const char *fmt, ...);
|
void logger_min_verbose(int minimum);
|
||||||
void log_verbose(int level, const char *fmt, ...);
|
|
||||||
|
|
||||||
extern int log_type;
|
extern int log_type;
|
||||||
extern int log_level;
|
extern int log_level;
|
||||||
|
|||||||
@@ -1,144 +1,62 @@
|
|||||||
###################################################
|
###################################################
|
||||||
# Replication Manager sample configuration file
|
# Replication Manager configuration file
|
||||||
###################################################
|
###################################################
|
||||||
|
|
||||||
# Required configuration items
|
# Cluster name
|
||||||
# ============================
|
cluster=test
|
||||||
#
|
|
||||||
# repmgr and repmgrd require these items to be configured:
|
|
||||||
|
|
||||||
# Cluster name - this will be used by repmgr to generate its internal
|
# Node ID
|
||||||
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
|
node=2
|
||||||
# to preserve case, we recommend using lower case and avoiding whitespace
|
node_name=standby2
|
||||||
# to facilitate easier querying of the repmgr views and tables.
|
|
||||||
cluster=example_cluster
|
|
||||||
|
|
||||||
# Node ID and name
|
# Connection information
|
||||||
# (Note: we recommend to avoid naming nodes after their initial
|
conninfo='host=192.168.204.104'
|
||||||
# replication funcion, as this will cause confusion when e.g.
|
rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||||
# "standby2" is promoted to primary)
|
ssh_options=-o "StrictHostKeyChecking no"
|
||||||
node=2 # a unique integer
|
|
||||||
node_name=node2 # an arbitrary (but unique) string; we recommend using
|
|
||||||
# the server's hostname or another identifier unambiguously
|
|
||||||
# associated with the server to avoid confusion
|
|
||||||
|
|
||||||
# Database connection information as a conninfo string
|
|
||||||
# This must be accessible to all servers in the cluster; for details see:
|
|
||||||
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
|
|
||||||
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
|
|
||||||
|
|
||||||
# Optional configuration items
|
|
||||||
# ============================
|
|
||||||
|
|
||||||
# Replication settings
|
|
||||||
# ---------------------
|
|
||||||
|
|
||||||
# when using cascading replication and a standby is to be connected to an
|
|
||||||
# upstream standby, specify that node's ID with 'upstream_node'. The node
|
|
||||||
# must exist before the new standby can be registered. If a standby is
|
|
||||||
# to connect directly to a primary node, this parameter is not required.
|
|
||||||
#
|
|
||||||
# upstream_node=1
|
|
||||||
|
|
||||||
# physical replication slots - PostgreSQL 9.4 and later only
|
|
||||||
# (default: 0)
|
|
||||||
#
|
|
||||||
# use_replication_slots=0
|
|
||||||
#
|
|
||||||
# NOTE: 'max_replication_slots' should be configured for at least the
|
|
||||||
# number of standbys which will connect to the primary.
|
|
||||||
|
|
||||||
# Logging and monitoring settings
|
|
||||||
# -------------------------------
|
|
||||||
|
|
||||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
|
||||||
# (default: NOTICE)
|
|
||||||
loglevel=NOTICE
|
|
||||||
|
|
||||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
|
||||||
# (default: STDERR)
|
|
||||||
logfacility=STDERR
|
|
||||||
|
|
||||||
# stderr can be redirected to an arbitrary file:
|
|
||||||
#
|
|
||||||
# logfile='/var/log/repmgr.log'
|
|
||||||
|
|
||||||
# event notifications can be passed to an arbitrary external program
|
|
||||||
# together with the following parameters:
|
|
||||||
#
|
|
||||||
# %n - node ID
|
|
||||||
# %e - event type
|
|
||||||
# %s - success (1 or 0)
|
|
||||||
# %t - timestamp
|
|
||||||
# %d - details
|
|
||||||
#
|
|
||||||
# the values provided for "%t" and "%d" will probably contain spaces,
|
|
||||||
# so should be quoted in the provided command configuration, e.g.:
|
|
||||||
#
|
|
||||||
# event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
|
||||||
|
|
||||||
# By default, all notifications will be passed; the notification types
|
|
||||||
# can be filtered to explicitly named ones:
|
|
||||||
#
|
|
||||||
# event_notifications=master_register,standby_register,witness_create
|
|
||||||
|
|
||||||
|
|
||||||
# Environment/command settings
|
|
||||||
# ----------------------------
|
|
||||||
|
|
||||||
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
|
|
||||||
# (if not provided, defaults to system $PATH)
|
|
||||||
# pg_bindir=/usr/bin/
|
|
||||||
|
|
||||||
# external command options
|
|
||||||
|
|
||||||
# rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
|
||||||
# ssh_options=-o "StrictHostKeyChecking no"
|
|
||||||
|
|
||||||
# external command arguments
|
|
||||||
|
|
||||||
# pg_ctl_options='-s'
|
|
||||||
# pg_basebackup_options='--xlog-method=s'
|
|
||||||
|
|
||||||
|
|
||||||
# Standby clone settings
|
|
||||||
# ----------------------
|
|
||||||
#
|
|
||||||
# These settings apply when cloning a standby (`repmgr standby clone`).
|
|
||||||
|
|
||||||
# Tablespaces can be remapped from one file system location to another:
|
|
||||||
#
|
|
||||||
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
|
|
||||||
|
|
||||||
|
|
||||||
# Failover settings (repmgrd)
|
|
||||||
# ---------------------------
|
|
||||||
#
|
|
||||||
# These settings are only applied when repmgrd is running.
|
|
||||||
|
|
||||||
# Number of seconds to wait for a response from the primary server before
|
|
||||||
# deciding it has failed
|
|
||||||
|
|
||||||
|
# How many seconds we wait for master response before declaring master failure
|
||||||
master_response_timeout=60
|
master_response_timeout=60
|
||||||
|
|
||||||
# Number of times to try and reconnect to the primary before starting
|
# How many time we try to reconnect to master before starting failover procedure
|
||||||
# the failover procedure
|
|
||||||
reconnect_attempts=6
|
reconnect_attempts=6
|
||||||
reconnect_interval=10
|
reconnect_interval=10
|
||||||
|
|
||||||
# Autofailover options
|
# Autofailover options
|
||||||
failover=automatic # one of 'automatic', 'manual'
|
failover=manual
|
||||||
priority=100 # a value of zero or less prevents the node being promoted to primary
|
priority=-1
|
||||||
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
|
||||||
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
|
||||||
|
|
||||||
# monitoring interval in seconds; default is 2
|
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||||
|
# Default: NOTICE
|
||||||
|
loglevel=NOTICE
|
||||||
|
|
||||||
|
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||||
|
# Default: STDERR
|
||||||
|
logfacility=STDERR
|
||||||
|
|
||||||
|
# path to pg_ctl executable
|
||||||
|
pg_bindir=/usr/bin/
|
||||||
|
|
||||||
|
#
|
||||||
|
# you may add command line arguments for pg_ctl
|
||||||
|
#
|
||||||
|
# pg_ctl_options='-s'
|
||||||
|
|
||||||
|
#
|
||||||
|
# redirect stderr to a logfile
|
||||||
|
#
|
||||||
|
# logfile='/var/log/repmgr.log'
|
||||||
|
|
||||||
|
#
|
||||||
|
# change monitoring interval; default is 2s
|
||||||
#
|
#
|
||||||
# monitor_interval_secs=2
|
# monitor_interval_secs=2
|
||||||
|
|
||||||
# change wait time for primary; before we bail out and exit when the primary
|
#
|
||||||
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
|
# change wait time for master; before we bail out and exit when the
|
||||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
# master disappears, we wait 6 * retry_promote_interval_secs seconds;
|
||||||
# default value is 300)
|
# by default this would be half an hour (since sleep_delay default
|
||||||
|
# value is 300)
|
||||||
#
|
#
|
||||||
# retry_promote_interval_secs=300
|
# retry_promote_interval_secs=300
|
||||||
|
|||||||
32
repmgr.h
32
repmgr.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.h
|
* repmgr.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -27,30 +27,24 @@
|
|||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
#include "dbutils.h"
|
#include "dbutils.h"
|
||||||
#include "errcode.h"
|
#include "errcode.h"
|
||||||
#include "config.h"
|
|
||||||
|
|
||||||
#define MIN_SUPPORTED_VERSION "9.3"
|
#define PRIMARY_MODE 0
|
||||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
#define STANDBY_MODE 1
|
||||||
|
#define WITNESS_MODE 2
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#define MAXFILENAME 1024
|
#define MAXFILENAME 1024
|
||||||
#define ERRBUFF_SIZE 512
|
#define ERRBUFF_SIZE 512
|
||||||
|
|
||||||
|
#define DEFAULT_CONFIG_FILE "./repmgr.conf"
|
||||||
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
|
||||||
#define DEFAULT_DEST_DIR "."
|
#define DEFAULT_DEST_DIR "."
|
||||||
#define DEFAULT_MASTER_PORT "5432"
|
#define DEFAULT_MASTER_PORT "5432"
|
||||||
#define DEFAULT_DBNAME "postgres"
|
#define DEFAULT_DBNAME "postgres"
|
||||||
#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_"
|
#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_"
|
||||||
#define DEFAULT_PRIORITY 100
|
|
||||||
#define FAILOVER_NODES_MAX_CHECK 50
|
|
||||||
|
|
||||||
#define MANUAL_FAILOVER 0
|
#define MANUAL_FAILOVER 0
|
||||||
#define AUTOMATIC_FAILOVER 1
|
#define AUTOMATIC_FAILOVER 1
|
||||||
#define NODE_NOT_FOUND -1
|
|
||||||
#define NO_UPSTREAM_NODE -1
|
|
||||||
#define UNKNOWN_NODE_ID -1
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Run time options type */
|
/* Run time options type */
|
||||||
typedef struct
|
typedef struct
|
||||||
@@ -65,29 +59,19 @@ typedef struct
|
|||||||
char superuser[MAXLEN];
|
char superuser[MAXLEN];
|
||||||
char wal_keep_segments[MAXLEN];
|
char wal_keep_segments[MAXLEN];
|
||||||
bool verbose;
|
bool verbose;
|
||||||
bool terse;
|
|
||||||
bool force;
|
bool force;
|
||||||
bool wait_for_master;
|
bool wait_for_master;
|
||||||
bool ignore_rsync_warn;
|
bool ignore_rsync_warn;
|
||||||
bool initdb_no_pwprompt;
|
bool initdb_no_pwprompt;
|
||||||
bool rsync_only;
|
|
||||||
bool fast_checkpoint;
|
bool fast_checkpoint;
|
||||||
bool ignore_external_config_files;
|
|
||||||
char masterport[MAXLEN];
|
char masterport[MAXLEN];
|
||||||
char localport[MAXLEN];
|
char localport[MAXLEN];
|
||||||
char loglevel[MAXLEN];
|
|
||||||
|
|
||||||
/* parameter used by CLUSTER CLEANUP */
|
/* parameter used by CLUSTER CLEANUP */
|
||||||
int keep_history;
|
int keep_history;
|
||||||
|
|
||||||
char pg_bindir[MAXLEN];
|
|
||||||
|
|
||||||
char recovery_min_apply_delay[MAXLEN];
|
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "", "", "", 0, "", "" }
|
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, "", "", 0}
|
||||||
|
|
||||||
extern char repmgr_schema[MAXLEN];
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.sql
|
* repmgr.sql
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
#
|
#
|
||||||
# Makefile
|
# Makefile
|
||||||
#
|
# Copyright (c) 2ndQuadrant, 2010
|
||||||
# Copyright (c) 2ndQuadrant, 2010-2016
|
|
||||||
#
|
#
|
||||||
|
|
||||||
MODULE_big = repmgr_funcs
|
MODULE_big = repmgr_funcs
|
||||||
|
|||||||
@@ -1,76 +0,0 @@
|
|||||||
/*
|
|
||||||
* Update a repmgr 2.x installation to repmgr 3.0
|
|
||||||
* ----------------------------------------------
|
|
||||||
*
|
|
||||||
* 1. Stop any running repmgrd instances
|
|
||||||
* 2. On the master node, execute the SQL statements listed below,
|
|
||||||
* taking care to identify the master node and any inactive
|
|
||||||
* nodes
|
|
||||||
* 3. Restart repmgrd (being sure to use repmgr 3.0)
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set the search path to the name of the schema used by
|
|
||||||
* your repmgr installation
|
|
||||||
* (this should be "repmgr_" + the cluster name defined in
|
|
||||||
* 'repmgr.conf')
|
|
||||||
*/
|
|
||||||
|
|
||||||
-- SET search_path TO 'name_of_repmgr_schema';
|
|
||||||
|
|
||||||
BEGIN;
|
|
||||||
|
|
||||||
ALTER TABLE repl_nodes RENAME TO repl_nodes2_0;
|
|
||||||
|
|
||||||
CREATE TABLE repl_nodes (
|
|
||||||
id INTEGER PRIMARY KEY,
|
|
||||||
type TEXT NOT NULL CHECK (type IN('master','standby','witness')),
|
|
||||||
upstream_node_id INTEGER NULL REFERENCES repl_nodes (id),
|
|
||||||
cluster TEXT NOT NULL,
|
|
||||||
name TEXT NOT NULL,
|
|
||||||
conninfo TEXT NOT NULL,
|
|
||||||
slot_name TEXT NULL,
|
|
||||||
priority INTEGER NOT NULL,
|
|
||||||
active BOOLEAN NOT NULL DEFAULT TRUE
|
|
||||||
);
|
|
||||||
|
|
||||||
INSERT INTO repl_nodes
|
|
||||||
(id, type, cluster, name, conninfo, priority)
|
|
||||||
SELECT id,
|
|
||||||
CASE
|
|
||||||
WHEN witness IS TRUE THEN 'witness'
|
|
||||||
ELSE 'standby'
|
|
||||||
END AS type,
|
|
||||||
cluster,
|
|
||||||
name,
|
|
||||||
conninfo,
|
|
||||||
priority + 100
|
|
||||||
FROM repl_nodes2_0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* You'll need to set the master explicitly; the following query
|
|
||||||
* should identify the master node ID but will only work if all
|
|
||||||
* standby servers are connected:
|
|
||||||
*
|
|
||||||
* SELECT id FROM repmgr_test.repl_nodes WHERE name NOT IN (SELECT application_name FROM pg_stat_replication)
|
|
||||||
*
|
|
||||||
* If in doubt, executing 'repmgr cluster show' will definitively identify
|
|
||||||
* the master.
|
|
||||||
*/
|
|
||||||
UPDATE repl_nodes SET type = 'master' WHERE id = $master_id;
|
|
||||||
|
|
||||||
/* If any nodes are known to be inactive, update them here */
|
|
||||||
|
|
||||||
-- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);
|
|
||||||
|
|
||||||
/* When you're sure of your changes, commit them */
|
|
||||||
|
|
||||||
-- COMMIT;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* execute the following command when you are sure you no longer
|
|
||||||
* require the old table:
|
|
||||||
*/
|
|
||||||
|
|
||||||
-- DROP TABLE repl_nodes2_0;
|
|
||||||
@@ -9,7 +9,6 @@
|
|||||||
#include "fmgr.h"
|
#include "fmgr.h"
|
||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "replication/walreceiver.h"
|
|
||||||
#include "storage/ipc.h"
|
#include "storage/ipc.h"
|
||||||
#include "storage/lwlock.h"
|
#include "storage/lwlock.h"
|
||||||
#include "storage/procarray.h"
|
#include "storage/procarray.h"
|
||||||
@@ -59,7 +58,6 @@ PG_FUNCTION_INFO_V1(repmgr_update_last_updated);
|
|||||||
PG_FUNCTION_INFO_V1(repmgr_get_last_updated);
|
PG_FUNCTION_INFO_V1(repmgr_get_last_updated);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Module load callback
|
* Module load callback
|
||||||
*/
|
*/
|
||||||
@@ -232,5 +230,3 @@ repmgr_get_last_updated(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
PG_RETURN_TIMESTAMPTZ(last_updated);
|
PG_RETURN_TIMESTAMPTZ(last_updated);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr_function.sql
|
* repmgr_function.sql
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* uninstall_repmgr_funcs.sql
|
* uninstall_repmgr_funcs.sql
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2016
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.c
|
* strutil.c
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -25,9 +25,15 @@
|
|||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
|
#if (PG_VERSION_NUM >= 90100)
|
||||||
static int
|
static int
|
||||||
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||||
|
#else
|
||||||
|
static int
|
||||||
|
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||||
|
__attribute__((format(printf, 3, 0)));
|
||||||
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||||
|
|||||||
16
strutil.h
16
strutil.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.h
|
* strutil.h
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
@@ -31,6 +31,7 @@
|
|||||||
#define MAXCONNINFO 1024
|
#define MAXCONNINFO 1024
|
||||||
|
|
||||||
|
|
||||||
|
#if (PG_VERSION_NUM >= 90100)
|
||||||
extern int
|
extern int
|
||||||
xsnprintf(char *str, size_t size, const char *format,...)
|
xsnprintf(char *str, size_t size, const char *format,...)
|
||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||||
@@ -42,5 +43,18 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
|||||||
extern int
|
extern int
|
||||||
maxlen_snprintf(char *str, const char *format,...)
|
maxlen_snprintf(char *str, const char *format,...)
|
||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||||
|
#else
|
||||||
|
extern int
|
||||||
|
xsnprintf(char *str, size_t size, const char *format,...)
|
||||||
|
__attribute__((format(printf, 3, 4)));
|
||||||
|
|
||||||
|
extern int
|
||||||
|
sqlquery_snprintf(char *str, const char *format,...)
|
||||||
|
__attribute__((format(printf, 2, 3)));
|
||||||
|
|
||||||
|
extern int
|
||||||
|
maxlen_snprintf(char *str, const char *format,...)
|
||||||
|
__attribute__((format(printf, 2, 3)));
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _STRUTIL_H_ */
|
#endif /* _STRUTIL_H_ */
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* uninstall_repmgr.sql
|
* uninstall_repmgr.sql
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2016
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user