Compare commits

..

262 Commits

Author SHA1 Message Date
Ian Barwick
51aa63c8f9 Update version and history for minor release 2.0.2 2015-02-17 16:06:00 +09:00
Ian Barwick
e53162deb8 Fix master port check
Check introduced in dc0dfe9b56
was comparing the provided database name instead of the port.
2015-02-12 14:43:53 +09:00
Jaime Casanova
6a8336b880 Add "--checksum" in rsync when using "--force"
If the user doesn't put that option in rsync_options, use of "--force"
could be unsafe.
While the probability of failure because of this is low, it isn't
zero.
2015-02-10 20:28:17 -05:00
Marco Nenciarini
4a445e7f8a Fix syntax errors in repmgr.c 2014-11-10 12:38:18 -05:00
Jaime Casanova
3c1d72a5ea Code review: Do not use psql on do_witness_create,
use createdb and createuser binaries instead
2014-11-10 12:37:10 -05:00
Martín Marqués
d4b9a32a86 errcode.h is a local header. 2014-11-10 12:36:17 -05:00
Martín Marqués
07a216ca25 If the user doesn't pass the port on which the primary server is listening
we have to assume it's the DEFAULT_MASTER_PORT.

This was not done, so we added a check to see if it has a value that is
usable, else we use DEFAULT_MASTER_PORT.
2014-11-10 12:34:30 -05:00
Ian Barwick
d3c067f1bd Clarify repmgr database role
Conflicts:
	QUICKSTART.md
2014-11-10 12:33:26 -05:00
Ian Barwick
e6caf11bf2 Fix pg_hba.conf example
Conflicts:
	QUICKSTART.md
2014-11-10 12:30:33 -05:00
Ian Barwick
9909881d81 Update HISTORY for minor release 2.0.1 2014-11-10 12:27:07 -05:00
Ian Barwick
8073a294f0 Formatting fixes
Conflicts:
	QUICKSTART.md
2014-11-10 12:26:28 -05:00
Ian Barwick
bf5e0b9b48 Correct year in specfile changelog 2014-11-10 12:21:49 -05:00
Ian Barwick
2e9f4aa30f Convert QUICKSTART file to markdown format
Less effort for more consistent formatting (at least the way
github renders it).
2014-11-10 12:12:25 -05:00
Ian Barwick
0dcacc3a70 Formatting fixes 2014-11-10 12:11:42 -05:00
Ian Barwick
65120c47cf Fix formatting 2014-11-10 12:10:47 -05:00
Ian Barwick
f9397c0f06 Add a "quickstart" guide
Provides a succinct overview of the steps needed to get repmgr
up and running.
2014-11-10 12:04:02 -05:00
Ian Barwick
af3c865b05 Fix log messages in do_standby_promote()
Initial connection is to current standby, before attempting to
connect to old master.
2014-11-10 10:44:55 -05:00
Ian Barwick
112a11a311 Typo fixes 2014-11-10 10:42:39 -05:00
Ian Barwick
7b87b5eddd Change successful standby promotion message to log level 'NOTICE'
Was previously 'ERROR'.
2014-11-10 10:40:31 -05:00
Ian Barwick
1aa36ca1c1 Properly specify rsync --exclude directories
Using '--exclude=dirname/*' to explicitly specify directories whose contents
should not be copied. This will result in empty directories being created
on the destination if they exist on the source, but that's not a problem as
they are needed anyway.

Previously the generated rsync command contained '--exclude=pg_log*', which
will break replication on 9.5 as the wildcard expansion prevents the
'pg_logical' directory from being copied.
2014-11-09 18:16:08 -05:00
Ian Barwick
a7eff1f39e Typo fixes and minor wording tweaks for clarity 2014-11-09 17:25:47 -05:00
Riegie Godwin Jeyaranchen
e64e230559 Update README.rst
Fixing a grammar mistake.
2014-11-09 17:15:42 -05:00
Nathan Van Overloop
bba167db9e init script: make status call return proper return code 2014-11-09 11:13:47 -05:00
Nathan Van Overloop
2676adcaed re-add comment full debug of log.c 2014-11-09 11:04:39 -05:00
Nathan Van Overloop
5a27d5e57b on init of witness server create db and user to avoid using postgres 2014-11-09 10:55:09 -05:00
Nathan Van Overloop
4071589ba5 adapt makefile for RHEL + RHEL specific files 2014-11-09 10:51:40 -05:00
brynhood
6cb2376974 Makefile: create bindir before install + force dir
in order to facilitate building of an rpm I've added a / to the end of the dirs.
2014-11-07 15:25:45 -05:00
PriceChild
235c98a0b5 Typo in example command. 2014-11-07 15:13:23 -05:00
Warren Moore
16da2f48c2 keep naming consistent 2014-11-07 15:12:49 -05:00
Warren Moore
c23e5858f2 fix: witness creation and monitoring
While reading node entries from master use a separate PGresult when inserting into witness.
Witness monitoring supplies a null value for 'last_apply_time'.
2014-11-07 15:09:05 -05:00
József Kószó
30ccee43d9 debian init script and config file documentation fixes 2014-11-07 15:02:27 -05:00
József Kószó
9357f89d12 debian init script and config file documentation fixes 2014-11-07 15:02:03 -05:00
József Kószó
48da11acfd debian init script and config file documentation fixes 2014-11-07 14:40:53 -05:00
Christian Kruse
07c54c296c removed old comment 2014-11-07 13:49:13 -05:00
Christian Kruse
8f0b9592e8 no longer use global variable for SQL query buffer 2014-11-07 13:47:58 -05:00
Christian Kruse
b35bf3f91d removed no-longer used variable 2014-11-07 13:47:18 -05:00
Christian Kruse
04c101c5f0 rather big refactoring: use a naming scheme
In the past naming of functions, variables and such didn't really have a
naming scheme. Now they should have.

This is backpatched from master (2.1dev) just because it will be easier
to backpatch other fixes.
2014-11-07 13:46:04 -05:00
Christian Kruse
65989840d2 avoid usage of snprintf()
We have a nice little abstraction for snprintf covering the case
that a string is too big for the target buffer – let's use that!
2014-11-07 13:44:23 -05:00
Christian Kruse
24bd4e7a3f completely avoid usage of strnlen() 2014-11-07 13:40:20 -05:00
Christian Kruse
1c67e105ff pg_indent'ing all files…
Conflicts:
	version.h
2014-11-07 13:32:29 -05:00
Christian Kruse
069f9ff2ed version push 2014-03-17 14:26:56 +01:00
Christian Kruse
b8ade8e908 fixing some documentation errors 2014-03-10 15:51:55 +01:00
Christian Kruse
c0abb3be31 Merge branch 'master' into REL2_0_STABLE 2014-03-06 15:23:52 +01:00
Christian Kruse
fed5c77653 various improvements and bugfixes in the init script 2014-03-06 15:23:22 +01:00
Christian Kruse
8429b43edf Merge pull request #14 from wamonite/fix_follow_user
fix: store the master connection user name on standby follow
2014-03-06 15:20:02 +01:00
Warren Moore
7e55ce737d fix: store the master connection user name on standby follow 2014-03-05 16:49:56 +00:00
Christian Kruse
98c7635fb5 fixing more compiler warnings 2014-03-04 17:58:36 +01:00
Christian Kruse
90ecb2b107 fix: check return values of freopen()
Some compilers complain about not checking the return value of freopen(),
so we check it
2014-03-04 15:32:48 +01:00
Christian Kruse
50b9022a41 fix: don't use Windows newlines 2014-03-04 12:59:23 +01:00
Christian Kruse
150ccc0662 add option to avoid repmgrd started upon installation
Now repmgr.repmgrd.default has another option: REPMGRD_ENABLED. Valid
values are either yes or no.
2014-03-04 12:46:05 +01:00
Christian Kruse
0a71123920 Merge branch 'master' into REL2_0_STABLE 2014-03-03 09:25:08 +01:00
Christian Kruse
0ff14a2aa1 avoid compiler warnings 2014-02-21 13:47:29 +01:00
Christian Kruse
5215265694 fix: now CloseConnections() is much more safe 2014-02-18 17:06:36 +01:00
Christian Kruse
e45ac25348 fix: progname is const, do not free it
The leak is irrelevant
2014-02-18 16:45:35 +01:00
Christian Kruse
a1ce01f033 fix: fixed some leaks 2014-02-18 16:35:29 +01:00
Christian Kruse
516cde621a fix: strcpy() on overlapping memory regions is invalid 2014-02-18 15:42:20 +01:00
Christian Kruse
f0807923a3 fix: gettimeofday() expects two arguments 2014-02-18 15:33:56 +01:00
Christian Kruse
10ca8037f8 added some more log messages
Now we should be able to distinguish different events more easily
2014-02-18 14:10:12 +01:00
Christian Kruse
0dc46f0dc8 fix: set connection to NULL when finishing it
This will prevent CloseConnections() from trying to close an already closed connection.
2014-02-18 13:42:49 +01:00
Christian Kruse
c3b58658ad fixing repmgr repl_status columns
repmgr repl_status had the column time_lag which was documented to be
the time a standby is behind master. In fact it only works like this
when viewed on the standby and not on the master: there it only was the
time of the last status update. We dropped that column and replaced it
by a new column „communication_time_lag“ which is the content of the
repl_status column on the master. On the standby we contain the time of
the last update in shared mem though refer always to the correct time
nonetheless where repl_status is queried. We also added a new column,
„replication_time_lag“, which refers to the apply delay.
2014-02-15 01:35:27 +01:00
Christian Kruse
18f1fed77f fixing wait_connection_availability()
wait_connection_availability() did take at least 2 seconds per call in
the old incarnation. Now we may finish a call without any sleep at all
when the result is already ready at the time called
2014-02-15 01:31:12 +01:00
Christian Kruse
d58fd080ca flush stderr after a log message appears
We had the problem that the log file appeared empty for a long time due
to file buffers. Thus we call fflush() after every log message so the
log file gets written out to disk quickly
2014-02-15 01:29:12 +01:00
Christian Kruse
c4ac2d3343 fixing PQexec() calls
fixing several calls where we did not check the result status but only
the return value; the query may fail nonetheless
2014-02-15 01:27:53 +01:00
Christian Kruse
a72c2296e9 Merge branch 'master' into REL2_0_STABLE 2014-02-11 09:28:40 +01:00
Christian Kruse
5ff1beeea7 do not enable autofailover by default
Autofailover is an experimental feature which should not be enabled by
default. The user has to be aware of what he is doing when enabling it.
2014-02-11 09:27:31 +01:00
Christian Kruse
9c3d79147b now version.h contains the right version 2014-02-07 21:47:39 +01:00
Christian Kruse
ca470647cb cleanup of usage text
Now it properly aligns and breaks at 78 characters.
2014-01-30 14:26:17 +01:00
Christian Kruse
62ee287e3f updated TODO 2014-01-30 14:10:14 +01:00
Christian Kruse
729a1b848a release notes for 2.0 stable 2014-01-30 13:59:17 +01:00
Christian Kruse
701cf043fd fix: seems as if I misread -hackers 2014-01-23 16:46:49 +01:00
Christian Kruse
bbb67c55f6 simple past of set is set 2014-01-23 10:50:37 +01:00
Christian Kruse
c2c48a9fe6 removed already finished TODO tasks 2014-01-23 10:48:04 +01:00
Christian Kruse
9d6ac2ebf9 fixed documentation and line endings 2014-01-23 10:39:21 +01:00
Christian Kruse
680f23fb1d copyright push 2014-01-23 10:37:49 +01:00
Christian Kruse
1159113c58 ignore the dynamic shared memory directory, too 2014-01-23 10:02:32 +01:00
Christian Kruse
f25a709454 added an explicit type cast to avoid compiler warnings 2014-01-22 15:17:47 +01:00
Christian Kruse
897daddcc7 removed not needed arguments to avoid compiler warnings 2014-01-22 15:17:28 +01:00
Christian Kruse
0fdcce0477 use if instead of switch and avoid a warning 2014-01-22 15:12:29 +01:00
Christian Kruse
de58eff7c1 added a chdir() for proper daemonizing 2014-01-22 14:30:38 +01:00
Christian Kruse
f2a0b31a20 more log format fixes 2014-01-22 14:30:24 +01:00
Christian Kruse
e007a55967 fix: do not use fsync()
We do not need fsync(), the fflush() is enough to avoid concurrent
logs.
2014-01-22 11:47:50 +01:00
Christian Kruse
d235c696af fix: do not newline at the start of a log line
This breaks the log file format since it will have a line break directly
after the timestamp
2014-01-22 11:47:02 +01:00
Christian Kruse
4ef6fbb5fe do not close stderr but reopen it to /dev/null
We want stderr to be always a valid file descriptor
2014-01-21 16:25:57 +01:00
Christian Kruse
2e61d7b156 refactoring: daemonizing is now a function 2014-01-21 16:19:49 +01:00
Christian Kruse
4496a0761e we now use a function and are more sophisticated
Refactoring part: we now use a function to generate the PID
file. Sophistication: we now check if the PID contained in the file is a
valid PID. We ignore the file if it isn't.
2014-01-21 16:18:15 +01:00
Christian Kruse
3978ead184 use a second fork to avoid a terminal
after the setsid() we are the process leader. And as a process leader we
are able to open a new terminal, even if we currently don't own one. So
we do another fork and do not call setsid() and not become a process
leader to avoid that.
2014-01-21 15:51:33 +01:00
Christian Kruse
b36dbf61fe reopening stdin and stdout to /dev/null now
stdin, stdout and stderr should always be valid file handles. Thus we
don't close them but reopen them to /dev/null
2014-01-21 15:31:38 +01:00
Christian Kruse
84466ecca5 log_crit() is more appropriate 2014-01-21 15:23:20 +01:00
Christian Kruse
649086e5e4 use unlink() instead of remove()
`remove()` will do a rmdir if necessary - we don't want that. So we use `unlink()`
2014-01-21 15:22:31 +01:00
Christian Kruse
7cf2eb440d renamed config options to a much more descriptive name 2014-01-21 15:19:50 +01:00
Christian Kruse
388bbfb773 split install target into install_prog and install_ext
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:23:33 +01:00
Christian Kruse
a89aa02c68 fix: make pg_config be settable from outside the makefile
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:22:59 +01:00
Christian Kruse
c81793b63f fix: added forgotten options.priority value
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:18:12 +01:00
Christian Kruse
b4e83cf188 Add format attribute checking for printf() like functions
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:14:36 +01:00
Christian Kruse
1db61ce277 fix: fail when repmgr_funcs is not pre-loaded
when repmgr_funcs is not pre-loaded `repmgr_update_standby_location()`
will return false and `repmgr_get_last_standby_location()` will return
an empty string. Thus we may end in an endless loop. To avoid that we fail.
2014-01-21 13:54:10 +01:00
Christian Kruse
41abf9a7ef fix: flushing and fsync()ing the log file
When not flushing and fsync()ing it the output may be garbled due to
concurrent writes to the file (system() spawns a child process with
stdin/stdout/stderr inherited from its parent)
2014-01-21 13:52:27 +01:00
Christian Kruse
abebc53ddc fix: sscanf() does not set variables to 0 on error 2014-01-21 13:48:41 +01:00
Christian Kruse
5fc4a0382f added config options sleep_delay and sleep_monitor
sleep_monitor replaces the old SLEEP_MONITOR define and makes it
configurable; this is the interval in which we monitor

sleep_delay replaces the old sleep(300) when waiting for the master to
recover.
2014-01-17 14:35:50 +01:00
Christian Kruse
a7d3c9b93a fix: also close stderr when using syslog logging 2014-01-17 12:14:26 +01:00
Christian Kruse
ee9dc9e247 do not use exit()
We avoid using exit() to be able to clean up when we have to
terminate. This includes removal of the PID file as well as closing
database connections.
2014-01-17 11:28:55 +01:00
Christian Kruse
94cb5b94e7 fix: reopen log file on SIGHUP 2014-01-16 17:16:45 +01:00
Christian Kruse
a08aa50f92 fix: close stdin and stdout only in repmgrd
closing stdin and stdout might cause problems when using system(), so we
avoid it.
2014-01-16 16:01:58 +01:00
Christian Kruse
9563877fbb new config option, stdout/stdin closed
Now stdin and stdout get closed. Additionally stderr gets closed and
reopened to the new config option „logfile“ if specified
2014-01-16 15:22:34 +01:00
Christian Kruse
4f3bd6612c do not exit in getMasterConnection() 2014-01-16 15:07:15 +01:00
Christian Kruse
192ee3cdb0 do not exit in get_cluster_size 2014-01-16 15:07:06 +01:00
Christian Kruse
6f149ead8f do not exit in guc_setted and guc_setted_typed 2014-01-16 14:48:46 +01:00
Christian Kruse
77aa6aa326 do not exit in pg_version 2014-01-16 14:48:42 +01:00
Christian Kruse
18206b3a64 do not exit() in is_witness 2014-01-16 14:28:56 +01:00
Christian Kruse
91446bcf93 fix: do not try to reconnect infinitely 2014-01-10 17:26:02 +01:00
Christian Kruse
dcdf8788ae fix: handle connection loss to standby
We do basically the same as we do for the master since connections drop
from time to time
2014-01-10 17:12:03 +01:00
Christian Kruse
4fabfbbbd0 fix: do not exit in is_standby()
Instead we now return an int with 0 meaning „not a standby,“ 1 meaning
„is a standby“ and -1 meaning „connection dropped“
2014-01-10 17:11:16 +01:00
Christian Kruse
c41030b40e Merge branch 'REL2_0_STABLE'
Conflicts:
	HISTORY
	dbutils.h
	repmgr.c
	repmgrd.c
	version.h
2014-01-10 16:07:33 +01:00
Christian Kruse
a0fdadd5d2 this way it is much cleaner 2014-01-09 15:35:44 +01:00
Christian Kruse
4c3d7f80ed now code compiles with -ansi -pedantic and has less warnings 2014-01-09 14:45:07 +01:00
Christian Kruse
6e3fe059d8 added config options pg_bindir and pg_ctl_options 2014-01-09 14:44:34 +01:00
Christian Kruse
9f26254ac3 fix: added some missing initializers to avoid compiler warning 2014-01-09 13:33:22 +01:00
Christian Kruse
0e8ff1730e added handling of a PID file 2014-01-09 13:04:40 +01:00
Christian Kruse
634fdff303 fix: do not call setup_event_handlers() on WIN32
If we put setup_event_handlers() in #ifdef WIN32, we have to do it for
the call and the declaration, too
2014-01-09 12:57:16 +01:00
Christian Kruse
cbce29f009 fixed typos 2014-01-08 11:55:03 +01:00
Christian Kruse
920f925e4b added a new cli option --daemonize
This option forks the process and generates a new session. This
effectively detaches it from the shell. Don't forget to redirect stderr
or use syslog for logging!
2014-01-08 11:53:15 +01:00
Christian Kruse
9fe2d6886e white space cleanup 2014-01-07 16:42:06 +01:00
Christian Kruse
0068dd573a fix: do not compare pointers but the strings 2014-01-07 15:52:29 +01:00
Christian Kruse
d0f3cb59c7 fix: create data directory after sanity check 2014-01-07 14:42:55 +01:00
Christian Kruse
7428e92e10 fix: correctly check the return value of PQexec()
not only check if return value is not NULL but also check that the
returned result is a PGRES_COMMAND_OK (e.g. the INSERT was successful)
2014-01-07 14:27:31 +01:00
Christian Kruse
a97065113d fix: remove own node earlier if force is set
We have to remove our own node before we check for a new master if force
is set; else master register would fail on the second time since there
already is a master (ourselves), even if we specify -F
2014-01-07 14:16:58 +01:00
Christian Kruse
9e2f276fcf fix: do not exit after pg_start_backup() w/o pg_stop_backup() 2014-01-07 14:02:29 +01:00
Christian Kruse
b0cd2b5e43 fix: do not exit() in create_pgdir()
This could leave the database in a locked state (pg_start_backup()).
And since all calls to create_pgdir() handle the return value correctly
we simply replace the exit() by a return false
2014-01-07 14:01:46 +01:00
Jaime Casanova
9209248420 Fix oversight in the header of guc_setted_typed() 2013-12-19 11:09:08 -05:00
Jaime Casanova
6693b99288 Files to create the debian package
Patch by: Christian Kruse
2013-12-19 01:43:12 -05:00
Jaime Casanova
8e7b487838 Update debian control file 2013-12-19 01:41:24 -05:00
Jaime Casanova
7f796e2d15 Update history and credit files 2013-12-19 01:40:00 -05:00
Jaime Casanova
5e04ab6eae Add a ssh_options parameter to allow ssh checking
to consider non-default values (ie: a different port)

Patch by Jay Taylor
2013-12-19 01:22:55 -05:00
Jaime Casanova
a1f4285e2b Add guc_setted_typed() function to allow
wal_keep_segments to be checked as an integer instead
of text

Patch by Jay Taylor
2013-12-19 01:22:42 -05:00
Jaime Casanova
493133986d Add timestamps to log line in stderr
Patch by Christian Kruse
2013-12-19 01:15:28 -05:00
Jaime Casanova
8b370dc581 Fix some typos
Patch by Krzysztof Gajdemski
2013-12-07 13:25:46 -05:00
Jaime Casanova
43af00aa12 Ignore pg_log when cloning, just like we ignore pg_xlog 2013-12-04 01:23:48 -05:00
Jaime Casanova
3c8df59eb9 Make repmgr compile in 9.3.
Patch provided by Shawn Ellis with some fixes by me.
2013-11-14 00:43:35 -05:00
Jaime Casanova
b410772627 Rework algorithm to coordinate voting
Make this by waiting for all nodes to finish a step, before starting
a new one. So everyone starts promoting or following in a coordinated
fashion.
Also make a few fixes.
2013-09-26 13:24:31 -05:00
Jaime Casanova
d99024ba11 Make repmgrd survive the failover
To do this it needs to reconnect to the new master
2013-09-26 11:58:59 -05:00
Jaime Casanova
1afaa3a26f Rearrange the logic in do_failover() for further improvements.
In particular, make this a more coordinated process by making all
nodes wait for the others before going to the next step.

This is one step further in following Andres Freund's advice
but there is still a lot to do in order to complete that;
in particular, it may be necessary to add more fields to repl_nodes
and to the shm area.
2013-09-23 18:28:58 -05:00
Jaime Casanova
079a7c9f16 In a failover situation get the nodes in a well defined order.
When deciding which node will be the new master, we should get the
nodes in a well defined order otherwise two standbys could process
nodes with the same priority in different order and end up with
a two master situation.
2013-07-26 00:59:50 -05:00
Jaime Casanova
3b66a31ac9 In a failover situation get the nodes in a well defined order.
When deciding which node will be the new master, we should get the
nodes in a well defined order otherwise two standbys could process
nodes with the same priority in different order and end up with
a two master situation.
2013-07-26 00:52:31 -05:00
Jaime Casanova
bdf957ca52 Add a missing ')'. This is a typo introduced in commit
2bc8044fda

Per complaint from Carlos Chapi when compiling for a customer.
2013-07-13 12:39:13 -05:00
Jaime Casanova
ad3630e7a9 Add a missing ')'. This is a typo introduced in commit
2bc8044fda

Per complaint from Carlos Chapi when compiling for a customer.
2013-07-13 12:37:15 -05:00
Jaime Casanova
67b451aa45 If PQgetCancel() returns NULL we should also return false.
Noted by Andres Freund.
2013-07-12 08:03:36 -05:00
Jaime Casanova
0a70d907ae Improve messages in wait_connection_availability, so we know what
error causes the failover procedure to start

By gripe from Andres Freund
2013-07-12 08:03:25 -05:00
Jaime Casanova
2e7acf03c4 If PQgetCancel() returns NULL we should also return false.
Noted by Andres Freund.
2013-07-12 08:01:01 -05:00
Jaime Casanova
2bc8044fda Improve messages in wait_connection_availability, so we know what
error causes the failover procedure to start

By gripe from Andres Freund
2013-07-10 19:25:58 -05:00
Jaime Casanova
ab1d380843 If PQcancel() fails, consider it as if the master is failing.
Because PQcancel() establishes a new synchronous connection to the
database, if it fails it means something wrong has happened with
master. So instead of just ignoring the failure, CancelQuery() now
reports a failure condition so we can detect master's death in
that situation.

This is especially important when only the postmaster crashes but
other children/backend connections are still there. Because the
children connection won't fail and CancelQuery() failure is our
only indication of something wrong happening.
Currently we just ignore the PQcancel() failure which leads us to
a situation in which we just loop forever
trying to cancel the async query.

Reported by: Martin Euser <martin.euser@nl.abnamro.com>
Problem analyzed and bug spotted by: Andres Freund <andres@2ndquadrant.com>
Patch by: Jaime Casanova <jaime@2ndquadrant.com>
2013-07-10 10:21:51 -05:00
Jaime Casanova
b0b44a157f If PQcancel() fails, consider it as if the master is failing.
Because PQcancel() establishes a new synchronous connection to the
database, if it fails it means something wrong has happened with
master. So instead of just ignoring the failure, CancelQuery() now
reports a failure condition so we can detect master's death in
that situation.

This is especially important when only the postmaster crashes but
other children/backend connections are still there. Because the
children connection won't fail and CancelQuery() failure is our
only indication of something wrong happening.
Currently we just ignore the PQcancel() failure which leads us to
a situation in which we just loop forever
trying to cancel the async query.

Reported by: Martin Euser <martin.euser@nl.abnamro.com>
Problem analyzed and bug spotted by: Andres Freund <andres@2ndquadrant.com>
Patch by: Jaime Casanova <jaime@2ndquadrant.com>
2013-07-10 09:53:45 -05:00
Jaime Casanova
49a2531930 Options -F -W -I -v don't accept arguments, which means that in
getopt_long they shouldn't be marked with the colon (:) character.

This has been wrong since day one, so backpatching all the way until
1.1
2013-01-13 16:37:39 -05:00
Jaime Casanova
672b237c4e Options -F -W -I -v don't accept arguments, which means that in
getopt_long they shouldn't be marked with the colon (:) character.

This has been wrong since day one, so backpatching all the way until
1.1
2013-01-13 16:32:56 -05:00
Jaime Casanova
7d94151494 If the node is a witness don't bother asking its position, it always
will be 0/0. We just need to check that we can connect to it to determine
if we are in the majority.
2013-01-11 03:44:50 -05:00
Jaime Casanova
4191b77e70 If the node is a witness don't bother asking its position, it always
will be 0/0. We just need to check that we can connect to it to determine
if we are in the majority.
2013-01-11 03:42:08 -05:00
Jaime Casanova
2a5d431481 Fix a problem that caused a standby to promote itself without going to
voting procedure.

This is because of a race condition inside CheckPrimaryConnection().

This was independently reported by Alex Railean and Dumitru, and Frank Jördens.
Analyzed and fixed by Cédric Villemain.

The fix has been verified to work by Frank
2012-12-19 12:01:27 -05:00
Jaime Casanova
81b8a944de Fix a problem that caused a standby to promote itself without going to
voting procedure.

This is because of a race condition inside CheckPrimaryConnection().

This was independently reported by Alex Railean and Dumitru, and Frank Jördens.
Analyzed and fixed by Cédric Villemain.

The fix has been verified to work by Frank
2012-12-19 11:45:58 -05:00
Jaime Casanova
93a999adc7 Formatting code using astyle 2012-12-11 11:49:07 -05:00
Jaime Casanova
1b69282df9 Formatting code using astyle 2012-12-11 11:47:59 -05:00
Jaime Casanova
06dd252f69 To select new master it needs to know which standby has received more
xlog records from master, so each standby should use pg_last_xlog_receive_location()
to report their positions. This solves a possible situation in which
a standby that is considered as new master when promoted is no longer
the best option.
2012-12-03 09:27:12 -05:00
Jaime Casanova
088ca29fe3 To select new master it needs to know which standby has received more
xlog records from master, so each standby should use pg_last_xlog_receive_location()
to report their positions. This solves a possible situation in which
a standby that is considered as new master when promoted is no longer
the best option.
2012-12-03 09:18:08 -05:00
Jaime Casanova
30e9d06172 Add an option for STANDBY FOLLOW to wait for a master to appear.
This is important for autofailover to do the right thing when
standbys detected master death at different times.

While this is a new option, seems important for the autofailover
to work properly so i will consider the lack of it a bug and
will backpatch to 2.0 where autofailover was introduced.

For gripe from Alex Railean, about a standby not finding the new
master because the new master hasn't finished promoting.
2012-11-14 15:09:26 -05:00
Jaime Casanova
d6bd5aa381 Add an option for STANDBY FOLLOW to wait for a master to appear.
This is important for autofailover to do the right thing when
standbys detected master death at different times.

While this is a new option, seems important for the autofailover
to work properly so i will consider the lack of it a bug and
will backpatch to 2.0 where autofailover was introduced.

For gripe from Alex Railean, about a standby not finding the new
master because the new master hasn't finished promoting.
2012-11-14 15:07:59 -05:00
Gabriele Bartolini
bbdcffa813 Fixed typos notified by lintian 2012-11-09 18:09:43 +01:00
Jaime Casanova
cd1a84252e Fix node decision logic when priorities are involved. Currently if
two nodes with different priorities are equally good to be promoted
the second one (with a lower priority, considering them
in descending order) will win.

Per report from Brailean Dumitru
2012-09-16 02:47:02 -05:00
Jaime Casanova
5f33d9d715 Fix node decision logic when priorities are involved. Currently if
two nodes with different priorities are equally good to be promoted
the second one (with a lower priority, considering them
in descending order) will win.

Per report from Brailean Dumitru
2012-09-16 02:38:28 -05:00
Jaime Casanova
2e19b3688b Add a comment 2012-09-16 02:26:18 -05:00
Jaime Casanova
877f4cf82e Add a comment 2012-09-16 02:23:16 -05:00
Jaime Casanova
de883a4c84 Keep compiler quiet. Noted when compiling in FreeBSD in which i
get a warning for an uninitialized variable.

Also, define InvalidXLogRecPtr. We don't really need it but using
it makes the initialization future proof (considering that in 9.3
XLogRecPtr will change its structure).
2012-09-16 02:21:18 -05:00
Jaime Casanova
949f5ee498 Keep compiler quiet. Noted when compiling in FreeBSD in which i
get a warning for an uninitialized variable.

Also, define InvalidXLogRecPtr. We don't really need it but using
it makes the initialization future proof (considering that in 9.3
XLogRecPtr will change its structure).
2012-09-16 02:10:02 -05:00
Jaime Casanova
eb2f7efb4a When we have more command-line arguments than we should have we
need to show that last value and we should use only optind for that
instead of optind+1
2012-09-15 17:39:10 -05:00
Jaime Casanova
85ff3ec286 Fix documentation to always use -h syntax to refer to the node we
want to clone or connect to, instead of relying on the fact that
for some time putting that argument at last worked.
2012-09-15 17:38:42 -05:00
Jaime Casanova
499a501afd Make repmgr compatible with FreeBSD.
We need to add an #include and make it use a different path for the
"true" binary.

Maybe we need to make these changes for all BSD systems but having no
evidence of that i prefer to make this only for systems with __FreeBSD__
2012-09-15 17:37:59 -05:00
Jaime Casanova
0a9107d76d Improve sample of commands for promote and follow 2012-09-15 17:37:43 -05:00
Jaime Casanova
2803bb92a8 Make repmgr compatible with FreeBSD.
We need to add an #include and make it use a different path for the
"true" binary.

Maybe we need to make these changes for all BSD systems but having no
evidence of that i prefer to make this only for systems with __FreeBSD__
2012-09-15 17:32:38 -05:00
Jaime Casanova
16fe41eecf Improve sample of commands for promote and follow 2012-09-11 15:53:57 -05:00
Jaime Casanova
95ec0450da When we have more command-line arguments than we should have we
need to show that last value and we should use only optind for that
instead of optind+1
2012-08-30 02:11:48 -05:00
Jaime Casanova
57aa95f674 Fix documentation to always use -h syntax to refer to the node we
want to clone or connect to, instead of relying on the fact that
for some time putting that argument at last worked.
2012-08-30 02:10:10 -05:00
Jaime Casanova
d365a309fc Fix HISTORY to show from newest to oldest 2012-07-27 11:29:07 -05:00
Jaime Casanova
56d2ae4e81 Fix HISTORY to show from newest to oldest 2012-07-27 11:26:18 -05:00
Jaime Casanova
d5a41bb587 Fix tabs in HISTORY 2012-07-27 11:22:04 -05:00
Jaime Casanova
474d3217b4 Fix typos in RELEASE NOTES 2012-07-27 11:21:49 -05:00
Jaime Casanova
3edd87a041 Fix tabs in HISTORY 2012-07-27 11:20:56 -05:00
Jaime Casanova
740208da1c Fix typos in RELEASE NOTES 2012-07-27 11:15:50 -05:00
Jaime Casanova
7a00d5a9a4 Now that we can have no monitoring we need to check all nodes at failover
not only those in repl_monitor
2012-07-21 17:53:15 -05:00
Jaime Casanova
664e1a8321 Now that we can have no monitoring we need to check all nodes at failover
not only those in repl_monitor
2012-07-21 17:49:38 -05:00
Jaime Casanova
5683b905dd New development branch is 2.1dev 2012-07-21 12:22:04 -05:00
Jaime Casanova
d43c6334da Prepare HISTORY and release notes for release 2012-07-21 12:06:33 -05:00
Jaime Casanova
f984b3fd33 Document tunables added in aaf35947ed 2012-07-21 11:10:59 -05:00
Jaime Casanova
aaf35947ed Add tunables for connection retries to master and interval between
connection retries, these parameters along with master_response_timeout
determines the amount of time since failure to failover
2012-07-21 11:01:00 -05:00
Jaime Casanova
08ed0aa987 Commit 2d24518d9d added an additional
'}' at the end of parse_config(). removing.
2012-07-21 10:42:58 -05:00
Jaime Casanova
2d24518d9d If master_response_timeout hasn't been set in repmgr.conf it defaults
to zero, which was causing to a false positive in the failure detection
logic in wait_connection_availability(). So, change that to defaults to 60s
and add a check to avoid it being set to zero or negative.

Problem reported and analyzed by Andrew Newman
2012-07-21 09:49:05 -05:00
Jaime Casanova
a6c94b29de Change release notes because of commit bf241ba1d6 2012-07-06 02:00:46 -05:00
Jaime Casanova
bf241ba1d6 Make the monitoring history capabilities of repmgr be optional and
turned off by default. Most of it has been superseded by
pg_stat_replication view, we can still start it by using the switch
--monitoring-history
2012-07-06 01:51:22 -05:00
Jaime Casanova
41dbc39527 Add release notes 2012-07-05 09:35:23 -05:00
Jaime Casanova
50b7147f15 Change Copyright date to cover 2012 2012-07-04 10:47:26 -05:00
Jaime Casanova
f5e57aa433 Add an option for "no-history" mode, where repmgrd just checks the
connectivity of master but doesn't INSERT any data into it
2012-07-04 10:07:31 -05:00
Jaime Casanova
ac5a9d1fd6 The release changed, just wait a little before setting it.
Also make well known names in HISTORY be only names, without
last name
2012-07-02 00:06:57 -05:00
Jaime Casanova
cb740b68be Add a check of the connection inside the CancelQuery() so it check
that before trying to cancel a query, which can block.
2012-06-26 11:29:02 -05:00
Jaime Casanova
d58ea77798 Add a quick setup for autofailover 2012-06-26 07:49:43 -05:00
Jaime Casanova
e3c3c22b6e Improve the version message to actually show the repmgr version not
only postgresql's one
2012-06-25 22:54:48 -05:00
Jaime Casanova
861a3c8f22 Fix CLUSTER CLEANUP, it needs to establish a local connection in order
to look for the master
2012-06-16 01:32:59 -05:00
Jaime Casanova
e51870b504 Force to enter a password for the superuser in the witness, this is
in case we need to send a password to connect as stated in
master's pg_hba.conf.
2012-06-15 13:51:45 -05:00
Jaime Casanova
5651720560 Remove a variable left in last commit 2012-06-15 09:46:01 -05:00
Jaime Casanova
d32a6cdb24 Remove kludge added to create user and db for witness.
It's too fragile, almost always causes a "segment violation" and
doesn't seem to be very useful.
2012-06-15 09:41:54 -05:00
Jaime Casanova
9e10987b90 Fix a few bugs introduced when merging features 2012-06-15 09:40:09 -05:00
Jaime Casanova
64fce88e99 Add a CLUSTER CLEANUP command to clean monitor's history,
also include a --keep-history (-k) option to indicate how many
days of history to keep
2012-06-13 00:39:54 -05:00
Jaime Casanova
7a76f1998c getMasterConnection() cannot avoid checking the same node that asks
to find the master.
This was a micro optimization based on the fact that all commands that
needed to detect the master were executed from the standby but now that
we have CLUSTER level commands that is not true anymore
2012-06-12 23:28:24 -05:00
Jaime Casanova
4db046a8ea Allow repmgr to obtain tablespace's locations from pg 9.2 and later
in which we no longer have a spclocation column in pg_tablespaces
2012-06-12 11:08:15 -05:00
Jaime Casanova
331eca447a STANDBY CLONE should be run by a SUPERUSER, otherwise we won't be able
to retrieve data_directory and the other parameters we need by
querying the database.
2012-06-12 09:42:50 -05:00
Jaime Casanova
b5b2f93f7e Merge branches 'master' and 'async' 2011-12-02 00:28:17 -05:00
Jaime Casanova
9d03d4a254 After checking that master is alive, is_pgup() should return not keep
checking forever.
2011-12-01 23:58:12 -05:00
Jaime Casanova
3b2ccc5b78 Add a master_response_timeout parameter and use it to limit the amount
of time we spend waiting for a response from master before declaring the failure.
Also, change is_pgup() so it use PQsendQuery() instead of PQexec to
execute the check of master
2011-12-01 01:20:33 -05:00
Jaime Casanova
89a1e2bcbd Not even consider old master as an option in failover 2011-11-27 19:17:59 -05:00
Jaime Casanova
7077a7c68f Add -w option to pg_ctl commands so we wait until command is finish.
Or at least, we try. By default, after 60 seconds pg_ctl just return.
This make useless to wait ourselves after pg_ctl start of witness so
remove the sleep
2011-11-27 18:38:53 -05:00
Jaime Casanova
9b8fb7e960 Remove last argument from log_err, left in commit 55c7ea4b5e.
Also rephrase the sentence

Reported by Jeroen Dekkers
2011-11-25 14:59:29 -05:00
Jaime Casanova
55c7ea4b5e Fix a wrong message.
It was saying the problem is the version of the PostgreSQL server while
it actually is because the MASTER REGISTER command was running on a
standby node
2011-11-10 09:38:12 -05:00
Jaime Casanova
0240cb2bfb Fix a typo introduced in commit cb764b180d 2011-11-03 12:50:51 -05:00
Jaime Casanova
2e64028f26 Add the improvement of the view repl_status to the history 2011-10-20 23:26:41 -05:00
Greg Smith
27396f462a Add strnlen on platforms that don't have it, such as OS X 2011-10-20 23:23:31 -05:00
Jaime Casanova
cb764b180d Let the clone happen in a session with synchronous_commit off. This
is because in pg 9.1 the default configuration can easily allow sync
rep to be activated even if no standby is present and will block
pg_start_backup() and pg_stop_backup() in that case.
2011-10-03 14:04:27 -05:00
Jaime Casanova
65cf045a63 Fix a few problems introduced in 020e17b059
while adding standby_name
2011-08-09 02:01:00 -05:00
Jaime Casanova
3d53661d97 Fix a typo introduced in the commit about standby_name
(020e17b059)
2011-08-09 01:14:50 -05:00
Jaime Casanova
7b3a9cbbe4 Rename standby_name configuration option as node_name
per gripe by Cedric
2011-08-09 01:01:37 -05:00
Jaime Casanova
276c947202 Add name to nodes and show it for the standby in repl_status. 2011-08-09 00:58:01 -05:00
Jaime Casanova
020e17b059 Add a new standby_name parameter that, when provided in repmgr.conf,
is included in the primary_conninfo as application_name... This is
a simple first step towards making repmgr works fine with 9.1 and
sync standbys
2011-08-09 00:57:23 -05:00
Jaime Casanova
10817ecffc Add "witness" as a node role in CLUSTER SHOW 2011-08-08 13:29:26 -05:00
Jaime2ndQuadrant
f26e37c087 Merge pull request #2 from 2ndquadrant-it/master
Add function to show nodes with actual roles: repmgr cluster show
2011-08-08 11:10:13 -07:00
Carlo Ascani
61c31eeb0c Added function to show nodes with actual roles: repmgr cluster show 2011-08-08 19:32:32 +02:00
Jaime2ndQuadrant
0b5d2418df Merge pull request #1 from 2ndquadrant-it/master
Added function "write_primary_conninfo" which now adds the username to the primary_conninfo parameter in recovery.conf

Author: Gabriele and Marco
2011-08-05 11:03:54 -07:00
Gabriele Bartolini
f44037088b Added function "write_primary_conninfo" which now adds the username to the primary_conninfo parameter in recovery.conf
Signed-off-by: Gabriele Bartolini <gabriele.bartolini@2ndquadrant.it>
Signed-off-by: Marco Nenciarini <marco.nenciarini@2ndquadrant.it>
2011-08-05 17:41:18 +02:00
Carlo Ascani
e05dd729f8 Fix a typo in documentation 2011-08-05 00:15:13 -05:00
Jaime Casanova
ab8d2673dc Remove useless second parameter from create_recovery_file()
per gripe by Gabriele
2011-08-04 17:45:28 -05:00
Jaime Casanova
28c5ac3d91 Make is_standby() and is_witness() work the same way, actually
is_standby() can show the segmentation fault problem but for code
clarity
2011-07-26 18:36:27 -05:00
Carlo Ascani
c983fdf83c Fix a possible double free that would cause a segfault in
checkNodeConfiguration()
2011-07-26 18:22:06 -05:00
Carlo Ascani
158214acae Only log that we have restored the connection when that already happens,
this fixes a flood in logs that makes them unnecessarily big
2011-07-26 18:16:22 -05:00
Jaime Casanova
04290c1f60 Another pass reviewing code:
- remove a duplicate check for ssh connection and just exit if
  we can't connect to the remote host.
  stop_backup is only needed if pg_start_backup() has been
  already called
- remove a new connection to master in stop_backup label, AFAIC
  we hadn't close the one we already opened
- add a lot of PQfinish(), a few PQclear() and adjust code to what
  we used to do in 1.x
2011-07-26 16:14:44 -05:00
Jaime Casanova
cedc5e20fb Add a check to ensure 'hot_standby=on' on master. While this parameter
doesn't have an effect here it will be a reminder for users, besides
will reduce things user have to do on the standby
2011-07-26 16:12:51 -05:00
Jaime Casanova
b61ac86b5f Move repmgr_update_standby_location() and
repmgr_get_last_standby_location() functions to repmgr schema.
This will be important when we want to create this as an
extension, so better make it be in the right place from day 1.
2011-07-26 16:12:11 -05:00
Jaime Casanova
64da72b48b repl_monitor.last_wal_standby_location should accept nulls because
of the witness
2011-07-26 16:11:26 -05:00
Jaime Casanova
c4778a83c5 Be consistent in the use of sqlquery_snprintf() for query strings,
which was not used in the new code.
Also add a few "newline" characters in some messages, and other
minutia.
2011-07-19 05:02:51 -05:00
Jaime Casanova
4d26e4d21e Redefining repl_status view to improve performance, also add an
index on repl_monitor to speed up even more the view.
2011-06-13 11:58:43 -05:00
Jaime Casanova
3f4ff5a862 A "HEAD" word remaining from a merge conflict was accidentally left
in... removing it
2011-06-10 21:16:07 -05:00
Simon Riggs
1d05467df1 README additions and improvements 2011-06-07 11:37:09 +01:00
Simon Riggs
15ba3342aa Remove whitespace 2011-06-07 11:12:58 +01:00
Greg Smith
3a950c9f8b Squashed commit of the following:
commit e7ef17117efe6679e154a4905d587c808b48df50
Merge: cd3a280... 43268f2...
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 01:40:08 2011 -0400

    Merge commit 'origin/master' into autofailover

    Conflicts:
    	repmgr.c

commit cd3a280804a01c5270c5c743e5822c7beb9ac77a
Merge: 72ad378... 8200b68...
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 00:52:42 2011 -0400

    Merge commit 'origin/master' into autofailover

    Conflicts:
    	config.c

commit 72ad378bed21d74dab743fec411fe10b19007481
Merge: 17bafa1... 367d0b1...
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 00:38:01 2011 -0400

    Merge commit 'origin/master' into autofailover

    Conflicts:
    	config.c
    	dbutils.c
    	repmgr.c
    	repmgrd.c

commit 17bafa1ca509c1f6614810bab2538e570ebc599e
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 00:31:28 2011 -0400

    Run astyle to fix recent changes

commit a5fbbaecce8fe86bc17c0ebeb1324f9262967316
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue May 10 00:46:58 2011 +0200

    Fix a critical bug in the decision process

    If the postgresql on the first node returned by the query to find
    candidates in do_failover is down then the initialization of the
    bestCandidate is done with non assigned variables.

    Fix the situation by moving the initialization in the loop above.
    And loop until we have a find_best. Added a log message if no candidate
    is found

commit 42b21475ac248db8f0e50f5956ef96808e92c68c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon May 9 22:39:21 2011 +0200

    Add test_ssh_connection

    The feature was written by Jaime and reworked to fix
    https://github.com/greg2ndQuadrant/repmgr/issues/5

commit 86f01afae631e9541600af6578e649d88c3ece98
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon May 9 21:39:42 2011 +0200

    Improve log output

commit db2f29fc1c8ea03a8ff85717873f8a876846b844
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon May 9 01:41:34 2011 -0500

    Only compare getenv("USER") when it's actually set, otherwise it
    will segfault

commit ea4f3f20747e2e0294551d5e61869bdde6d3cd7b
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon May 9 01:03:39 2011 -0500

    Fix a message to only show when log_info is requested and the verbose
    flag is set.
    This is because it needs a calculation that is only done when the
    verbose flag is set, so if i have requested log INFO level but haven't
    set the flag it shows a null

commit 35a53bac7e341cfdbb64d2c15fa77c9c4e18bd40
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon May 9 01:00:54 2011 -0500

    Use log_* functions in do_witness_create()

commit 8c526f758a46ad53b4d391fc76360561d4ff8bdd
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun May 8 19:30:34 2011 -0500

    Add a fallback_application_name parameter to the conninfo identify
    the connection if application_name is not set

commit 01057fc12cbc1fb656d619f483044f28a5f08d37
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri May 6 23:57:27 2011 +0200

    Fix the best_candidate loop

    there was an overflow in the loop, already fixed but lost during merge.

commit e80effa3daf56f08005704fc1a5bbe69c1324212
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri May 6 23:55:15 2011 +0200

    Fix check in do_failover (merge failure)

    And also remove an unused variable as I was here.

commit 79ba37e2933f4e87523a77375dfda1d96150e7d3
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu May 5 21:15:46 2011 +0200

    Fix compile error

commit 67c7b5d68c95a60bb4cd0cfb750b4c8d047fa2a0
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 23:27:57 2011 +0200

    And apply astyle  ....

commit 9a321722537d96983b8162227ff629a267b6ed67
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 23:27:09 2011 +0200

    Cosmetic change to reduce diff with master

commit 09037efea3fa2c31896b5dc78b0340516a743ba6
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 22:26:03 2011 +0200

    Apply astyle

commit 7c4786f662943558be967be4a8dad976f52155dd
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 02:22:12 2011 +0200

    Improve the standby clone action

    By default, all config files and directories are cloned from the master in the
    same place in the slave.
    If a destination directory is provided (-D), everything is copied in the
    provided dir, and if the master have tablespaces repmgr exit without cloning.

commit a6d7f765b9403a2cff7e2e1df8ae45a5a7ee1665
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 23:31:09 2011 +0200

    Add success message for repmgr standby register

commit 26bf3b08e661137dd3f3c0d3c00fd6b3b90b08b3
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:51:28 2011 +0200

    Change the exit to a return in config.c

commit 1bd8f4c119e1dbf9a94b2eaec884abce96eeb174
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:32:57 2011 +0200

    Reduce duplicate code

commit db553fab45ca075f95f09bdb2147de68948b60c8
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:24:04 2011 +0200

    Some cosmetic

commit f19d0ad714ebcf7df7726772e887c873d005d350
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:23:06 2011 +0200

    Move a function declaration into header file

commit 1f328bc438c896a9f2067069d756f901b58d41f2
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon Apr 11 00:38:30 2011 -0500

    We don't use conninfo as a separate variable anymore

commit f6ade0d63b8a5dd43377f546f5311b4a151b2bfb
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Apr 10 20:53:22 2011 -0500

    Fix a few typos

commit ceca9fa983c8dbde61a7a78da29a1e1871756d8c
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Apr 10 19:32:57 2011 -0500

    Fix code to allow the code to compile:
    - some log_* had problems with parenthesis
    - some uses of variables without the runtime_options prefix

commit 73431f955afd77560bca5370924e09329566c4b7
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:21:37 2011 +0200

    Fix the debian package name

commit 688eab371110083ae8715b35f414e29c6d87e1ac
Merge: 5c23375... 7995c42...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:17:58 2011 +0200

    Merge branch 'autofailover' of git.2ndquadrant.it:repmgr into autofailover

commit 5c23375f88a53ed469e9d13934d618f7a74669be
Merge: cc3315c... c4ae574...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:08:36 2011 +0200

    Merge branch 'master' into autofailover

    Conflicts:
    	repmgr.c

commit 7995c428161566cfc54a67eb16f9134c859e7381
Merge: 788ff98... 1303e49...
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Apr 10 16:14:30 2011 -0500

    Merge branch 'autofailover' of git+ssh://git.2ndquadrant.it/git/repmgr into autofailover

commit cc3315ce235b898711c34fd1f2fa1116dbee4e16
Merge: 1303e49... d77186c...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:03:11 2011 +0200

    Merge commit 'd77186c90444b9c5ca2de201651841f56a7ded02' into autofailover

commit 1303e49852705046e15ef64f5f7ab739a1689431
Merge: 7ff621b... 4c792c8...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 22:28:08 2011 +0200

    Merge commit '4c792c8013f5713589f53dbdb47721febf139a85' into autofailover

commit 788ff98e94311a33e3e6f7d85a303cbc61288e5f
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Tue Mar 15 19:00:56 2011 -0500

    A few fixes after merge to unbreak what the merge broke, and to make
    the new logging system more consistent through the system

commit 7ff621b96784dfaf40baab4f0f8e7857b4aed6ce
Author: Dan Farina <drfarina@acm.org>
Date:   Tue Dec 7 21:30:44 2010 -0800

    Install install/uninstall SQL also.

    Signed-off-by: Dan Farina <drfarina@acm.org>
    Signed-off-by: Peter van Hardenberg <pvh@heroku.com>

commit c9147dad8223eff20bf5d52ced8a35eed6d82110
Author: Dan Farina <drfarina@acm.org>
Date:   Tue Dec 7 21:30:20 2010 -0800

    Split up install/uninstall actions more like a standard contrib

    Signed-off-by: Dan Farina <drfarina@acm.org>
    Signed-off-by: Peter van Hardenberg <pvh@heroku.com>

commit c8028780b50f2c7fb4384cb9891796647f356e19
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sat Feb 12 13:29:32 2011 +0100

    Fixing SLEEP times and RETRY

commit 39a1bf3d29f3e33fbf0e1b066a311e8a72f2dc38
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sat Feb 12 01:17:37 2011 +0100

    Add a pause after update_shared_memory() in do_failover

    we pause for SLEEP_MONITOR+1 to let other nodes update themselves.

commit 527af2baa945e3b640352c01c6dd181d93c9529a
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 21:14:22 2011 +0100

    change the debian package filename too

commit c8cb27c7039b2b3a838554187a8add850a42027a
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 15:14:40 2011 +0100

    Change package name for the automatic fail-over branch of repmgr

commit 7427988628f754e57069453d65a71f79117c3a3d
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:28:03 2011 +0100

    Exit 1 when SIGINT

commit af366fe731b70e24ead056e50b69269392bd15a1
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:27:46 2011 +0100

    Improve log output when reloading configuration

commit 6cc18ce081d7bf55ba9993e9d87567879da35c4d
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:20:36 2011 +0100

    Add reload conf on (re)start

commit 4259e2c410fd0ef1273c7d1b4ab8fcf1e778e968
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:01:37 2011 +0100

    avoid double free on repmgrd exit as master
    Per commit from Charles Duffy <charles@tippr.com>
    and failure to cherry-pick it correctly.

    Conflicts:

    	repmgrd.c

commit 431e27b1c005e000f9a346d982419979b4363d77
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Thu Feb 10 15:09:18 2011 -0500

    Tweak .gitignore to ignore more doc build artifacts

commit b725fa9ae65c7bd5fea7a4e944db5685dee2e8bd
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Mar 13 15:16:27 2011 -0500

    Delete a paragraph that appears twice, because a merge problem

commit d990c77b327a282c1903b7a339f35a22b6a89958
Author: trbs <trbs@trbs.net>
Date:   Tue Jan 11 18:24:17 2011 +0100

    added note about postgresql-server-dev-9.0 and use libxslt-dev instead of version specific package name

commit 69bc1cd3772103b529598978160327e1f9025157
Author: trbs <trbs@trbs.net>
Date:   Fri Jan 7 01:32:31 2011 +0100

    fix line

commit f7b1d1e5e3764c85cec7afa81c164fac3679e1ea
Author: trbs <trbs@trbs.net>
Date:   Thu Dec 23 15:02:23 2010 +0100

    Updated README with Debian/Ubuntu install information

commit 77d28960ff78c3936be0e1029305b0b578e260a9
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 13:34:49 2011 +0100

    Create the function used for shared memory access in create_schema, note that this is incompatible with current master

commit 4a73043f232f0a143ede898841530f4d7442c95b
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 10:00:34 2011 +0100

    improve log output

commit 62c90a4e86b2cd56ec14255adcfef564945d0769
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Feb 11 00:40:05 2011 -0500

    Close local connection on witness before exit on error of primary

commit e5156865e05670fa9944d74d472127082556d0a0
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Feb 11 00:34:25 2011 -0500

    Remove a semicolon which is just a typo

commit 7586a09bc321241932adacf6a1431029964dc46f
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Feb 11 00:07:02 2011 -0500

    Fix the computation of quorum, we need to count master and the
    division should not be an integer division

commit a19c0ad2059a00e9e7415fc6ea280c109c809c9c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 10 23:54:35 2011 +0100

    move the functions back into public schema

commit 19fc8ffb1dc0fd9acddad5d22bf5c01704687474
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Thu Feb 10 00:48:00 2011 -0500

    A few more fixes.
    Make repmgr functions exists in repmgr schema and fix a typo that
    caused a seg fault.

commit c6d2b8c6421f93074d7d616980feb0175ee4ef36
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 17:56:44 2011 -0500

    A few places where i forgot to update the priority field

commit 0ff0bb8d981b868693c6a751e7e80473b25f2399
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 14:24:43 2011 -0500

    Fix a few bugs from last commit and make reload configuration also
    update registration on repl_nodes

commit 508c34e9dfb2bfb7e47d5c6836ead7992e6112fe
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 13:45:20 2011 -0500

    Add a way for the user to indicate its preference about which node
    should be promoted in case of a conflict (ie: two nodes with the
    same wal location).

    This will be provided as a parameter in repmgr.conf called priority,
    and will be registered in the repl_nodes table.

commit 6005f1bbf90de61b4c5ebc34302307fa05b019a7
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 11:15:30 2011 -0500

    Add a heartbeat for the witness, this should write to repl_monitor
    table so we can see the witness in repl_status and monitor if it
    is working.

    Also close connection at the end of do_witness_create in repmgr.c

commit ac1c6367ab689aeae2eff3dda22db42337f300c1
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 01:26:41 2011 -0500

    Add a sighup handler to reload the configuration

commit 7df2fb7b74a3c5287319e56112840d9c2a3e7d5b
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 3 18:42:36 2011 +0100

    Change the is_pgup () check test

    remove spurious 'return'

commit 7e58e6aa91ab3f681854a44fe282b44da81768fa
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 3 16:53:17 2011 +0100

    Add constant for the sleep times and retry, rework monitor functions

    Rename MonitorExecute() to StandbyMonitor()
    Add    WitnessMonitor() # very simple version to start service mode isolation

commit 1b270dab2e2c3c60527b86a33cd0fc9c0d11c08c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 3 16:23:01 2011 +0100

    Improve PrimaryCheck

    add a function "bool is_pgup()"

    Now, repmgrd-master can work.

commit c6f07229713c8f2b77596459c06184edddd8d77e
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 19:31:06 2011 +0100

    Fix strcmp in config parser, now failover parameter should be set correctly

commit 0b690698a0d9aa87d3e8f1e462ee0771aa2ae9e8
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 16:23:50 2011 +0100

    fix sprintf extra param

commit 6050da315824048661be9c425ae6005576e5870f
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 13:53:29 2011 +0100

    Add some other files to ignore

commit a146dd581b46ea0e26b7b56b087d6b0d4ae15d44
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 13:53:04 2011 +0100

    Fix SQL query

commit 8f5db0f9c0f68ce2519afda72b6a778536427eab
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 00:51:54 2011 +0100

    Some more minor fix and remove TODO

commit c9299ad74e8f929bdc24804a6a834f24b66b7074
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 00:39:18 2011 +0100

    fix some memory leak and fix testlogic for is_standby is_witness

    * is_standby() must be tested *after* is_witness else we think we are in a master
    * remove SELECT * in favor of SELECT witness

commit cc5d06ea8bf1dcde4c264e95eb90f7fb1e821af3
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 23:40:15 2011 +0100

    Forgot to remove a param from fprintf

commit 426e22fa8dfd78f0c256bda1b166a31807de9ec6
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 22:47:58 2011 +0100

    Restore previous usage of --force and rsync tablespace before data_dir

    The --force option is used to reduce the time needed to restore a failed
    node: it will overwrite existing files thanks to rsync --delete option

    The tablespaces need to be copied first, because there are symlinks to
    them from the data_directory

commit 1937973fced703d14159e6aae1cbdabb5619accb
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 21:09:12 2011 +0100

    Improve message of repmgrd

commit 035a9bcc1eea55cd95790bc72276727cc492694a
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 21:08:38 2011 +0100

    Fix (bool *)PQgetval

commit bf9181654213f898949e9c8f094b974915f82258
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 01:54:49 2011 +0100

    Fix pg_hba on witness and connection

    * Copy the pg_hba.conf file from master to witness server
    * createdb and createuser in witness if they are different from getenv(USER)

commit a2d8dcb2fd105d8f02bd76856969aca6605c66fa
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 01:01:43 2011 +0100

    Improve initialization of repmgr (+ critical bug and minor fixes)

    * standby clone now *clone* the master files and dir to the *same*
      place on the standby if destination_directory is not provided
    * add preload library to the witness configuration
    * sleep 2 seconds after starting the witness postgresql to let it
      start enough to be able to connect to it.
    * Fix rsync files
    * Fix insert configuration into witness

commit bc1a265d272e4805ac7859c208b51b57edd10fc7
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 12:25:20 2011 +0100

    Fix some error message new line

commit e087bd5de5ab43ffac90c6a20df6ef3fb19eed6d
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 11:37:08 2011 +0100

    Guess data_directory from master in 'standby clone' and remove --force for dir

    --force does not overwrite directories anymore (it was not working very well anyway)
    dest_dir is the same as the master's one by default.
    Move down the tablespace check directories process

commit 0a961e7ef05f26c87af1946b8141a639076fc488
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 11:21:40 2011 +0100

    Add new function: create_pgdir (and fix 2 bugs in the process).
    It also fix function create_schema.

    Reduce repmgr code

commit 7e5958dcc1daa9b54cb6f295af96fbef750c7952
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 10:34:58 2011 +0100

    Improve an ERROR message

commit f3a66a65a361f919727fc2d0ff9bf9544a10a822
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 10:25:45 2011 +0100

    Improve error message about 'wal_keep_segments'

commit 150dbcc0fe53ce4eff08797210fd2e9e4dd0e17a
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sat Jan 29 23:35:00 2011 -0500

    Add witness server support

commit 6281e22a9c467da883ad960567f8ab6bdbc155ba
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Jan 27 21:32:11 2011 +0100

    Build all at once and update debian makefile to include the sql/

commit 50d752bf1ead7c9343900d4b494844284b7aac6c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Jan 27 02:10:31 2011 +0100

    Adding information for debian and --version test

commit 16d56dbfa05314eea69869ee2a7a705636432ad9
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Jan 27 02:03:20 2011 +0100

    Add a hint at the end of the standby clone
    and minor typo and message shuffle

commit 6404ba247de1e2e3b995f30b6e7626e459849136
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Jan 26 06:13:30 2011 -0500

    Fix compiler warning about variables being used uninitialized

commit a4f48993d5fe3b22bdd2aaefcff315115f8764b7
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Jan 21 21:09:03 2011 -0500

    Fix a new typo

commit 904e61c9edcbbce6b1027c80ff77317d7cbd4919
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Jan 21 19:30:56 2011 -0500

    Use a function to make the call to repmgr_update_standby_location()
    so i avoid typos like the one i fixed in a previous commit. It also
    makes the code cleaner.

commit 4ed388726f4bc0a52cc88d044d1f81697f348a7c
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Jan 19 09:17:16 2011 -0500

    Fix a typo when calling the sql function that writes shared memory

commit d9232266561306eabef90e13c084c051a0e7f458
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Tue Jan 18 01:25:23 2011 -0500

    Define the variable that we are using to test the result status of
    the system() call.

commit 4d131c212b91e40ca027f76637c182456ab12514
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Tue Jan 18 01:04:12 2011 -0500

    Makes repmgrd warn if promote_command or follow_command fails, add
    a "still alive" check for primary.
    Add a few messages and fix a bug in do_failover() in which we were
    using a closed PGresult.

commit a5189e68cf4c8cf84259ea667a35e96de56fa4c9
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Thu Jan 13 15:45:50 2011 -0500

    Initial attempt to get autofailover

commit d0e09010a9d4610997c900b62ea1df2a71b01015
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Jan 12 14:40:29 2011 -0500

    Add options failover, promote_command and follow_command
    to repmgr.conf, in pass also rename sample repmgr.conf to
    repmgr.conf.sample
    promote_command and follow_command allows to use a custom script
    for those actions.
2011-06-07 01:42:15 -04:00
Greg Smith
43268f2345 Refactor more log messages to use "_" form 2011-06-07 01:17:41 -04:00
Greg Smith
8200b68cf7 Convert some remaining fprintf messages to logger 2011-06-07 00:50:46 -04:00
Greg Smith
06aecbf58a Fix copyright date on repmgr.sql 2011-06-07 00:46:16 -04:00
Greg Smith
367d0b1afa astyle fixes on logging change 2011-06-07 00:30:35 -04:00
Greg Smith
e52f2e88a5 Update use of parentheses in logging messages.
Extracted portion of what was originally
ceca9fa983c8dbde61a7a78da29a1e1871756d8c , due to be
applied later.
2011-06-07 00:19:11 -04:00
Cédric Villemain
620f8090a3 Add test_ssh_connection
The feature was written by Jaime and reworked by me to fix
https://github.com/greg2ndQuadrant/repmgr/issues/5
2011-05-09 22:52:29 +02:00
Jaime Casanova
534649eb03 Fix a message to only show when log_info is requested and the verbose
flag is set.
This is because it needs a calculation that is only done when the
verbose flag is set, so if i have requested log INFO level but haven't
set the flag it shows a null
2011-05-09 01:07:54 -05:00
Jaime Casanova
1e8b9e8960 Add a fallback_application_name parameter to conninfo to identify
the connection if application_name is not set
2011-05-08 19:26:59 -05:00
Cédric Villemain
c4ae5741a3 Fix getopt for ignore-rsync-warning
The change was lost during merge and not checked in master/
2011-04-10 22:42:38 +02:00
Greg Smith
f824189692 Cleanup new ignore rsync warning feature. Closes gh-22. 2011-04-05 17:52:39 -04:00
Greg Smith
68b42d16c9 Merge commit '3ca0f2db06c9114c59e0846dda487cca9b1e85ff' 2011-04-05 17:26:33 -04:00
Cédric Villemain
235903e517 Fix rsync return code test 2011-03-28 18:42:20 +02:00
Cédric Villemain
bf29601218 remove the exit on rsync error because it is handle by the caller 2011-03-28 17:08:22 +02:00
Cédric Villemain
02a3fbb303 Add --ignore-rsync-warning to README 2011-03-28 16:46:47 +02:00
Cédric Villemain
b30398bf2a Add --ignore-rsync-warning command line option
This fixes the rsync return code in case there are vanished files.

Common situation are DROPed tables and TEMPorary object deletion and
are handled by PostgreSQL.
But as there may be situations where an external process deletes files in
the PGDATA, the flag is off by default.

XXX 2 items :

 * is -I a good choice ? maybe we need to prevent future --ignore-foo and
   add something like : --ignore=rsync_warning -I rsync_warning
 * the warning message is not enough explicit with the risk involved by
   --force usage
2011-03-28 16:38:55 +02:00
Cédric Villemain
e74e319e7a Applying indent style 2011-03-28 16:05:09 +02:00
Jaime Casanova
d77186c904 Ignore error 24 from rsync (that could happen if we use a lot of
temp tables for example).
It only ignores that error if we are copying a directory (which means
we are in pg_start_backup() and then ignore changes that happen
due to concurrent access is fine).
2011-03-28 16:02:32 +02:00
38 changed files with 4959 additions and 1449 deletions

3
.gitignore vendored
View File

@@ -1,6 +1,9 @@
*~ *~
*.o *.o
*.so
repmgr repmgr
repmgrd repmgrd
README.htm* README.htm*
README.pdf README.pdf
sql/repmgr_funcs.so
sql/repmgr_funcs.sql

View File

@@ -1,4 +1,4 @@
Copyright (c) 2010-2011, 2ndQuadrant Limited Copyright (c) 2010-2014, 2ndQuadrant Limited
All rights reserved. All rights reserved.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify

View File

@@ -10,5 +10,7 @@ Hannu Krosing <hannu@2ndQuadrant.com>
Cédric Villemain <cedric@2ndquadrant.com> Cédric Villemain <cedric@2ndquadrant.com>
Charles Duffy <charles@dyfis.net> Charles Duffy <charles@dyfis.net>
Daniel Farina <daniel@heroku.com> Daniel Farina <daniel@heroku.com>
Marco Nenciarini <marco.nenciarini@2ndquadrant.it> Shawn Ellis <shawn.ellis17@gmail.com>
Carlo Ascani <carlo.ascani@2ndquadrant.it> Jay Taylor <jay@jaytaylor.com>
Christian Kruse <christian@2ndQuadrant.com>
Krzysztof Gajdemski <songo@debian.org.pl>

46
HISTORY
View File

@@ -1,3 +1,45 @@
2.0.2 2015-02-17
Add "--checksum" in rsync when using "--force" (Jaime)
Use createdb/createuser instead of psql (Jaime)
Fixes to witness creation and monitoring (wamonite)
Use default master port if none supplied (Martín)
Documentation fixes and improvements (Ian)
2.0.1 2014-07-16
Documentation fixes and new QUICKSTART file (Ian)
Explicitly specify directories to ignore when cloning (Ian)
Fix log level for some log messages (Ian)
RHEL/CentOS specfile, init script and Makefile fixes (Nathan Van Overloop)
Debian init script and config file documentation fixes (József Kószó)
Typo fixes (Riegie Godwin Jeyaranchen, PriceChild)
2.0stable 2014-01-30
Documentation fixes (Christian)
General refactoring, code quality improvements and stabilization work (Christian)
Added proper daemonizing (-d/--daemonize) (Christian)
Added PID file handling (-p/--pid-file) (Christian)
New config option: monitor_interval_secs (Christian)
New config option: retry_promote_interval (Christian)
New config option: logfile (Christian)
New config option: pg_bindir (Christian)
New config option: pgctl_options (Christian)
2.0beta2 2013-12-19
Improve autofailover logic and algorithms (Jaime, Andres)
Ignore pg_log when cloning (Jaime)
Add timestamps to log line in stderr (Christian)
Correctly check wal_keep_segments (Jay Taylor)
Add a ssh_options parameter (Jay Taylor)
2.0beta1 2012-07-27
Make CLONE command try to make an exact copy including $PGDATA location (Cedric)
Add detection of master failure (Jaime)
Add the notion of a witness server (Jaime)
Add autofailover capabilities (Jaime)
Add a configuration parameter to indicate the script to execute on failover or follow (Jaime)
Make the monitoring optional and turned off by default, it can be turned on with --monitoring-history switch (Jaime)
Add tunables to specify number of retries to reconnect to master and the time between them (Jaime)
1.2.0 2012-07-27 1.2.0 2012-07-27
Test ssh connection before trying to rsync (Cédric) Test ssh connection before trying to rsync (Cédric)
Add CLUSTER SHOW command (Carlo) Add CLUSTER SHOW command (Carlo)
@@ -9,7 +51,7 @@
1.1.1 2012-04-18 1.1.1 2012-04-18
Add --ignore-rsync-warning (Cédric) Add --ignore-rsync-warning (Cédric)
Add strnlen for compatibility with OS X (Greg) Add strnlen for compatibility with OS X (Greg)
Improve performance of repl_status view (Jaime) Improve performance of the repl_status view (Jaime)
Remove last argument from log_err (Jaime, Reported by Jeroen Dekkers) Remove last argument from log_err (Jaime, Reported by Jeroen Dekkers)
Complete documentation about possible error conditions (Jaime) Complete documentation about possible error conditions (Jaime)
Document how to clean history (Jaime) Document how to clean history (Jaime)
@@ -17,7 +59,7 @@
1.1.0 2011-03-09 1.1.0 2011-03-09
Make options -U, -R and -p not mandatory (Jaime) Make options -U, -R and -p not mandatory (Jaime)
1.1.0b1 2011-02-24 1.1.0b1 2011-02-24
Fix missing "--force" option in help (Greg Smith) Fix missing "--force" option in help (Greg Smith)
Correct warning message for wal_keep_segments (Bas van Oostveen) Correct warning message for wal_keep_segments (Bas van Oostveen)
Add Debian build/usage docs (Bas, Hannu Krosing, Cedric Villemain) Add Debian build/usage docs (Bas, Hannu Krosing, Cedric Villemain)

View File

@@ -1,6 +1,6 @@
# #
# Makefile # Makefile
# Copyright (c) 2ndQuadrant, 2010-2011 # Copyright (c) 2ndQuadrant, 2010-2014
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
@@ -11,15 +11,18 @@ PG_CPPFLAGS = -I$(libpq_srcdir)
PG_LIBS = $(libpq_pgport) PG_LIBS = $(libpq_pgport)
all: repmgrd repmgr all: repmgrd repmgr
$(MAKE) -C sql
repmgrd: $(repmgrd_OBJS) repmgrd: $(repmgrd_OBJS)
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd $(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
$(MAKE) -C sql
repmgr: $(repmgr_OBJS) repmgr: $(repmgr_OBJS)
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr $(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
ifdef USE_PGXS ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs) PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS) include $(PGXS)
else else
subdir = contrib/repmgr subdir = contrib/repmgr
@@ -30,9 +33,26 @@ endif
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now # XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
# is overriding pgxs install. # is overriding pgxs install.
install: install: install_prog install_ext
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)'
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)' install_prog:
mkdir -p '$(DESTDIR)$(bindir)'
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)/'
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)/'
install_ext:
$(MAKE) -C sql install
install_rhel:
mkdir -p '$(DESTDIR)/etc/init.d/'
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
mkdir -p '$(DESTDIR)/etc/sysconfig/'
$(INSTALL_PROGRAM) RHEL/repmgrd.sysconfig '$(DESTDIR)/etc/sysconfig/repmgrd'
mkdir -p '$(DESTDIR)/etc/repmgr/'
$(INSTALL_PROGRAM) repmgr.conf.sample '$(DESTDIR)/etc/repmgr/'
mkdir -p '$(DESTDIR)/usr/bin/'
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)/usr/bin/'
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)/usr/bin/'
ifneq (,$(DATA)$(DATA_built)) ifneq (,$(DATA)$(DATA_built))
@for file in $(addprefix $(srcdir)/, $(DATA)) $(DATA_built); do \ @for file in $(addprefix $(srcdir)/, $(DATA)) $(DATA_built); do \
@@ -45,10 +65,18 @@ clean:
rm -f *.o rm -f *.o
rm -f repmgrd rm -f repmgrd
rm -f repmgr rm -f repmgr
$(MAKE) -C sql clean
deb: repmgrd repmgr deb: repmgrd repmgr
mkdir -p ./debian/usr/bin mkdir -p ./debian/usr/bin
cp repmgrd repmgr ./debian/usr/bin/ cp repmgrd repmgr ./debian/usr/bin/
mkdir -p ./debian/usr/share/postgresql/9.0/contrib/
cp sql/repmgr_funcs.sql ./debian/usr/share/postgresql/9.0/contrib/
cp sql/uninstall_repmgr_funcs.sql ./debian/usr/share/postgresql/9.0/contrib/
mkdir -p ./debian/usr/lib/postgresql/9.0/lib/
cp sql/repmgr_funcs.so ./debian/usr/lib/postgresql/9.0/lib/
dpkg-deb --build debian dpkg-deb --build debian
mv debian.deb ../postgresql-repmgr-9.0_1.0.0.deb mv debian.deb ../postgresql-repmgr-9.0_1.0.0.deb
rm -rf ./debian/usr

292
QUICKSTART.md Normal file
View File

@@ -0,0 +1,292 @@
repmgr: Quickstart guide
========================
repmgr is an open-source tool suite for managing replication and failover
among multiple PostgreSQL server nodes. It enhances PostgreSQL's built-in
hot-standby capabilities with a set of administration tools for monitoring
replication, setting up standby servers and performing failover/switchover
operations.
This quickstart guide assumes you are familiar with PostgreSQL replication
setup and Linux/UNIX system administration. For a more detailed tutorial
covering setup on a variety of different systems, see the README.rst file.
Conceptual Overview
-------------------
repmgr provides two binaries:
- `repmgr`: a command-line client to manage replication and repmgr configuration
- `repmgrd`: an optional daemon process which runs on standby nodes to monitor
replication and node status
Each PostgreSQL node requires a repmgr configuration file; additionally
it must be "registered" using the repmgr command-line client. repmgr stores
information about managed nodes in a custom schema on the node's current master
database.
Requirements
------------
repmgr works with PostgreSQL 9.0 and later. All server nodes must be running the
same PostgreSQL major version, and preferably should be running the same minor
version.
repmgr will work on any Linux or UNIX-like environment capable of running
PostgreSQL. rsync must also be installed.
Installation
------------
repmgr must be installed on each PostgreSQL server node.
* Packages
- RPM packages for RedHat-based distributions are available from PGDG
- Debian/Ubuntu provide .deb packages.
It is also possible to build .deb packages directly from the repmgr source;
see README.rst for further details.
* Source installation
- repmgr source code is hosted at github (https://github.com/2ndQuadrant/repmgr);
tar.gz files can be downloaded from https://github.com/2ndQuadrant/repmgr/releases .
repmgr can be built easily using PGXS:
sudo make USE_PGXS=1 install
Configuration
-------------
### Server configuration
Password-less SSH logins must be enabled for the database system user (typically `postgres`)
between all server nodes to enable repmgr to copy required files.
### PostgreSQL configuration
The master PostgreSQL node needs to be configured for replication with the
following settings:
wal_level = 'hot_standby' # minimal, archive, hot_standby, or logical
archive_mode = on # allows archiving to be done
archive_command = 'cd .' # command to use to archive a logfile segment
max_wal_senders = 10 # max number of walsender processes
wal_keep_segments = 5000 # in logfile segments, 16MB each; 0 disables
hot_standby = on # "on" allows queries during recovery
Note that repmgr expects a default of 5000 wal_keep_segments, although this
value can be overridden when executing the `repmgr` client.
Additionally, repmgr requires a dedicated PostgreSQL superuser account
and a database in which to store monitoring and replication data. The repmgr
user account will also be used for replication connections from the standby,
so a separate replication user with the `REPLICATION` privilege is not required.
The database can in principle be any database, including the default `postgres`
one, however it's probably advisable to create a dedicated database for repmgr
usage.
### repmgr configuration
Each PostgreSQL node requires a repmgr configuration file containing
identification and database connection information:
cluster=test
node=1
node_name=node1
conninfo='host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
pg_bindir=/path/to/postgres/bin
* `cluster`: common name for the replication cluster; this must be the same on all nodes
* `node`: a unique, arbitrary integer identifier
* `node_name`: a unique, human-readable name
* `conninfo`: a standard conninfo string enabling repmgr to connect to the
control database; `user` and `dbname` must be the same on all nodes, while other
parameters such as port may differ. The `host` parameter *must* be a hostname
resolvable by all nodes on the cluster.
* `pg_bindir`: (optional) location of PostgreSQL binaries, if not in the default $PATH
Note that the configuration file should not be stored inside the PostgreSQL
data directory.
Each node configuration needs to be registered with repmgr, either using the
repmgr command line tool, or the repmgrd daemon; for details see below. Details
about each node are inserted into the repmgr database (for details see below).
Replication setup and monitoring
--------------------------------
For the purposes of this guide, we'll assume the database user will be
`repmgr_usr` and the database will be `repmgr_db`, and that the following
environment variables are set on each node:
- $HOME: the PostgreSQL system user's home directory
- $PGDATA: the PostgreSQL data directory
Master setup
------------
1. Configure PostgreSQL
- create user and database:
```
CREATE ROLE repmgr_usr LOGIN SUPERUSER;
CREATE DATABASE repmgr_db OWNER repmgr_usr;
```
- configure postgresql.conf for replication (see above)
- update pg_hba.conf, e.g.:
```
host repmgr_db repmgr_usr 192.168.1.0/24 trust
host replication repmgr_usr 192.168.1.0/24 trust
```
Restart the PostgreSQL server after making these changes.
2. Create the repmgr configuration file:
$ cat $HOME/repmgr/repmgr.conf
cluster=test
node=1
node_name=node1
conninfo='host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
pg_bindir=/path/to/postgres/bin
3. Register the master node with repmgr:
$ repmgr -f $HOME/repmgr/repmgr.conf --verbose master register
[2014-07-04 10:43:42] [INFO] repmgr mgr connecting to master database
[2014-07-04 10:43:42] [INFO] repmgr connected to master, checking its state
[2014-07-04 10:43:42] [INFO] master register: creating database objects inside the repmgr_test schema
[2014-07-04 10:43:43] [NOTICE] Master node correctly registered for cluster test with id 1 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
Slave/standby setup
-------------------
1. Use repmgr to clone the master:
$ repmgr -f $HOME/repmgr/repmgr.conf -D $PGDATA -d repmgr_db -U repmgr_usr -R postgres --verbose standby clone 192.168.1.2
Opening configuration file: ./repmgr.conf
[2014-07-04 10:49:00] [ERROR] Did not find the configuration file './repmgr.conf', continuing
[2014-07-04 10:49:00] [INFO] repmgr connecting to master database
[2014-07-04 10:49:00] [INFO] repmgr connected to master, checking its state
[2014-07-04 10:49:00] [INFO] Successfully connected to primary. Current installation size is 1807 MB
[2014-07-04 10:49:00] [NOTICE] Starting backup...
[2014-07-04 10:49:00] [INFO] creating directory "/path/to/data/"...
(...)
[2014-07-04 10:53:19] [NOTICE] Finishing backup...
NOTICE: pg_stop_backup complete, all required WAL segments have been archived
[2014-07-04 10:53:21] [INFO] repmgr requires primary to keep WAL files 0000000100000000000000AD until at least 0000000100000000000000AD
[2014-07-04 10:53:21] [NOTICE] repmgr standby clone complete
[2014-07-04 10:53:21] [NOTICE] HINT: You can now start your postgresql server
[2014-07-04 10:53:21] [NOTICE] for example : /etc/init.d/postgresql start
-R is the database system user on the master node. At this point it does not matter
if the `repmgr.conf` file is not found.
This will clone the PostgreSQL database files from the master, including its
`postgresql.conf` and `pg_hba.conf` files, and additionally automatically create
the `recovery.conf` file containing the correct parameters to start streaming
from the primary node.
2. Start the PostgreSQL server
3. Create the repmgr configuration file:
$ cat $HOME/repmgr/repmgr.conf
cluster=test
node=2
node_name=node2
conninfo='host=repmgr_node2 user=repmgr_usr dbname=repmgr_db'
pg_bindir=/path/to/postgres/bin
4. Register the standby node with repmgr:
$ repmgr -f $HOME/repmgr/repmgr.conf --verbose standby register
Opening configuration file: /path/to/repmgr/repmgr.conf
[2014-07-04 11:48:13] [INFO] repmgr connecting to standby database
[2014-07-04 11:48:13] [INFO] repmgr connected to standby, checking its state
[2014-07-04 11:48:13] [INFO] repmgr connecting to master database
[2014-07-04 11:48:13] [INFO] finding node list for cluster 'test'
[2014-07-04 11:48:13] [INFO] checking role of cluster node 'host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
[2014-07-04 11:48:13] [INFO] repmgr connected to master, checking its state
[2014-07-04 11:48:13] [INFO] repmgr registering the standby
[2014-07-04 11:48:13] [INFO] repmgr registering the standby complete
[2014-07-04 11:48:13] [NOTICE] Standby node correctly registered for cluster test with id 2 (conninfo: host=localhost user=repmgr_usr dbname=repmgr_db)
Monitoring
----------
`repmgrd` is a management and monitoring daemon which runs on standby nodes
and which can automate remote actions. It can be started simply with e.g.:
repmgrd -f $HOME/repmgr/repmgr.conf --verbose > $HOME/repmgr/repmgr.log 2>&1
or alternatively:
repmgrd -f $HOME/repmgr/repmgr.conf --verbose --monitoring-history > $HOME/repmgr/repmgrd.log 2>&1
which will track advance or lag of the replication in every standby in the
`repl_monitor` table.
Example log output:
[2014-07-04 11:55:17] [INFO] repmgrd Connecting to database 'host=localhost user=repmgr_usr dbname=repmgr_db'
[2014-07-04 11:55:17] [INFO] repmgrd Connected to database, checking its state
[2014-07-04 11:55:17] [INFO] repmgrd Connecting to primary for cluster 'test'
[2014-07-04 11:55:17] [INFO] finding node list for cluster 'test'
[2014-07-04 11:55:17] [INFO] checking role of cluster node 'host=repmgr_node1 user=repmgr_usr dbname=repmgr_db'
[2014-07-04 11:55:17] [INFO] repmgrd Checking cluster configuration with schema 'repmgr_test'
[2014-07-04 11:55:17] [INFO] repmgrd Checking node 2 in cluster 'test'
[2014-07-04 11:55:17] [INFO] Reloading configuration file and updating repmgr tables
[2014-07-04 11:55:17] [INFO] repmgrd Starting continuous standby node monitoring
Failover
--------
To promote a standby to master, on the standby execute e.g.:
repmgr -f $HOME/repmgr/repmgr.conf --verbose standby promote
repmgr will attempt to connect to the current master to verify that it
is not available (if it is, repmgr will not promote the standby).
Other standby servers need to be told to follow the new master with:
repmgr -f $HOME/repmgr/repmgr.conf --verbose standby follow
See file `autofailover_quick_setup.rst` for details on setting up
automated failover.
repmgr database schema
----------------------
repmgr creates a small schema for its own use in the database specified in
each node's conninfo configuration parameter. This database can in principle
be any database. The schema name is the global `cluster` name prefixed
with `repmgr_`, so for the example setup above the schema name is
`repmgr_test`.
The schema contains two tables:
* `repl_nodes`
stores information about all registered servers in the cluster
* `repl_monitor`
stores monitoring information about each node
and one view, `repl_status`, which summarizes the latest monitoring information
for each node.

View File

@@ -5,7 +5,7 @@ repmgr: Replication Manager for PostgreSQL clusters
Introduction Introduction
============ ============
PostgreSQL 9.0 allow us to have replicated Hot Standby servers PostgreSQL 9+ allow us to have replicated Hot Standby servers
which we can query and/or use for high availability. which we can query and/or use for high availability.
While the main components of the feature are included with While the main components of the feature are included with
@@ -20,6 +20,17 @@ databases as a single cluster. repmgr includes two components:
* repmgrd: management and monitoring daemon that watches the cluster * repmgrd: management and monitoring daemon that watches the cluster
and can automate remote actions. and can automate remote actions.
Supported Releases
------------------
repmgr works with PostgreSQL versions 9.0 and later.
There are currently no incompatibilities when upgrading repmgr from 9.0 to 9.1,
so your 9.0 configuration will work with 9.1
Additional parameters must be added to postgresql.conf to take advantage of
the new 9.1 features such as synchronous replication or hot standby feedback.
Requirements Requirements
------------ ------------
@@ -66,7 +77,7 @@ and run::
And if a previously failed node becomes available again, such as And if a previously failed node becomes available again, such as
the lost node1 above, you can get it to resynchronize by only copying the lost node1 above, you can get it to resynchronize by only copying
over changes made while it was down using. That hapens with what's over changes made while it was down. That happens with what's
called a forced clone, which overwrites existing data rather than called a forced clone, which overwrites existing data rather than
assuming it starts with an empty database directory tree:: assuming it starts with an empty database directory tree::
@@ -120,19 +131,19 @@ If you need to remove the source code temporary files from this directory,
that can be done like this:: that can be done like this::
make USE_PGXS=1 clean make USE_PGXS=1 clean
See below for building notes specific to RedHat Linux variants. See below for building notes specific to RedHat Linux variants.
Using a full source code tree Using a full source code tree
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In this method, the repmgr distribution is copied into the PostgreSQL source In this method, the repmgr distribution is copied into the PostgreSQL source
code tree, assumed to be at the ${postgresql_sources} for this example. code tree, assumed to be under ${postgresql_sources} for this example.
The resulting subdirectory must be named ``contrib/repmgr``, without any The resulting subdirectory must be named ``contrib/repmgr``, without any
version number:: version number::
cp repmgr.tar.gz ${postgresql_sources}/contrib cp repmgr.tar.gz ${postgresql_sources}/contrib
cd ${postgresql_sources}/contrib cd ${postgresql_sources}/contrib
tar xvzf repmgr-1.0.tar.gz tar xvzf repmgr-1.0.tar.gz
cd repmgr cd repmgr
make make
@@ -226,7 +237,7 @@ If you already tried to build repmgr before doing this, you'll need to do::
make USE_PGXS=1 clean make USE_PGXS=1 clean
To get rid of leftover files from the wrong architecture. to get rid of leftover files from the wrong architecture.
Notes on Ubuntu, Debian or other Debian-based Builds Notes on Ubuntu, Debian or other Debian-based Builds
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -268,8 +279,8 @@ Confirm software was built correctly
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You should now find the repmgr programs available in the subdirectory where You should now find the repmgr programs available in the subdirectory where
the rest of your PostgreSQL installation is at. You can confirm the software the rest of your PostgreSQL binary files are located. You can confirm the
is available by checking its version:: software is available by checking its version::
repmgr --version repmgr --version
repmgrd --version repmgrd --version
@@ -309,7 +320,7 @@ keys and a maching authorization file to a privledged user on the other system::
[postgres@node1]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys [postgres@node1]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[postgres@node1]$ chmod go-rwx ~/.ssh/* [postgres@node1]$ chmod go-rwx ~/.ssh/*
[postgres@node1]$ cd ~/.ssh [postgres@node1]$ cd ~/.ssh
[postgres@node1]$ scp id_rsa.pub id_rsa authorized_keys postgres@node2: [postgres@node1]$ scp id_rsa.pub id_rsa authorized_keys user@node2:
Login as a user on the other system, and install the files into the postgres Login as a user on the other system, and install the files into the postgres
user's account:: user's account::
@@ -363,10 +374,10 @@ Usage walkthrough
This assumes you've already followed the steps in "Installation Outline" to This assumes you've already followed the steps in "Installation Outline" to
install repmgr and repmgrd on the system. install repmgr and repmgrd on the system.
A normal production installation of ``repmgr`` will normally involve two A typical production installation of ``repmgr`` might involve two PostgreSQL
different systems running on the same port, typically the default of 5432, instances on seperate servers, both running under the ``postgres`` user account
with both using files owned by the ``postgres`` user account. This and both using the default port (5432). This walkthrough assumes the following
walkthrough assumes the following setup: setup:
* A primary (master) server called "node1," running as the "postgres" user * A primary (master) server called "node1," running as the "postgres" user
who is also the owner of the files. This server is operating on port 5432. This who is also the owner of the files. This server is operating on port 5432. This
@@ -378,7 +389,7 @@ walkthrough assumes the following setup:
* Another standby server called "node3" with a similar configuration to "node2". * Another standby server called "node3" with a similar configuration to "node2".
* The Postgress installation in each of the above is defined as $PGDATA, * The Postgres installation in each of the above is defined as $PGDATA,
which is represented here as ``/var/lib/pgsql/9.0/data`` which is represented here as ``/var/lib/pgsql/9.0/data``
Creating some sample data Creating some sample data
@@ -503,12 +514,14 @@ following the standard directory structure of a RHEL system. It should contain:
cluster=test cluster=test
node=1 node=1
node_name=earth
conninfo='host=node1 user=repmgr dbname=pgbench' conninfo='host=node1 user=repmgr dbname=pgbench'
On "node2" create the file ``/var/lib/pgsql/repmgr/repmgr.conf`` with:: On "node2" create the file ``/var/lib/pgsql/repmgr/repmgr.conf`` with::
cluster=test cluster=test
node=2 node=2
node_name=mars
conninfo='host=node2 user=repmgr dbname=pgbench' conninfo='host=node2 user=repmgr dbname=pgbench'
The STANDBY CLONE process should have created a recovery.conf file on The STANDBY CLONE process should have created a recovery.conf file on
@@ -612,18 +625,18 @@ Now restore to the original configuration by stopping
primary server, then bringing up "node2" as a standby with a valid primary server, then bringing up "node2" as a standby with a valid
``recovery.conf`` file. ``recovery.conf`` file.
Stop the "node2" server:: Stop the "node2" server and type the following on "node1" server::
repmgr -f /var/lib/pgsql/repmgr/repmgr.conf standby promote repmgr -f /var/lib/pgsql/repmgr/repmgr.conf standby promote
Now the original primary, "node1" is acting again as primary. Now the original primary, "node1", is acting again as primary.
Start the "node2" server and type this on "node1":: Start the "node2" server and type this on "node2"::
repmgr standby clone --force -h node2 -p 5432 -U postgres -R postgres --verbose repmgr standby clone --force -h node2 -p 5432 -U postgres -R postgres --verbose
Verify the roles have reversed by attempting to insert a record on "node" Verify the roles have reversed by attempting to insert a record on "node1"
and on "node1". and on "node2".
The servers are now again acting as primary on "node1" and standby on "node2". The servers are now again acting as primary on "node1" and standby on "node2".
@@ -647,7 +660,7 @@ You can usually leave out changes to the port number in this case too.
* A database exists on "prime" called "testdb." * A database exists on "prime" called "testdb."
* The Postgress installation in each of the above is defined as $PGDATA, * The Postgres installation in each of the above is defined as $PGDATA,
which is represented here with ``/data/prime`` as the "prime" server and which is represented here with ``/data/prime`` as the "prime" server and
``/data/standby`` as the "standby" server. ``/data/standby`` as the "standby" server.
@@ -701,12 +714,14 @@ and it should contain::
cluster=test cluster=test
node=1 node=1
node_name=earth
conninfo='host=127.0.0.1 dbname=testdb' conninfo='host=127.0.0.1 dbname=testdb'
On "standby" create the file ``/home/standby/repmgr/repmgr.conf`` with:: On "standby" create the file ``/home/standby/repmgr/repmgr.conf`` with::
cluster=test cluster=test
node=2 node=2
node_name=mars
conninfo='host=127.0.0.1 dbname=testdb' conninfo='host=127.0.0.1 dbname=testdb'
Next, with "prime" server running, we want to use the ``clone standby`` command Next, with "prime" server running, we want to use the ``clone standby`` command
@@ -824,12 +839,11 @@ Also, if you don't do anything about it the monitor history will keep growing.
For both of those reasons you sometime want to make some maintainance of the For both of those reasons you sometime want to make some maintainance of the
``repl_monitor`` table. ``repl_monitor`` table.
If you want to clean the history after a few days you can execute a If you want to clean the history after a few days you can execute the
truncate/delete (wheter you want to completely clean history or want to keep CLUSTER CLEANUP command in a cron. For example to keep just one day of history
a few days of history) in a cron. For example to keep just one day of history
you can put this in your crontab:: you can put this in your crontab::
0 1 * * * psql -c "DELETE FROM repmgr_schema.repl_monitor where now() - last_monitor_time >= '1 day'::interval;" postgres 0 1 * * * repmgr cluster cleanup -k 1 -f ~/repmgr.conf
Configuration and command reference Configuration and command reference
=================================== ===================================
@@ -862,7 +876,6 @@ The output from this program looks like this::
Usage: Usage:
repmgr [OPTIONS] master {register} repmgr [OPTIONS] master {register}
repmgr [OPTIONS] standby {register|clone|promote|follow} repmgr [OPTIONS] standby {register|clone|promote|follow}
repmgr [OPTIONS] cluster {show|cleanup}
General options: General options:
--help show this help, then exit --help show this help, then exit
@@ -877,12 +890,11 @@ The output from this program looks like this::
Configuration options: Configuration options:
-D, --data-dir=DIR local directory where the files will be copied to -D, --data-dir=DIR local directory where the files will be copied to
-f, --config_file=PATH path to the configuration file -f, --config-file=PATH path to the configuration file
-R, --remote-user=USERNAME database server username for rsync -R, --remote-user=USERNAME database server username for rsync
-w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000) -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)
-I, --ignore-rsync-warning ignore rsync partial transfer warning
-F, --force force potentially dangerous operations to happen -F, --force force potentially dangerous operations to happen
-I, --ignore-rsync-warning Ignore partial transfert warning
-k, --keep-history keeps indicated number of days of history
repmgr performs some tasks like clone a node, promote it or making follow another node and then exits. repmgr performs some tasks like clone a node, promote it or making follow another node and then exits.
COMMANDS: COMMANDS:
@@ -891,8 +903,6 @@ The output from this program looks like this::
standby clone [node] - allows creation of a new standby standby clone [node] - allows creation of a new standby
standby promote - allows manual promotion of a specific standby into a new master in the event of a failover standby promote - allows manual promotion of a specific standby into a new master in the event of a failover
standby follow - allows the standby to re-point itself to a new master standby follow - allows the standby to re-point itself to a new master
cluster show - print node informations
cluster cleanup - cleans monitor's history
The ``--verbose`` option can be useful in troubleshooting issues with The ``--verbose`` option can be useful in troubleshooting issues with
the program. the program.
@@ -1003,7 +1013,8 @@ The output from this program looks like this::
--help show this help, then exit --help show this help, then exit
--version output version information, then exit --version output version information, then exit
--verbose output verbose activity information --verbose output verbose activity information
-f, --config_file=PATH database to connect to --monitoring-history track advance or lag of the replication in every standby in repl_monitor
-f, --config-file=PATH path to the configuration file
repmgrd monitors a cluster of servers. repmgrd monitors a cluster of servers.
@@ -1033,6 +1044,10 @@ Lag monitoring
repmgrd helps monitor a set of master and standby servers. You can repmgrd helps monitor a set of master and standby servers. You can
see which node is the current master, as well as how far behind each see which node is the current master, as well as how far behind each
is from current. is from current.
To activate the monitor capabilities of repmgr you must include the
option --monitoring-history when running it::
repmgrd --monitoring-history --config-file=/path/to/repmgr.conf &
To look at the current lag between primary and each node listed To look at the current lag between primary and each node listed
in ``repl_node``, consult the ``repl_status`` view:: in ``repl_node``, consult the ``repl_status`` view::
@@ -1065,16 +1080,21 @@ following
* ERR_DB_QUERY 7: Error executing a database query. * ERR_DB_QUERY 7: Error executing a database query.
* ERR_PROMOTED 8: Exiting program because the node has been promoted to master. * ERR_PROMOTED 8: Exiting program because the node has been promoted to master.
* ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected. * ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected.
* ERR_STR_OVERFLOW 10: A string was larger than expected.
License and Contributions License and Contributions
========================= =========================
repmgr is licensed under the GPL v3. All of its code and documentation is repmgr is licensed under the GPL v3. All of its code and documentation is
Copyright 2010-2011, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for Copyright 2010-2014, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
details. details.
Contributions to repmgr are welcome, and listed in the file CREDITS. Main sponsorship of repmgr has been from 2ndQuadrant customers.
Additional work has been sponsored by the 4CaaST project for cloud computing,
which has received funding from the European Union's Seventh Framework Programme
(FP7/2007-2013) under grant agreement 258862.
Contributions to repmgr are welcome, and will be listed in the file CREDITS.
2ndQuadrant Limited requires that any contributions provide a copyright 2ndQuadrant Limited requires that any contributions provide a copyright
assignment and a disclaimer of any work-for-hire ownership claims from the assignment and a disclaimer of any work-for-hire ownership claims from the
employer of the developer. This lets us make sure that all of the repmgr employer of the developer. This lets us make sure that all of the repmgr
@@ -1090,3 +1110,35 @@ Code in repmgr is formatted to a consistent style using the following command::
Contributors should reformat their code similarly before submitting code to Contributors should reformat their code similarly before submitting code to
the project, in order to minimize merge conflicts with other work. the project, in order to minimize merge conflicts with other work.
Support and Assistance
======================
2ndQuadrant provides 24x7 production support for repmgr, and can also help you
configure it correctly, verify an installation and train you in running a
robust replication cluster.
There is a mailing list/forum to discuss contributions or issues
http://groups.google.com/group/repmgr
#repmgr is registered in freenode IRC
Further information is available at http://www.repmgr.org/
We'd love to hear from you about how you use repmgr. Case studies and
news are always welcome. Send us an email at info@2ndQuadrant.com, or
send a postcard to
repmgr
c/o 2ndQuadrant
7200 The Quorum
Oxford Business Park North
Oxford
OX4 2JZ
Thanks from the repmgr core team
Jaime Casanova
Simon Riggs
Greg Smith
Cedric Villemain

57
RHEL/repmgr.spec Normal file
View File

@@ -0,0 +1,57 @@
Summary: repmgr
Name: repmgr
Version: 2.0
Release: 2
License: GPLv3
Group: System Environment/Daemons
URL: http://repmgr.org
Packager: Nathan Van Overloop <nathan.van.overloop@nexperteam.be>
Vendor: 2ndQuadrant Limited
Distribution: centos
Source0: %{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
%description
repmgr for centos6
%prep
%setup
%build
export PATH=$PATH:/usr/pgsql-9.3/bin/
%{__make} USE_PGXS=1
%install
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
export PATH=$PATH:/usr/pgsql-9.3/bin/
%{__make} USE_PGXS=1 install DESTDIR=%{buildroot} INSTALL="install -p"
%{__make} USE_PGXS=1 install_prog DESTDIR=%{buildroot} INSTALL="install -p"
%{__make} USE_PGXS=1 install_rhel DESTDIR=%{buildroot} INSTALL="install -p"
%clean
[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
%files
%defattr(-,root,root)
/usr/bin/repmgr
/usr/bin/repmgrd
/usr/pgsql-9.3/bin/repmgr
/usr/pgsql-9.3/bin/repmgrd
/usr/pgsql-9.3/lib/repmgr_funcs.so
/usr/pgsql-9.3/share/contrib/repmgr.sql
/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql
/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql
/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql
%attr(0755,root,root)/etc/init.d/repmgrd
%attr(0644,root,root)/etc/sysconfig/repmgrd
%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
%changelog
* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
- fix witness creation to create db and user if needed
* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
- initial build for RHEL6

114
RHEL/repmgrd.init Executable file
View File

@@ -0,0 +1,114 @@
#!/bin/bash
#
# repmgrd Start up the repmgrd daemon
# repmgrd (replication manager daemon)
#
# chkconfig: - 75 16
# description: repmgrd is the replication manager daemon \
# The repmgrd replication management and monitoring daemon for PostgreSQL.
### BEGIN INIT INFO
# Provides: repmgrd
# Required-Start: $local_fs $remote_fs $network $syslog postgresql
# Required-Stop: $local_fs $remote_fs $network $syslog postgresql
# Should-Start: $syslog postgresql-9.3
# Should-Stop: $syslog postgresql-9.3
# Short-Description: start and stop repmgrd
# Description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
# this is used to monitor a postgresql cluster.
### END INIT INFO
# Source function library.
. /etc/init.d/functions
# Source networking configuration.
. /etc/sysconfig/network
prog=repmgrd
REPMGRD_ENABLED=yes
REPMGRD_OPTS=
REPMGRD_USER=postgres
DAEMONIZE="-d"
# pull in sysconfig settings
[ -f /etc/sysconfig/repmgrd ] && . /etc/sysconfig/repmgrd
LOCKFILE=/var/lock/subsys/$prog
RETVAL=0
case "$REPMGRD_ENABLED" in
[Yy]*)
#nothing to do here
;;
*)
exit 2
;;
esac
if [ -z "$REPMGRD_OPTS" ]
then
echo "Not starting $prog, REPMGRD_OPTS not set in /etc/sysconfig/$prog"
exit 2
fi
# Start the repmgrd daemon as $REPMGRD_USER.
# Exits with 4 when not invoked as root and with 1 when NETWORKING is "no".
# On a successful launch the subsys lock file is created; the launch status
# is kept in RETVAL and returned to the caller.
start() {
    if [ "$EUID" != "0" ]; then
        exit 4
    fi
    if [ "$NETWORKING" = "no" ]; then
        exit 1
    fi

    # Start daemons.
    echo -n $"Starting $prog: "
    daemon --user $REPMGRD_USER $prog $DAEMONIZE $REPMGRD_OPTS
    RETVAL=$?
    echo

    # Only record the service as running when the launch succeeded.
    if [ $RETVAL -eq 0 ]; then
        touch $LOCKFILE
    fi
    return $RETVAL
}
# Stop the repmgrd daemon.
# Exits with 4 when not invoked as root. The killproc status is kept in
# RETVAL and returned; the lock file is removed only when killproc succeeded.
stop() {
    if [ "$EUID" != "0" ]; then
        exit 4
    fi

    echo -n $"Shutting down $prog: "
    killproc $prog
    RETVAL=$?
    echo

    if [ $RETVAL -eq 0 ]; then
        rm -f $LOCKFILE
    fi
    return $RETVAL
}
# Report whether the service is considered running.
# The decision is based solely on the presence of $LOCKFILE; any argument
# passed by the caller is ignored. When the lock file is missing RETVAL is
# set to 3; otherwise the current RETVAL is returned unchanged.
status() {
    if [ ! -f "$LOCKFILE" ]; then
        RETVAL=3
        echo "$prog is stopped"
        return $RETVAL
    fi

    echo "$prog is running"
    return $RETVAL
}
# See how we were called.
case "$1" in
start)
start
;;
stop)
stop
;;
status)
status $prog
;;
restart|force-reload)
stop
start
;;
try-restart|condrestart)
if status $prog > /dev/null; then
stop
start
fi
;;
reload)
exit 3
;;
*)
echo $"Usage: $0 {start|stop|status|restart|try-restart|force-reload}"
exit 2
esac

4
RHEL/repmgrd.sysconfig Normal file
View File

@@ -0,0 +1,4 @@
#default sysconfig file for repmgrd
#custom overrides can be placed here
REPMGRD_OPTS="-f /etc/repmgr/repmgr.conf"

18
TODO
View File

@@ -1,14 +1,18 @@
Known issues in repmgr Known issues in repmgr
====================== ======================
* The check for whether ``wal_keep_segments`` is considered large enough
does a string comparison rather than an integer one. It can give both
false positive (setting is large enough but flagged as too small) and
false negative (setting is too small but not noted as such) errors.
* When running repmgr against a remote machine, operations that start * When running repmgr against a remote machine, operations that start
the database server using the ``pg_ctl`` command may accidentally the database server using the ``pg_ctl`` command may accidentally
terminate after their associated ssh session ends. terminate after their associated ssh session ends.
* After running repmgrd as a regular foreground application, hitting Planned feature improvements
control-C causes the program to crash. ============================
* Timeline increases when promoting a standby
* A better check which standby did receive most of the data
* Make the fact that a standby may be delayed a factor in the voting
algorithm
* include support for delayed standbys

View File

@@ -0,0 +1,225 @@
=====================================================
PostgreSQL Automatic Fail-Over - User Documentation
=====================================================
Automatic Failover
==================
repmgr allows setups for automatic failover when it detects the failure of the master node.
Following is a quick setup for this.
Installation
============
For convenience, we define:
**node1**
is the hostname fully qualified of the Master server, IP 192.168.1.10
**node2**
is the hostname fully qualified of the Standby server, IP 192.168.1.11
**witness**
is the hostname fully qualified of the server used for witness, IP 192.168.1.12
**Note:** It is not recommended to use names that describe the role of a server, such as «masterserver»;
such a name becomes confusing once a failover takes place and the Master is
now on the «standbyserver».
Summary
-------
Two PostgreSQL servers are involved in the replication. Automatic failover needs
a vote to decide which server to promote, so an odd number of voters is required;
a witness repmgrd is therefore installed on a third server, where it uses a PostgreSQL
cluster to communicate with the other repmgrd daemons.
1. Install PostgreSQL in all the servers involved (including the server used for
witness)
2. Install repmgr in all the servers involved (including the server used for witness)
3. Configure the Master PostgreSQL
4. Clone the Master to the Standby using "repmgr standby clone" command
5. Configure repmgr in all the servers involved (including the server used for witness)
6. Register Master and Standby nodes
7. Initiate witness server
8. Start the repmgrd daemons in all nodes
**Note** A complete High-Availability design needs at least 3 servers to still have
a backup node after a first failure.
Install PostgreSQL
------------------
You can install PostgreSQL using any of the recommended methods. You should ensure
it's 9.0 or later.
Install repmgr
--------------
Install repmgr following the steps in the README file.
Configure PostgreSQL
--------------------
Log in node1.
Edit the file postgresql.conf and modify the parameters::
listen_addresses='*'
wal_level = 'hot_standby'
archive_mode = on
archive_command = 'cd .' # we can also use exit 0, anything that
# just does nothing
max_wal_senders = 10
wal_keep_segments = 5000 # 80 GB required on pg_xlog
hot_standby = on
shared_preload_libraries = 'repmgr_funcs'
Edit the file pg_hba.conf and add lines for the replication::
host repmgr repmgr 127.0.0.1/32 trust
host repmgr repmgr 192.168.1.10/30 trust
host replication all 192.168.1.10/30 trust
**Note:** It is also possible to use a password authentication (md5), .pgpass file
should be edited to allow connection between each node.
Create the user and database to manage replication::
su - postgres
createuser -s repmgr
createdb -O repmgr repmgr
psql -f /usr/share/postgresql/9.0/contrib/repmgr_funcs.sql repmgr
Restart the PostgreSQL server::
pg_ctl -D $PGDATA restart
And check everything is fine in the server log.
Create the ssh-key for the postgres user and copy it to other servers::
su - postgres
ssh-keygen # /!\ do not use a passphrase /!\
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
exit
rsync -avz ~postgres/.ssh/authorized_keys node2:~postgres/.ssh/
rsync -avz ~postgres/.ssh/authorized_keys witness:~postgres/.ssh/
rsync -avz ~postgres/.ssh/id_rsa* node2:~postgres/.ssh/
rsync -avz ~postgres/.ssh/id_rsa* witness:~postgres/.ssh/
Clone Master
------------
Log in node2.
Clone the node1 (the current Master)::
su - postgres
repmgr -d repmgr -U repmgr -h node1 standby clone
Start the PostgreSQL server::
pg_ctl -D $PGDATA start
And check everything is fine in the server log.
Configure repmgr
----------------
Log in each server and configure repmgr by editing the file
/etc/repmgr/repmgr.conf::
cluster=my_cluster
node=1
node_name=earth
conninfo='host=192.168.1.10 dbname=repmgr user=repmgr'
master_response_timeout=60
reconnect_attempts=6
reconnect_interval=10
failover=automatic
promote_command='promote_command.sh'
follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf'
**cluster**
is the name of the current replication.
**node**
is the number of the current node (1, 2 or 3 in the current example).
**node_name**
is an identifier for every node.
**conninfo**
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server's configuration, 'port=5499' must be added to the conninfo.
**master_response_timeout**
is the maximum amount of time we are going to wait before deciding the master has died and starting the failover procedure.
**reconnect_attempts**
is the number of times we will try to reconnect to the master after a failure has been detected and before starting the failover procedure.
**reconnect_interval**
is the amount of time between retries to reconnect to the master after a failure has been detected and before starting the failover procedure.
**failover**
configure behavior: *manual* or *automatic*.
**promote_command**
the command executed to do the failover (including the PostgreSQL failover itself). The command must return 0 on success.
**follow_command**
the command executed to address the current standby to another Master. The command must return 0 on success.
Register Master and Standby
---------------------------
Log in node1.
Register the node as Master::
su - postgres
repmgr -f /etc/repmgr/repmgr.conf master register
Log in node2. Register it as a standby::
su - postgres
repmgr -f /etc/repmgr/repmgr.conf standby register
Initialize witness server
-------------------------
Log in witness.
Initialize the witness server::
su - postgres
repmgr -d repmgr -U repmgr -h 192.168.1.10 -D $WITNESS_PGDATA -f /etc/repmgr/repmgr.conf witness create
It needs information to connect to the master to copy the configuration of the cluster; it also needs to know where it should initialize its own $PGDATA.
As part of the process it also asks for the superuser password so it can connect when needed.
Start the repmgrd daemons
-------------------------
Log in node2 and witness, and start the daemon::
su - postgres
repmgrd -f /etc/repmgr/repmgr.conf > /var/log/postgresql/repmgr.log 2>&1
**Note:** The Master does not need a repmgrd daemon.
Suspend Automatic behavior
==========================
Edit the repmgr.conf of the node to remove from automatic processing and change::
failover=manual
Then, signal repmgrd daemon::
su - postgres
kill -HUP `pidof repmgrd`
Usage
=====
The repmgr documentation is in the README file (how to build, options, etc.)

View File

@@ -1,6 +1,6 @@
/* /*
* check_dir.c - Directories management functions * check_dir.c - Directories management functions
* Copyright (C) 2ndQuadrant, 2010-2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -31,8 +31,6 @@
#include "strutil.h" #include "strutil.h"
#include "log.h" #include "log.h"
static int mkdir_p(char *path, mode_t omode);
/* /*
* make sure the directory either doesn't exist or is empty * make sure the directory either doesn't exist or is empty
* we use this function to check the new data directory and * we use this function to check the new data directory and
@@ -46,9 +44,9 @@ static int mkdir_p(char *path, mode_t omode);
int int
check_dir(char *dir) check_dir(char *dir)
{ {
DIR *chkdir; DIR *chkdir;
struct dirent *file; struct dirent *file;
int result = 1; int result = 1;
errno = 0; errno = 0;
@@ -60,7 +58,7 @@ check_dir(char *dir)
while ((file = readdir(chkdir)) != NULL) while ((file = readdir(chkdir)) != NULL)
{ {
if (strcmp(".", file->d_name) == 0 || if (strcmp(".", file->d_name) == 0 ||
strcmp("..", file->d_name) == 0) strcmp("..", file->d_name) == 0)
{ {
/* skip this and parent directory */ /* skip this and parent directory */
continue; continue;
@@ -73,6 +71,7 @@ check_dir(char *dir)
} }
#ifdef WIN32 #ifdef WIN32
/* /*
* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
* released version * released version
@@ -84,29 +83,29 @@ check_dir(char *dir)
closedir(chkdir); closedir(chkdir);
if (errno != 0) if (errno != 0)
return -1; /* some kind of I/O error? */ return -1; /* some kind of I/O error? */
return result; return result;
} }
/* /*
* Create directory * Create directory with error log message when failing
*/ */
bool bool
create_directory(char *dir) create_dir(char *dir)
{ {
if (mkdir_p(dir, 0700) == 0) if (mkdir_p(dir, 0700) == 0)
return true; return true;
log_err(_("Could not create directory \"%s\": %s\n"), log_err(_("Could not create directory \"%s\": %s\n"),
dir, strerror(errno)); dir, strerror(errno));
return false; return false;
} }
bool bool
set_directory_permissions(char *dir) set_dir_permissions(char *dir)
{ {
return (chmod(dir, 0700) != 0) ? false : true; return (chmod(dir, 0700) != 0) ? false : true;
} }
@@ -124,15 +123,15 @@ set_directory_permissions(char *dir)
* note that on failure, the path arg has been modified to show the particular * note that on failure, the path arg has been modified to show the particular
* directory level we had problems with. * directory level we had problems with.
*/ */
static int int
mkdir_p(char *path, mode_t omode) mkdir_p(char *path, mode_t omode)
{ {
struct stat sb; struct stat sb;
mode_t numask, mode_t numask,
oumask; oumask;
int first, int first,
last, last,
retval; retval;
char *p; char *p;
p = path; p = path;
@@ -151,8 +150,8 @@ mkdir_p(char *path, mode_t omode)
return 1; return 1;
} }
else if (p[1] == ':' && else if (p[1] == ':' &&
((p[0] >= 'a' && p[0] <= 'z') || ((p[0] >= 'a' && p[0] <= 'z') ||
(p[0] >= 'A' && p[0] <= 'Z'))) (p[0] >= 'A' && p[0] <= 'Z')))
{ {
/* local drive */ /* local drive */
p += 2; p += 2;
@@ -223,10 +222,87 @@ bool
is_pg_dir(char *dir) is_pg_dir(char *dir)
{ {
const size_t buf_sz = 8192; const size_t buf_sz = 8192;
char path[buf_sz]; char path[buf_sz];
struct stat sb; struct stat sb;
int r;
/* test pgdata */
xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); xsnprintf(path, buf_sz, "%s/PG_VERSION", dir);
if (stat(path, &sb) == 0)
return true;
return (stat(path, &sb) == 0) ? true : false; /* test tablespace dir */
sprintf(path, "ls %s/PG_*/ -I*", dir);
r = system(path);
if (r == 0)
return true;
return false;
}
bool
create_pg_dir(char *dir, bool force)
{
bool pg_dir = false;
/* Check this directory could be used as a PGDATA dir */
switch (check_dir(dir))
{
case 0:
/* dir not there, must create it */
log_info(_("creating directory \"%s\"...\n"), dir);
if (!create_dir(dir))
{
log_err(_("couldn't create directory \"%s\"...\n"),
dir);
return false;
}
break;
case 1:
/* Present but empty, fix permissions and use it */
log_info(_("checking and correcting permissions on existing directory %s ...\n"),
dir);
if (!set_dir_permissions(dir))
{
log_err(_("could not change permissions of directory \"%s\": %s\n"),
dir, strerror(errno));
return false;
}
break;
case 2:
/* Present and not empty */
log_warning(_("directory \"%s\" exists but is not empty\n"),
dir);
pg_dir = is_pg_dir(dir);
/*
* we use force to reduce the time needed to restore a node which
* turn async after a failover or anything else
*/
if (pg_dir && force)
{
/* Let it continue */
break;
}
else if (pg_dir && !force)
{
log_warning(_("\nThis looks like a PostgreSQL directory.\n"
"If you are sure you want to clone here, "
"please check there is no PostgreSQL server "
"running and use the --force option\n"));
return false;
}
return false;
default:
/* Trouble accessing directory */
log_err(_("could not access directory \"%s\": %s\n"),
dir, strerror(errno));
return false;
}
return true;
} }

View File

@@ -1,6 +1,6 @@
/* /*
* check_dir.h * check_dir.h
* Copyright (c) 2ndQuadrant, 2010-2011 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -20,9 +20,11 @@
#ifndef _REPMGR_CHECK_DIR_H_ #ifndef _REPMGR_CHECK_DIR_H_
#define _REPMGR_CHECK_DIR_H_ #define _REPMGR_CHECK_DIR_H_
int check_dir(char *dir); int mkdir_p(char *path, mode_t omode);
bool create_directory(char *dir); int check_dir(char *dir);
bool set_directory_permissions(char *dir); bool create_dir(char *dir);
bool is_pg_dir(char *dir); bool set_dir_permissions(char *dir);
bool is_pg_dir(char *dir);
bool create_pg_dir(char *dir, bool force);
#endif #endif

252
config.c
View File

@@ -1,6 +1,6 @@
/* /*
* config.c - Functions to parse the config file * config.c - Functions to parse the config file
* Copyright (C) 2ndQuadrant, 2010-2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -18,36 +18,57 @@
*/ */
#include "config.h" #include "config.h"
#include "repmgr.h" #include "log.h"
#include "strutil.h" #include "strutil.h"
#include "repmgr.h"
void void
parse_config(const char* config_file, t_configuration_options* options) parse_config(const char *config_file, t_configuration_options * options)
{ {
char *s, buff[MAXLINELENGTH]; char *s,
char name[MAXLEN]; buff[MAXLINELENGTH];
char value[MAXLEN]; char name[MAXLEN];
char value[MAXLEN];
FILE *fp = fopen (config_file, "r"); FILE *fp = fopen(config_file, "r");
/* Initialize */ /* Initialize */
memset(options->cluster_name, 0, sizeof(options->cluster_name)); memset(options->cluster_name, 0, sizeof(options->cluster_name));
options->node = -1; options->node = -1;
memset(options->conninfo, 0, sizeof(options->conninfo)); memset(options->conninfo, 0, sizeof(options->conninfo));
options->failover = MANUAL_FAILOVER;
options->priority = 0;
memset(options->node_name, 0, sizeof(options->node_name));
memset(options->promote_command, 0, sizeof(options->promote_command));
memset(options->follow_command, 0, sizeof(options->follow_command));
memset(options->rsync_options, 0, sizeof(options->rsync_options)); memset(options->rsync_options, 0, sizeof(options->rsync_options));
memset(options->ssh_options, 0, sizeof(options->ssh_options));
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
memset(options->pgctl_options, 0, sizeof(options->pgctl_options));
/* if nothing has been provided defaults to 60 */
options->master_response_timeout = 60;
/* it defaults to 6 retries with a time between retries of 10s */
options->reconnect_attempts = 6;
options->reconnect_intvl = 10;
options->monitor_interval_secs = 2;
options->retry_promote_interval_secs = 300;
/* /*
* Since some commands don't require a config file at all, not * Since some commands don't require a config file at all, not having one
* having one isn't necessarily a problem. * isn't necessarily a problem.
*/ */
if (fp == NULL) if (fp == NULL)
{ {
fprintf(stderr, _("Did not find the configuration file '%s', continuing\n"), config_file); log_err(_("Did not find the configuration file '%s', continuing\n"),
config_file);
return; return;
} }
/* Read next line */ /* Read next line */
while ((s = fgets (buff, sizeof buff, fp)) != NULL) while ((s = fgets(buff, sizeof buff, fp)) != NULL)
{ {
/* Skip blank lines and comments */ /* Skip blank lines and comments */
if (buff[0] == '\n' || buff[0] == '#') if (buff[0] == '\n' || buff[0] == '#')
@@ -58,70 +79,138 @@ parse_config(const char* config_file, t_configuration_options* options)
/* Copy into correct entry in parameters struct */ /* Copy into correct entry in parameters struct */
if (strcmp(name, "cluster") == 0) if (strcmp(name, "cluster") == 0)
strncpy (options->cluster_name, value, MAXLEN); strncpy(options->cluster_name, value, MAXLEN);
else if (strcmp(name, "node") == 0) else if (strcmp(name, "node") == 0)
options->node = atoi(value); options->node = atoi(value);
else if (strcmp(name, "conninfo") == 0) else if (strcmp(name, "conninfo") == 0)
strncpy (options->conninfo, value, MAXLEN); strncpy(options->conninfo, value, MAXLEN);
else if (strcmp(name, "rsync_options") == 0) else if (strcmp(name, "rsync_options") == 0)
strncpy (options->rsync_options, value, QUERY_STR_LEN); strncpy(options->rsync_options, value, QUERY_STR_LEN);
else if (strcmp(name, "ssh_options") == 0)
strncpy(options->ssh_options, value, QUERY_STR_LEN);
else if (strcmp(name, "loglevel") == 0) else if (strcmp(name, "loglevel") == 0)
strncpy (options->loglevel, value, MAXLEN); strncpy(options->loglevel, value, MAXLEN);
else if (strcmp(name, "logfacility") == 0) else if (strcmp(name, "logfacility") == 0)
strncpy (options->logfacility, value, MAXLEN); strncpy(options->logfacility, value, MAXLEN);
else if (strcmp(name, "failover") == 0)
{
char failoverstr[MAXLEN];
strncpy(failoverstr, value, MAXLEN);
if (strcmp(failoverstr, "manual") == 0)
options->failover = MANUAL_FAILOVER;
else if (strcmp(failoverstr, "automatic") == 0)
options->failover = AUTOMATIC_FAILOVER;
else
{
log_warning(_("value for failover option is incorrect, it should be automatic or manual. Defaulting to manual.\n"));
options->failover = MANUAL_FAILOVER;
}
}
else if (strcmp(name, "priority") == 0)
options->priority = atoi(value);
else if (strcmp(name, "node_name") == 0)
strncpy(options->node_name, value, MAXLEN);
else if (strcmp(name, "promote_command") == 0)
strncpy(options->promote_command, value, MAXLEN);
else if (strcmp(name, "follow_command") == 0)
strncpy(options->follow_command, value, MAXLEN);
else if (strcmp(name, "master_response_timeout") == 0)
options->master_response_timeout = atoi(value);
else if (strcmp(name, "reconnect_attempts") == 0)
options->reconnect_attempts = atoi(value);
else if (strcmp(name, "reconnect_interval") == 0)
options->reconnect_intvl = atoi(value);
else if (strcmp(name, "pg_bindir") == 0)
strncpy(options->pg_bindir, value, MAXLEN);
else if (strcmp(name, "pg_ctl_options") == 0)
strncpy(options->pgctl_options, value, MAXLEN);
else if (strcmp(name, "logfile") == 0)
strncpy(options->logfile, value, MAXLEN);
else if (strcmp(name, "monitor_interval_secs") == 0)
options->monitor_interval_secs = atoi(value);
else if (strcmp(name, "retry_promote_interval_secs") == 0)
options->retry_promote_interval_secs = atoi(value);
else else
printf ("WARNING: %s/%s: Unknown name/value pair!\n", name, value); log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value);
} }
/* Close file */ /* Close file */
fclose (fp); fclose(fp);
/* Check config settings */ /* Check config settings */
if (strnlen(options->cluster_name, MAXLEN)==0) if (*options->cluster_name == '\0')
{ {
fprintf(stderr, "Cluster name is missing. " log_err(_("Cluster name is missing. Check the configuration file.\n"));
"Check the configuration file.\n");
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
if (options->node == -1) if (options->node == -1)
{ {
fprintf(stderr, "Node information is missing. " log_err(_("Node information is missing. Check the configuration file.\n"));
"Check the configuration file.\n"); exit(ERR_BAD_CONFIG);
}
if (options->master_response_timeout <= 0)
{
log_err(_("Master response timeout must be greater than zero. Check the configuration file.\n"));
exit(ERR_BAD_CONFIG);
}
if (options->reconnect_attempts < 0)
{
log_err(_("Reconnect attempts must be zero or greater. Check the configuration file.\n"));
exit(ERR_BAD_CONFIG);
}
if (options->reconnect_intvl <= 0)
{
log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n"));
exit(ERR_BAD_CONFIG);
}
if (*options->pg_bindir == '\0')
{
log_err(_("pg_bindir config value not found. Check the configuration file.\n"));
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
} }
char * char *
trim (char *s) trim(char *s)
{ {
/* Initialize start, end pointers */ /* Initialize start, end pointers */
char *s1 = s, *s2 = &s[strlen (s) - 1]; char *s1 = s,
*s2 = &s[strlen(s) - 1];
/* Trim and delimit right side */ /* Trim and delimit right side */
while ( (isspace (*s2)) && (s2 >= s1) ) while ((isspace(*s2)) && (s2 >= s1))
--s2; --s2;
*(s2+1) = '\0'; *(s2 + 1) = '\0';
/* Trim left side */ /* Trim left side */
while ( (isspace (*s1)) && (s1 < s2) ) while ((isspace(*s1)) && (s1 < s2))
++s1; ++s1;
/* Copy finished string */ /* Copy finished string */
strcpy (s, s1); memmove(s, s1, s2 - s1);
s[s2 - s1 + 1] = '\0';
return s; return s;
} }
void void
parse_line(char *buff, char *name, char *value) parse_line(char *buff, char *name, char *value)
{ {
int i = 0; int i = 0;
int j = 0; int j = 0;
/* /*
* first we find the name of the parameter * first we find the name of the parameter
*/ */
for ( ; i < MAXLEN; ++i) for (; i < MAXLEN; ++i)
{ {
if (buff[i] != '=') if (buff[i] != '=')
name[j++] = buff[i]; name[j++] = buff[i];
@@ -134,7 +223,7 @@ parse_line(char *buff, char *name, char *value)
* Now the value * Now the value
*/ */
j = 0; j = 0;
for ( ++i ; i < MAXLEN; ++i) for (++i; i < MAXLEN; ++i)
if (buff[i] == '\'') if (buff[i] == '\'')
continue; continue;
else if (buff[i] != '\n') else if (buff[i] != '\n')
@@ -144,3 +233,100 @@ parse_line(char *buff, char *name, char *value)
value[j] = '\0'; value[j] = '\0';
trim(value); trim(value);
} }
/*
 * reload_config - re-read the configuration file and, if it is acceptable,
 * load its values into the options currently in use.
 *
 * config_file:  path of the configuration file to parse again.
 * orig_options: options currently in use; updated in place on success and
 *               left untouched on failure.
 *
 * Returns true when the new configuration was accepted and copied into
 * orig_options, false when it was rejected (a warning is logged saying why).
 *
 * The cluster name, node number and node name are not allowed to change.
 * Only the settings copied at the end of this function are reloaded; the
 * remaining members of orig_options (loglevel, logfacility, pg_bindir,
 * pgctl_options, logfile and the monitor/retry intervals) are not modified
 * here.
 */
bool
reload_config(char *config_file, t_configuration_options * orig_options)
{
	PGconn	   *conn;
	t_configuration_options new_options;

	/*
	 * Re-read the configuration file: repmgr.conf
	 */
	log_info(_("Reloading configuration file and updating repmgr tables\n"));

	parse_config(config_file, &new_options);
	if (new_options.node == -1)
	{
		log_warning(_("Cannot load new configuration, will keep current one.\n"));
		return false;
	}

	/* Identity of the node must stay the same across a reload */
	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
	{
		log_warning(_("Cannot change cluster name, will keep current configuration.\n"));
		return false;
	}

	if (new_options.node != orig_options->node)
	{
		log_warning(_("Cannot change node number, will keep current configuration.\n"));
		return false;
	}

	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
	{
		log_warning(_("Cannot change standby name, will keep current configuration.\n"));
		return false;
	}

	/* Sanity checks on the values that are allowed to change */
	if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
	{
		log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n"));
		return false;
	}

	if (new_options.master_response_timeout <= 0)
	{
		log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n"));
		return false;
	}

	if (new_options.reconnect_attempts < 0)
	{
		log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
		return false;
	}

	if (new_options.reconnect_intvl < 0)
	{
		log_warning(_("New value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
		return false;
	}

	/* Test conninfo string */
	conn = establish_db_connection(new_options.conninfo, false);
	if (!conn || (PQstatus(conn) != CONNECTION_OK))
	{
		log_warning(_("conninfo string is not valid, will keep current configuration.\n"));

		/*
		 * A connection object is returned even when the attempt failed; it
		 * must still be released or it leaks on every rejected reload.
		 */
		if (conn)
			PQfinish(conn);
		return false;
	}
	PQfinish(conn);

	/* Configuration seems ok, will load new values */
	strcpy(orig_options->cluster_name, new_options.cluster_name);
	orig_options->node = new_options.node;
	strcpy(orig_options->conninfo, new_options.conninfo);
	orig_options->failover = new_options.failover;
	orig_options->priority = new_options.priority;
	strcpy(orig_options->node_name, new_options.node_name);
	strcpy(orig_options->promote_command, new_options.promote_command);
	strcpy(orig_options->follow_command, new_options.follow_command);
	strcpy(orig_options->rsync_options, new_options.rsync_options);
	strcpy(orig_options->ssh_options, new_options.ssh_options);
	orig_options->master_response_timeout = new_options.master_response_timeout;
	orig_options->reconnect_attempts = new_options.reconnect_attempts;
	orig_options->reconnect_intvl = new_options.reconnect_intvl;

	/*
	 * XXX These ones can change with a simple SIGHUP?
	 *
	 * strcpy (orig_options->loglevel, new_options.loglevel); strcpy
	 * (orig_options->logfacility, new_options.logfacility);
	 *
	 * logger_shutdown(); XXX do we have progname here ? logger_init(progname,
	 * orig_options.loglevel, orig_options.logfacility);
	 */

	return true;
}

View File

@@ -1,6 +1,6 @@
/* /*
* config.h * config.h
* Copyright (c) 2ndQuadrant, 2010-2011 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -25,16 +25,33 @@
typedef struct typedef struct
{ {
char cluster_name[MAXLEN]; char cluster_name[MAXLEN];
int node; int node;
char conninfo[MAXLEN]; char conninfo[MAXLEN];
char loglevel[MAXLEN]; int failover;
char logfacility[MAXLEN]; int priority;
char rsync_options[QUERY_STR_LEN]; char node_name[MAXLEN];
} t_configuration_options; char promote_command[MAXLEN];
char follow_command[MAXLEN];
char loglevel[MAXLEN];
char logfacility[MAXLEN];
char rsync_options[QUERY_STR_LEN];
char ssh_options[QUERY_STR_LEN];
int master_response_timeout;
int reconnect_attempts;
int reconnect_intvl;
char pg_bindir[MAXLEN];
char pgctl_options[MAXLEN];
char logfile[MAXLEN];
int monitor_interval_secs;
int retry_promote_interval_secs;
} t_configuration_options;
void parse_config(const char* config_file, t_configuration_options* options); #define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", 0, 0 }
void parse_line(char *buff, char *name, char *value);
char *trim(char *s); void parse_config(const char *config_file, t_configuration_options * options);
void parse_line(char *buff, char *name, char *value);
char *trim(char *s);
bool reload_config(char *config_file, t_configuration_options * orig_options);
#endif #endif

398
dbutils.c
View File

@@ -1,6 +1,6 @@
/* /*
* dbutils.c - Database connection/management functions * dbutils.c - Database connection/management functions
* Copyright (C) 2ndQuadrant, 2010-2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -17,21 +17,30 @@
* *
*/ */
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include "repmgr.h" #include "repmgr.h"
#include "strutil.h" #include "strutil.h"
#include "log.h" #include "log.h"
PGconn * PGconn *
establishDBConnection(const char *conninfo, const bool exit_on_error) establish_db_connection(const char *conninfo, const bool exit_on_error)
{ {
/* Make a connection to the database */ /* Make a connection to the database */
PGconn *conn = PQconnectdb(conninfo); PGconn *conn = NULL;
char connection_string[MAXLEN];
strcpy(connection_string, conninfo);
strcat(connection_string, " fallback_application_name='repmgr'");
conn = PQconnectdb(connection_string);
/* Check to see that the backend connection was successfully made */ /* Check to see that the backend connection was successfully made */
if ((PQstatus(conn) != CONNECTION_OK)) if ((PQstatus(conn) != CONNECTION_OK))
{ {
log_err(_("Connection to database failed: %s\n"), log_err(_("Connection to database failed: %s\n"),
PQerrorMessage(conn)); PQerrorMessage(conn));
if (exit_on_error) if (exit_on_error)
{ {
@@ -44,16 +53,17 @@ establishDBConnection(const char *conninfo, const bool exit_on_error)
} }
PGconn * PGconn *
establishDBConnectionByParams(const char *keywords[], const char *values[],const bool exit_on_error) establish_db_connection_by_params(const char *keywords[], const char *values[],
const bool exit_on_error)
{ {
/* Make a connection to the database */ /* Make a connection to the database */
PGconn *conn = PQconnectdbParams(keywords, values, true); PGconn *conn = PQconnectdbParams(keywords, values, true);
/* Check to see that the backend connection was successfully made */ /* Check to see that the backend connection was successfully made */
if ((PQstatus(conn) != CONNECTION_OK)) if ((PQstatus(conn) != CONNECTION_OK))
{ {
log_err(_("Connection to database failed: %s\n"), log_err(_("Connection to database failed: %s\n"),
PQerrorMessage(conn)); PQerrorMessage(conn));
if (exit_on_error) if (exit_on_error)
{ {
PQfinish(conn); PQfinish(conn);
@@ -64,58 +74,133 @@ establishDBConnectionByParams(const char *keywords[], const char *values[],const
return conn; return conn;
} }
bool int
is_standby(PGconn *conn) is_standby(PGconn *conn)
{ {
PGresult *res; PGresult *res;
bool result; int result = 0;
res = PQexec(conn, "SELECT pg_is_in_recovery()"); res = PQexec(conn, "SELECT pg_is_in_recovery()");
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("Can't query server mode: %s"), log_err(_("Can't query server mode: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res); result = -1;
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0) result = 1;
result = false;
else
result = true;
PQclear(res); PQclear(res);
return result; return result;
} }
/*
 * is_witness()
 *
 * Look up the "witness" flag of node `node_id` in `schema`.repl_nodes for
 * the given cluster.
 *
 * Returns:
 *	 1 - exactly one row came back and its value is "t"
 *	 0 - the query succeeded but did not yield a single "t" row
 *	-1 - the query did not return PGRES_TUPLES_OK (the error is logged)
 */
int
is_witness(PGconn *conn, char *schema, char *cluster, int node_id)
{
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;
	int			witness_state;

	sqlquery_snprintf(sqlquery, "SELECT witness from %s.repl_nodes where cluster = '%s' and id = %d",
					  schema, cluster, node_id);
	res = PQexec(conn, sqlquery);

	if (PQresultStatus(res) == PGRES_TUPLES_OK)
	{
		/* Anything other than a single row holding "t" counts as "no" */
		if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
		{
			witness_state = 1;
		}
		else
		{
			witness_state = 0;
		}
	}
	else
	{
		log_err(_("Can't query server mode: %s"), PQerrorMessage(conn));
		witness_state = -1;
	}

	/* The result is released on every path */
	PQclear(res);

	return witness_state;
}
/*
 * is_pgup()
 *
 * Check that the connection is usable: PQstatus() must report
 * CONNECTION_OK and a "SELECT 1" must be sent and completed within
 * `timeout` (the value is handed unchanged to cancel_query() and
 * wait_connection_availability()).
 *
 * If any step fails the connection is reset once with PQreset() and the
 * whole check is repeated, because the connection may have been lost only
 * momentarily. A failure after the reset is final.
 *
 * Returns true when the probe succeeded, false otherwise.
 */
bool
is_pgup(PGconn *conn, int timeout)
{
	char		sqlquery[QUERY_STR_LEN];
	bool		reset_done = false;

	for (;;)
	{
		bool		probe_ok = false;

		/*
		 * Clear out whatever is still running on the connection and wait
		 * until it is idle before sending the probe query.
		 */
		if (PQstatus(conn) == CONNECTION_OK &&
			cancel_query(conn, timeout) &&
			wait_connection_availability(conn, timeout) == 1)
		{
			sqlquery_snprintf(sqlquery, "SELECT 1");

			if (PQsendQuery(conn, sqlquery) == 0)
			{
				log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"),
							PQerrorMessage(conn));
			}
			else
			{
				/* The probe counts only once its result has arrived */
				probe_ok = (wait_connection_availability(conn, timeout) == 1);
			}
		}

		if (probe_ok)
		{
			return true;
		}

		/* Second failure in a row: give up */
		if (reset_done)
		{
			return false;
		}

		PQreset(conn);			/* reconnect */
		reset_done = true;
	}
}
/* /*
* If postgreSQL version is 9 or superior returns the major version * If postgreSQL version is 9 or superior returns the major version
* if 8 or inferior returns an empty string * if 8 or inferior returns an empty string
*/ */
char * char *
pg_version(PGconn *conn, char* major_version) pg_version(PGconn *conn, char *major_version)
{ {
PGresult *res; PGresult *res;
int major_version1; int major_version1;
char *major_version2; char *major_version2;
res = PQexec(conn, res = PQexec(conn,
"WITH pg_version(ver) AS " "WITH pg_version(ver) AS "
"(SELECT split_part(version(), ' ', 2)) " "(SELECT split_part(version(), ' ', 2)) "
"SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) " "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) "
"FROM pg_version"); "FROM pg_version");
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("Version check PQexec failed: %s"), log_err(_("Version check PQexec failed: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res); PQclear(res);
PQfinish(conn); return NULL;
exit(ERR_DB_QUERY);
} }
major_version1 = atoi(PQgetvalue(res, 0, 0)); major_version1 = atoi(PQgetvalue(res, 0, 0));
@@ -125,7 +210,7 @@ pg_version(PGconn *conn, char* major_version)
{ {
/* form a major version string */ /* form a major version string */
xsnprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, xsnprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1,
major_version2); major_version2);
} }
else else
strcpy(major_version, ""); strcpy(major_version, "");
@@ -136,59 +221,92 @@ pg_version(PGconn *conn, char* major_version)
} }
bool int
guc_setted(PGconn *conn, const char *parameter, const char *op, guc_set(PGconn *conn, const char *parameter, const char *op,
const char *value) const char *value)
{ {
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
int retval = 1;
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
" WHERE name = '%s' AND setting %s '%s'", " WHERE name = '%s' AND setting %s '%s'",
parameter, op, value); parameter, op, value);
res = PQexec(conn, sqlquery); res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("GUC setting check PQexec failed: %s"), log_err(_("GUC setting check PQexec failed: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res); retval = -1;
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
if (PQntuples(res) == 0) else if (PQntuples(res) == 0)
{ {
PQclear(res); retval = 0;
return false;
} }
PQclear(res); PQclear(res);
return true; return retval;
}
/**
* Just like guc_set except with an extra parameter containing the name of
* the pg datatype so that the comparison can be done properly.
*/
int
guc_set_typed(PGconn *conn, const char *parameter, const char *op,
const char *value, const char *datatype)
{
PGresult *res;
char sqlquery[QUERY_STR_LEN];
int retval = 1;
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
parameter, datatype, op, value, datatype);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_err(_("GUC setting check PQexec failed: %s"),
PQerrorMessage(conn));
retval = -1;
}
else if (PQntuples(res) == 0)
{
retval = 0;
}
PQclear(res);
return retval;
} }
const char * const char *
get_cluster_size(PGconn *conn) get_cluster_size(PGconn *conn)
{ {
PGresult *res; PGresult *res;
const char *size; const char *size = NULL;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
sqlquery_snprintf( sqlquery_snprintf(
sqlquery, sqlquery,
"SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) "
" FROM pg_database "); " FROM pg_database ");
res = PQexec(conn, sqlquery); res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("Get cluster size PQexec failed: %s"), log_err(_("Get cluster size PQexec failed: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
size = PQgetvalue(res, 0, 0); else
{
size = PQgetvalue(res, 0, 0);
}
PQclear(res); PQclear(res);
return size; return size;
} }
@@ -197,24 +315,23 @@ get_cluster_size(PGconn *conn)
* get a connection to master by reading repl_nodes, creating a connection * get a connection to master by reading repl_nodes, creating a connection
* to each node (one at a time) and finding if it is a master or a standby * to each node (one at a time) and finding if it is a master or a standby
* *
* NB: If master_conninfo_out may be NULL. If it is non-null, it is assumed to * NB: If master_conninfo_out may be NULL. If it is non-null, it is assumed to
* point to allocated memory of MAXCONNINFO in length, and the master server * point to allocated memory of MAXCONNINFO in length, and the master server
* connection string is placed there. * connection string is placed there.
*/ */
PGconn * PGconn *
getMasterConnection(PGconn *standby_conn, char *cluster, get_master_connection(PGconn *standby_conn, char *schema, char *cluster,
int *master_id, char *master_conninfo_out) int *master_id, char *master_conninfo_out)
{ {
PGconn *master_conn = NULL; PGconn *master_conn = NULL;
PGresult *res1; PGresult *res1;
PGresult *res2; PGresult *res2;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
char master_conninfo_stack[MAXCONNINFO]; char master_conninfo_stack[MAXCONNINFO];
char *master_conninfo = &*master_conninfo_stack; char *master_conninfo = &*master_conninfo_stack;
char schema_str[MAXLEN]; char schema_quoted[MAXLEN];
char schema_quoted[MAXLEN];
int i; int i;
/* /*
* If the caller wanted to get a copy of the connection info string, sub * If the caller wanted to get a copy of the connection info string, sub
@@ -228,10 +345,9 @@ getMasterConnection(PGconn *standby_conn, char *cluster,
* *
* Assemble the unquoted schema name * Assemble the unquoted schema name
*/ */
maxlen_snprintf(schema_str, "repmgr_%s", cluster);
{ {
char *identifier = PQescapeIdentifier(standby_conn, schema_str, char *identifier = PQescapeIdentifier(standby_conn, schema,
strlen(schema_str)); strlen(schema));
maxlen_snprintf(schema_quoted, "%s", identifier); maxlen_snprintf(schema_quoted, "%s", identifier);
PQfreemem(identifier); PQfreemem(identifier);
@@ -239,45 +355,44 @@ getMasterConnection(PGconn *standby_conn, char *cluster,
/* find all nodes belonging to this cluster */ /* find all nodes belonging to this cluster */
log_info(_("finding node list for cluster '%s'\n"), log_info(_("finding node list for cluster '%s'\n"),
cluster); cluster);
sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " sqlquery_snprintf(sqlquery, "SELECT id, conninfo FROM %s.repl_nodes "
" WHERE cluster = '%s'", " WHERE cluster = '%s' and not witness",
schema_quoted, cluster); schema_quoted, cluster);
res1 = PQexec(standby_conn, sqlquery); res1 = PQexec(standby_conn, sqlquery);
if (PQresultStatus(res1) != PGRES_TUPLES_OK) if (PQresultStatus(res1) != PGRES_TUPLES_OK)
{ {
log_err(_("Can't get nodes info: %s\n"), log_err(_("Can't get nodes info: %s\n"),
PQerrorMessage(standby_conn)); PQerrorMessage(standby_conn));
PQclear(res1); PQclear(res1);
PQfinish(standby_conn); return NULL;
exit(ERR_DB_QUERY);
} }
for (i = 0; i < PQntuples(res1); i++) for (i = 0; i < PQntuples(res1); i++)
{ {
/* initialize with the values of the current node being processed */ /* initialize with the values of the current node being processed */
*master_id = atoi(PQgetvalue(res1, i, 0)); *master_id = atoi(PQgetvalue(res1, i, 0));
strncpy(master_conninfo, PQgetvalue(res1, i, 2), MAXCONNINFO); strncpy(master_conninfo, PQgetvalue(res1, i, 1), MAXCONNINFO);
log_info(_("checking role of cluster node '%s'\n"), log_info(_("checking role of cluster node '%s'\n"),
master_conninfo); master_conninfo);
master_conn = establishDBConnection(master_conninfo, false); master_conn = establish_db_connection(master_conninfo, false);
if (PQstatus(master_conn) != CONNECTION_OK) if (PQstatus(master_conn) != CONNECTION_OK)
continue; continue;
/* /*
* Can't use the is_standby() function here because on error that * Can't use the is_standby() function here because on error that
* function closes the connection passed and exits. This still * function closes the connection passed and exits. This still needs
* needs to close master_conn first. * to close master_conn first.
*/ */
res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()"); res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");
if (PQresultStatus(res2) != PGRES_TUPLES_OK) if (PQresultStatus(res2) != PGRES_TUPLES_OK)
{ {
log_err(_("Can't get recovery state from this node: %s\n"), log_err(_("Can't get recovery state from this node: %s\n"),
PQerrorMessage(master_conn)); PQerrorMessage(master_conn));
PQclear(res2); PQclear(res2);
PQfinish(master_conn); PQfinish(master_conn);
continue; continue;
@@ -299,15 +414,116 @@ getMasterConnection(PGconn *standby_conn, char *cluster,
} }
} }
/* If we finish this loop without finding a master then /*
* we doesn't have the info or the master has failed (or we * If we finish this loop without finding a master then we doesn't have
* reached max_connections or superuser_reserved_connections, * the info or the master has failed (or we reached max_connections or
* anything else I'm missing?). * superuser_reserved_connections, anything else I'm missing?).
* *
* Probably we will need to check the error to know if we need * Probably we will need to check the error to know if we need to start
* to start failover procedure or just fix some situation on the * failover procedure or just fix some situation on the standby.
* standby.
*/ */
PQclear(res1); PQclear(res1);
return NULL; return NULL;
} }
/*
 * wait_connection_availability()
 *
 * Wait until the command currently running on the connection finishes,
 * discarding any results it produces. The command may be an asynchronous
 * query or the cancellation of one.
 *
 * `timeout` is given in seconds. The socket is watched in slices of at most
 * 250ms and the wall-clock time spent in each slice is deducted from the
 * remaining budget.
 *
 * Returns:
 *	 1 - the connection is no longer busy and its pending results were
 *		 drained (also returned when the remaining budget is exactly zero,
 *		 e.g. when called with timeout == 0)
 *	 0 - PQconsumeInput() reported an error
 *	-1 - select() failed, or the timeout was used up
 */
int
wait_connection_availability(PGconn *conn, long long timeout)
{
	PGresult   *res;
	fd_set		read_set;
	int			sock = PQsocket(conn);
	struct timeval tmout,
				before,
				after;

	/* recalc to microseconds */
	timeout *= 1000000;

	while (timeout > 0)
	{
		if (PQconsumeInput(conn) == 0)
		{
			log_warning(_("wait_connection_availability: could not receive data from connection. %s\n"),
						PQerrorMessage(conn));
			return 0;
		}

		if (PQisBusy(conn) == 0)
		{
			/*
			 * Drain every pending result. The pointer is tested before it
			 * is released, never afterwards.
			 */
			while ((res = PQgetResult(conn)) != NULL)
			{
				PQclear(res);
			}
			break;
		}

		/* select() may modify the timeout, so it is reloaded each round */
		tmout.tv_sec = 0;
		tmout.tv_usec = 250000;

		FD_ZERO(&read_set);
		FD_SET(sock, &read_set);

		/* The timezone argument of gettimeofday() is obsolete; pass NULL */
		gettimeofday(&before, NULL);

		/*
		 * select() examines descriptors 0 .. nfds-1, so nfds must be
		 * sock + 1 for our socket to be watched at all. Passing plain
		 * `sock` turned this call into an unconditional 250ms sleep.
		 */
		if (select(sock + 1, &read_set, NULL, NULL, &tmout) == -1)
		{
			log_warning(
						_("wait_connection_availability: select() returned with error: %s"),
						strerror(errno));
			return -1;
		}

		gettimeofday(&after, NULL);

		/* Widen before multiplying so a 32-bit time_t cannot overflow */
		timeout -= ((long long) after.tv_sec * 1000000 + after.tv_usec) -
			((long long) before.tv_sec * 1000000 + before.tv_usec);
	}

	if (timeout >= 0)
	{
		return 1;
	}

	log_warning(_("wait_connection_availability: timeout reached"));
	return -1;
}
/*
 * cancel_query()
 *
 * Ask the server to cancel the command running on `conn`. Before the
 * cancel request is issued, wait_connection_availability() is called with
 * `timeout` and must return 1.
 *
 * Returns true when the cancel request was dispatched, false when the
 * wait failed, no cancel handle could be obtained, or PQcancel() failed
 * (in which case the reason is logged as a warning).
 */
bool
cancel_query(PGconn *conn, int timeout)
{
	char		errbuf[ERRBUFF_SIZE];
	PGcancel   *cancel_handle;
	bool		dispatched;

	if (wait_connection_availability(conn, timeout) != 1)
	{
		return false;
	}

	cancel_handle = PQgetCancel(conn);
	if (cancel_handle == NULL)
	{
		return false;
	}

	/*
	 * PQcancel can only return 0 if socket()/connect()/send() fails, in any
	 * of those cases we can assume something bad happened to the connection
	 */
	dispatched = (PQcancel(cancel_handle, errbuf, ERRBUFF_SIZE) != 0);
	if (!dispatched)
	{
		log_warning(_("Can't stop current query: %s\n"), errbuf);
	}

	/* The handle is released whether or not the request went through */
	PQfreeCancel(cancel_handle);

	return dispatched;
}

View File

@@ -1,6 +1,6 @@
/* /*
* dbutils.h * dbutils.h
* Copyright (c) 2ndQuadrant, 2010-2011 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -22,16 +22,25 @@
#include "strutil.h" #include "strutil.h"
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error); PGconn *establish_db_connection(const char *conninfo,
PGconn *establishDBConnectionByParams(const char *keywords[], const bool exit_on_error);
const char *values[], PGconn *establish_db_connection_by_params(const char *keywords[],
const bool exit_on_error); const char *values[],
bool is_standby(PGconn *conn); const bool exit_on_error);
char *pg_version(PGconn *conn, char* major_version); int is_standby(PGconn *conn);
bool guc_setted(PGconn *conn, const char *parameter, const char *op, int is_witness(PGconn *conn, char *schema, char *cluster, int node_id);
const char *value); bool is_pgup(PGconn *conn, int timeout);
const char *get_cluster_size(PGconn *conn); char *pg_version(PGconn *conn, char *major_version);
PGconn *getMasterConnection(PGconn *standby_conn, char *cluster, int guc_set(PGconn *conn, const char *parameter, const char *op,
int *master_id, char *master_conninfo_out); const char *value);
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
const char *value, const char *datatype);
const char *get_cluster_size(PGconn *conn);
PGconn *get_master_connection(PGconn *standby_conn, char *schema, char *cluster,
int *master_id, char *master_conninfo_out);
int wait_connection_availability(PGconn *conn, long long timeout);
bool cancel_query(PGconn *conn, int timeout);
#endif #endif

View File

@@ -1,9 +1,9 @@
Package: repmgr Package: repmgr-auto
Version: 1.0-1 Version: 2.0beta2
Section: database Section: database
Priority: optional Priority: optional
Architecture: all Architecture: all
Depends: rsync, postgresql-9.0 Depends: rsync, postgresql-9.0 | postgresql-9.1 | postgresql-9.2 | postgresql-9.3
Maintainer: Greg Smith <greg@2ndQuadrant.com> Maintainer: Jaime Casanova <jaime@2ndQuadrant.com>
Description: PostgreSQL replication setup, magament and monitoring Description: PostgreSQL replication setup, magament and monitoring
has two main executables has two main executables

18
debian/repmgr.repmgrd.default vendored Normal file
View File

@@ -0,0 +1,18 @@
# default settings for repmgrd. This file is sourced by /bin/sh from
# /etc/init.d/repmgrd
# disable repmgrd by default so it won't get started upon installation
# valid values: yes/no
REPMGRD_ENABLED=no
# Options for repmgrd (required)
#REPMGRD_OPTS="--config-file /path/to/repmgr.conf"
# User to run repmgrd as
#REPMGRD_USER=postgres
# repmgrd binary
#REPMGRD_BIN=/usr/bin/repmgrd
# pid file
#REPMGRD_PIDFILE=/var/run/repmgrd.pid

101
debian/repmgr.repmgrd.init vendored Normal file
View File

@@ -0,0 +1,101 @@
#!/bin/sh
### BEGIN INIT INFO
# Provides: repmgrd
# Required-Start: $local_fs $remote_fs $network $syslog postgresql
# Required-Stop: $local_fs $remote_fs $network $syslog postgresql
# Should-Start: $syslog postgresql
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Start/stop repmgrd
# Description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
### END INIT INFO
set -e

DESC="PostgreSQL replication management and monitoring daemon"
NAME=repmgrd

# Built-in defaults; the file sourced below may override any of them
REPMGRD_ENABLED=no
REPMGRD_OPTS=
REPMGRD_USER=postgres
REPMGRD_BIN=/usr/bin/repmgrd
REPMGRD_PIDFILE=/var/run/repmgrd.pid

# Read configuration variable file if it is present
[ -r "/etc/default/$NAME" ] && . "/etc/default/$NAME"

# Quietly do nothing when the daemon binary is missing or not executable
test -x "$REPMGRD_BIN" || exit 0

# Carry on only when enabled (any value starting with "y" or "Y")
case "$REPMGRD_ENABLED" in
    [Yy]*)
        # "break" is only defined inside a loop; ":" is the portable no-op
        :
        ;;
    *)
        exit 0
        ;;
esac

# Define LSB log_* functions.
. /lib/lsb/init-functions

# Without options the daemon is not started
if [ -z "$REPMGRD_OPTS" ]
then
    log_warning_msg "Not starting $NAME, REPMGRD_OPTS not set in /etc/default/$NAME"
    exit 0
fi
# Start repmgrd in the background as $REPMGRD_USER and record its pid in
# $REPMGRD_PIDFILE.
do_start()
{
    # Return
    #   0 if daemon has been started
    #   1 if daemon was already running
    #   other if daemon could not be started or a failure occurred
    #
    # $REPMGRD_OPTS is left unquoted on purpose: it holds several
    # whitespace-separated arguments that must be split into words.
    start-stop-daemon --start --quiet --background --chuid "$REPMGRD_USER" --make-pidfile --pidfile "$REPMGRD_PIDFILE" --exec "$REPMGRD_BIN" -- $REPMGRD_OPTS
}
# Stop the repmgrd process recorded in $REPMGRD_PIDFILE: send TERM, wait up
# to 30 seconds, then send KILL and wait up to 5 more seconds.
do_stop()
{
    # Return
    #   0 if daemon has been stopped
    #   1 if daemon was already stopped
    #   other if daemon could not be stopped or a failure occurred
    start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile "$REPMGRD_PIDFILE" --exec "$REPMGRD_BIN"
}
# Dispatch on the requested action.
#
# The script runs under "set -e", so a bare "do_start" or "do_stop" that
# returns non-zero would abort the script before its status could be
# inspected. The status is therefore captured through "||", which is exempt
# from "set -e".
case "$1" in
    start)
        log_daemon_msg "Starting $DESC" "$NAME"
        rc=0
        do_start || rc=$?
        case "$rc" in
            0) log_end_msg 0 ;;
            1) log_progress_msg "already started"
               log_end_msg 0 ;;
            *) log_end_msg 1 ;;
        esac
        ;;
    stop)
        log_daemon_msg "Stopping $DESC" "$NAME"
        rc=0
        do_stop || rc=$?
        case "$rc" in
            0) log_end_msg 0 ;;
            1) log_progress_msg "already stopped"
               log_end_msg 0 ;;
            *) log_end_msg 1 ;;
        esac
        ;;
    restart|force-reload)
        "$0" stop
        "$0" start
        ;;
    status)
        status_of_proc -p "$REPMGRD_PIDFILE" "$REPMGRD_BIN" "$NAME" && exit 0 || exit $?
        ;;
    *)
        # SCRIPTNAME is never set in this script; report the invoked path
        echo "Usage: $0 {start|stop|restart|force-reload|status}" >&2
        exit 3
        ;;
esac

exit 0

View File

@@ -1,6 +1,6 @@
/* /*
* errcode.h * errcode.h
* Copyright (C) 2ndQuadrant, 2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -33,5 +33,8 @@
#define ERR_PROMOTED 8 #define ERR_PROMOTED 8
#define ERR_BAD_PASSWORD 9 #define ERR_BAD_PASSWORD 9
#define ERR_STR_OVERFLOW 10 #define ERR_STR_OVERFLOW 10
#define ERR_FAILOVER_FAIL 11
#define ERR_BAD_SSH 12
#define ERR_SYS_FAILURE 13
#endif /* _ERRCODE_H_ */ #endif /* _ERRCODE_H_ */

134
log.c
View File

@@ -1,6 +1,6 @@
/* /*
* log.c - Logging methods * log.c - Logging methods
* Copyright (C) 2ndQuadrant, 2010-2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This module is a set of methods for logging (currently only syslog) * This module is a set of methods for logging (currently only syslog)
* *
@@ -25,9 +25,11 @@
#ifdef HAVE_SYSLOG #ifdef HAVE_SYSLOG
#include <syslog.h> #include <syslog.h>
#include <stdarg.h>
#endif #endif
#include <stdarg.h>
#include <time.h>
#include "log.h" #include "log.h"
#define DEFAULT_IDENT "repmgr" #define DEFAULT_IDENT "repmgr"
@@ -37,20 +39,44 @@
/* #define REPMGR_DEBUG */ /* #define REPMGR_DEBUG */
static int detect_log_level(const char* level); void
static int detect_log_facility(const char* facility); stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
int log_type = REPMGR_STDERR;
int log_level = LOG_NOTICE;
bool logger_init(const char* ident, const char* level, const char* facility)
{ {
time_t t;
struct tm *tm;
char buff[100];
va_list ap;
int l; if (log_level >= level)
int f; {
time(&t);
tm = localtime(&t);
strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm);
fprintf(stderr, "%s [%s] ", buff, level_name);
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
fflush(stderr);
}
}
static int detect_log_level(const char *level);
static int detect_log_facility(const char *facility);
int log_type = REPMGR_STDERR;
int log_level = LOG_NOTICE;
bool
logger_init(t_configuration_options * opts, const char *ident, const char *level, const char *facility)
{
int l;
int f;
#ifdef HAVE_SYSLOG #ifdef HAVE_SYSLOG
int syslog_facility = DEFAULT_SYSLOG_FACILITY; int syslog_facility = DEFAULT_SYSLOG_FACILITY;
#endif #endif
#ifdef REPMGR_DEBUG #ifdef REPMGR_DEBUG
@@ -107,21 +133,33 @@ bool logger_init(const char* ident, const char* level, const char* facility)
if (log_type == REPMGR_SYSLOG) if (log_type == REPMGR_SYSLOG)
{ {
setlogmask (LOG_UPTO (log_level)); setlogmask(LOG_UPTO(log_level));
openlog (ident, LOG_CONS | LOG_PID | LOG_NDELAY, syslog_facility); openlog(ident, LOG_CONS | LOG_PID | LOG_NDELAY, syslog_facility);
stderr_log_notice(_("Setup syslog (level: %s, facility: %s)\n"), level, facility); stderr_log_notice(_("Setup syslog (level: %s, facility: %s)\n"), level, facility);
} }
#endif #endif
if (*opts->logfile)
{
FILE *fd;
fd = freopen(opts->logfile, "a", stderr);
if (fd == NULL)
{
fprintf(stderr, "error reopening stderr to '%s': %s",
opts->logfile, strerror(errno));
}
}
return true; return true;
} }
bool logger_shutdown(void) bool
logger_shutdown(void)
{ {
#ifdef HAVE_SYSLOG #ifdef HAVE_SYSLOG
if (log_type == REPMGR_SYSLOG) if (log_type == REPMGR_SYSLOG)
closelog(); closelog();
@@ -135,13 +173,15 @@ bool logger_shutdown(void)
* options, which might increase requested logging over what's specified * options, which might increase requested logging over what's specified
* in the regular configuration file. * in the regular configuration file.
*/ */
void logger_min_verbose(int minimum) void
logger_min_verbose(int minimum)
{ {
if (log_level < minimum) if (log_level < minimum)
log_level = minimum; log_level = minimum;
} }
int detect_log_level(const char* level) int
detect_log_level(const char *level)
{ {
if (!strcmp(level, "DEBUG")) if (!strcmp(level, "DEBUG"))
return LOG_DEBUG; return LOG_DEBUG;
@@ -163,40 +203,42 @@ int detect_log_level(const char* level)
return 0; return 0;
} }
int detect_log_facility(const char* facility) int
detect_log_facility(const char *facility)
{ {
int local = 0; int local = 0;
if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6) if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6)
{ {
local = atoi (&facility[5]); local = atoi(&facility[5]);
switch (local) switch (local)
{ {
case 0: case 0:
return LOG_LOCAL0; return LOG_LOCAL0;
break; break;
case 1: case 1:
return LOG_LOCAL1; return LOG_LOCAL1;
break; break;
case 2: case 2:
return LOG_LOCAL2; return LOG_LOCAL2;
break; break;
case 3: case 3:
return LOG_LOCAL3; return LOG_LOCAL3;
break; break;
case 4: case 4:
return LOG_LOCAL4; return LOG_LOCAL4;
break; break;
case 5: case 5:
return LOG_LOCAL5; return LOG_LOCAL5;
break; break;
case 6: case 6:
return LOG_LOCAL6; return LOG_LOCAL6;
break; break;
case 7: case 7:
return LOG_LOCAL7; return LOG_LOCAL7;
break; break;
} }
} }

53
log.h
View File

@@ -1,6 +1,6 @@
/* /*
* log.h * log.h
* Copyright (c) 2ndQuadrant, 2010-2011 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -25,15 +25,19 @@
#define REPMGR_SYSLOG 1 #define REPMGR_SYSLOG 1
#define REPMGR_STDERR 2 #define REPMGR_STDERR 2
void
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
/* Standard error logging */ /* Standard error logging */
#define stderr_log_debug(...) if (log_level >= LOG_DEBUG) fprintf(stderr, __VA_ARGS__) #define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
#define stderr_log_info(...) if (log_level >= LOG_INFO) fprintf(stderr, __VA_ARGS__) #define stderr_log_info(...) stderr_log_with_level("INFO", LOG_INFO, __VA_ARGS__)
#define stderr_log_notice(...) if (log_level >= LOG_NOTICE) fprintf(stderr, __VA_ARGS__) #define stderr_log_notice(...) stderr_log_with_level("NOTICE", LOG_NOTICE, __VA_ARGS__)
#define stderr_log_warning(...) if (log_level >= LOG_WARNING) fprintf(stderr, __VA_ARGS__) #define stderr_log_warning(...) stderr_log_with_level("WARNING", LOG_WARNING, __VA_ARGS__)
#define stderr_log_err(...) if (log_level >= LOG_ERR) fprintf(stderr, __VA_ARGS__) #define stderr_log_err(...) stderr_log_with_level("ERROR", LOG_ERR, __VA_ARGS__)
#define stderr_log_crit(...) if (log_level >= LOG_CRIT) fprintf(stderr, __VA_ARGS__) #define stderr_log_crit(...) stderr_log_with_level("CRITICAL", LOG_CRIT, __VA_ARGS__)
#define stderr_log_alert(...) if (log_level >= LOG_ALERT) fprintf(stderr, __VA_ARGS__) #define stderr_log_alert(...) stderr_log_with_level("ALERT", LOG_ALERT, __VA_ARGS__)
#define stderr_log_emerg(...) if (log_level >= LOG_EMERG) fprintf(stderr, __VA_ARGS__) #define stderr_log_emerg(...) stderr_log_with_level("EMERGENCY", LOG_EMERG, __VA_ARGS__)
#ifdef HAVE_SYSLOG #ifdef HAVE_SYSLOG
@@ -86,17 +90,16 @@
if (log_type == REPMGR_SYSLOG) syslog(LOG_ALERT, __VA_ARGS__); \ if (log_type == REPMGR_SYSLOG) syslog(LOG_ALERT, __VA_ARGS__); \
else stderr_log_alert(__VA_ARGS__); \ else stderr_log_alert(__VA_ARGS__); \
} }
#else #else
#define LOG_EMERG 0 /* system is unusable */ #define LOG_EMERG 0 /* system is unusable */
#define LOG_ALERT 1 /* action must be taken immediately */ #define LOG_ALERT 1 /* action must be taken immediately */
#define LOG_CRIT 2 /* critical conditions */ #define LOG_CRIT 2 /* critical conditions */
#define LOG_ERR 3 /* error conditions */ #define LOG_ERR 3 /* error conditions */
#define LOG_WARNING 4 /* warning conditions */ #define LOG_WARNING 4 /* warning conditions */
#define LOG_NOTICE 5 /* normal but significant condition */ #define LOG_NOTICE 5 /* normal but significant condition */
#define LOG_INFO 6 /* informational */ #define LOG_INFO 6 /* informational */
#define LOG_DEBUG 7 /* debug-level messages */ #define LOG_DEBUG 7 /* debug-level messages */
#define log_debug(...) stderr_log_debug(__VA_ARGS__) #define log_debug(...) stderr_log_debug(__VA_ARGS__)
#define log_info(...) stderr_log_info(__VA_ARGS__) #define log_info(...) stderr_log_info(__VA_ARGS__)
@@ -106,16 +109,18 @@
#define log_crit(...) stderr_log_crit(__VA_ARGS__) #define log_crit(...) stderr_log_crit(__VA_ARGS__)
#define log_alert(...) stderr_log_alert(__VA_ARGS__) #define log_alert(...) stderr_log_alert(__VA_ARGS__)
#define log_emerg(...) stderr_log_emerg(__VA_ARGS__) #define log_emerg(...) stderr_log_emerg(__VA_ARGS__)
#endif #endif
/* Logger initialisation and shutdown */ /* Logger initialisation and shutdown */
bool logger_shutdown(void); bool logger_shutdown(void);
bool logger_init(const char* ident, const char* level, const char* facility);
void logger_min_verbose(int minimum);
extern int log_type; bool logger_init(t_configuration_options * opts, const char *ident,
extern int log_level; const char *level, const char *facility);
void logger_min_verbose(int minimum);
extern int log_type;
extern int log_level;
#endif #endif

2325
repmgr.c

File diff suppressed because it is too large Load Diff

View File

@@ -1,21 +0,0 @@
###################################################
# Replication Manager configuration file
###################################################
# Cluster name
cluster=test
# Node ID
node=2
# Connection information
conninfo='host=192.168.204.104'
rsync_options=--archive --checksum --compress --progress --rsh=ssh
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
# Default: NOTICE
loglevel=NOTICE
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
# Default: STDERR
logfacility=STDERR

62
repmgr.conf.sample Normal file
View File

@@ -0,0 +1,62 @@
###################################################
# Replication Manager configuration file
###################################################
# Cluster name
cluster=test
# Node ID
node=2
node_name=standby2
# Connection information
conninfo='host=192.168.204.104'
rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
ssh_options=-o "StrictHostKeyChecking no"
# How many seconds we wait for master response before declaring master failure
master_response_timeout=60
# How many times we try to reconnect to master before starting failover procedure
reconnect_attempts=6
reconnect_interval=10
# Autofailover options
failover=manual
priority=-1
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
# Default: NOTICE
loglevel=NOTICE
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
# Default: STDERR
logfacility=STDERR
# path to pg_ctl executable
pg_bindir=/usr/bin/
#
# you may add command line arguments for pg_ctl
#
# pg_ctl_options='-s'
#
# redirect stderr to a logfile
#
# logfile='/var/log/repmgr.log'
#
# change monitoring interval; default is 2s
#
# monitor_interval_secs=2
#
# change wait time for master; before we bail out and exit when the
# master disappears, we wait 6 * retry_promote_interval_secs seconds;
# by default this would be half an hour (since the default value of
# retry_promote_interval_secs is 300)
#
# retry_promote_interval_secs=300

View File

@@ -1,6 +1,6 @@
/* /*
* repmgr.h * repmgr.h
* Copyright (c) 2ndQuadrant, 2010-2011 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -30,6 +30,7 @@
#define PRIMARY_MODE 0 #define PRIMARY_MODE 0
#define STANDBY_MODE 1 #define STANDBY_MODE 1
#define WITNESS_MODE 2
#include "config.h" #include "config.h"
#define MAXFILENAME 1024 #define MAXFILENAME 1024
@@ -42,25 +43,32 @@
#define DEFAULT_DBNAME "postgres" #define DEFAULT_DBNAME "postgres"
#define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_" #define DEFAULT_REPMGR_SCHEMA_PREFIX "repmgr_"
#define MANUAL_FAILOVER 0
#define AUTOMATIC_FAILOVER 1
/* Run time options type */ /* Run time options type */
typedef struct typedef struct
{ {
char dbname[MAXLEN]; char dbname[MAXLEN];
char host[MAXLEN]; char host[MAXLEN];
char username[MAXLEN]; char username[MAXLEN];
char dest_dir[MAXFILENAME]; char dest_dir[MAXFILENAME];
char config_file[MAXFILENAME]; char config_file[MAXFILENAME];
char remote_user[MAXLEN]; char remote_user[MAXLEN];
char wal_keep_segments[MAXLEN]; char wal_keep_segments[MAXLEN];
bool verbose; bool verbose;
bool force; bool force;
bool ignore_rsync_warn; bool wait_for_master;
bool ignore_rsync_warn;
char masterport[MAXLEN]; char masterport[MAXLEN];
char localport[MAXLEN];
/* parameter used by CLUSTER CLEANUP */ /* parameter used by CLUSTER CLEANUP */
int keep_history; int keep_history;
} t_runtime_options; } t_runtime_options;
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }
#endif #endif

View File

@@ -1,7 +1,7 @@
/* /*
* repmgr.sql * repmgr.sql
* *
* Copyright (C) 2ndQuadrant, 2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
*/ */
@@ -14,8 +14,11 @@ CREATE SCHEMA repmgr;
*/ */
CREATE TABLE repl_nodes ( CREATE TABLE repl_nodes (
id integer primary key, id integer primary key,
cluster text not null, -- Name to identify the cluster cluster text not null, -- Name to identify the cluster
conninfo text not null name text not null,
conninfo text not null,
priority integer not null,
witness boolean not null default false
); );
ALTER TABLE repl_nodes OWNER TO repmgr; ALTER TABLE repl_nodes OWNER TO repmgr;
@@ -28,13 +31,12 @@ CREATE TABLE repl_monitor (
standby_node INTEGER NOT NULL, standby_node INTEGER NOT NULL,
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
last_wal_primary_location TEXT NOT NULL, last_wal_primary_location TEXT NOT NULL,
last_wal_standby_location TEXT NOT NULL, last_wal_standby_location TEXT, -- In case of a witness server this will be NULL
replication_lag BIGINT NOT NULL, replication_lag BIGINT NOT NULL,
apply_lag BIGINT NOT NULL apply_lag BIGINT NOT NULL
); );
ALTER TABLE repl_monitor OWNER TO repmgr; ALTER TABLE repl_monitor OWNER TO repmgr;
/* /*
* This view shows the latest monitor info about every node. * This view shows the latest monitor info about every node.
* Interesting thing to see: * Interesting thing to see:
@@ -46,14 +48,14 @@ ALTER TABLE repl_monitor OWNER TO repmgr;
* time_lag: how many seconds are we from being up-to-date with master * time_lag: how many seconds are we from being up-to-date with master
*/ */
CREATE VIEW repl_status AS CREATE VIEW repl_status AS
WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node SELECT primary_node, standby_node, name AS standby_name, last_monitor_time, last_wal_primary_location,
ORDER BY last_monitor_time desc)
FROM repl_monitor)
SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location,
last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag,
pg_size_pretty(apply_lag) apply_lag, pg_size_pretty(apply_lag) apply_lag,
age(now(), last_monitor_time) AS time_lag age(now(), last_monitor_time) AS time_lag
FROM monitor_info a FROM repl_monitor JOIN repl_nodes ON standby_node = id
WHERE row_number = 1; WHERE (standby_node, last_monitor_time) IN (SELECT standby_node, MAX(last_monitor_time)
FROM repl_monitor GROUP BY 1);
ALTER VIEW repl_status OWNER TO repmgr; ALTER VIEW repl_status OWNER TO repmgr;
CREATE INDEX idx_repl_status_sort ON repl_monitor(last_monitor_time, standby_node);

1494
repmgrd.c

File diff suppressed because it is too large Load Diff

20
sql/Makefile Normal file
View File

@@ -0,0 +1,20 @@
#
# Makefile
# Copyright (c) 2ndQuadrant, 2010
#
# Inputs for the PostgreSQL build makefiles included below: the module name,
# the SQL script that is generated at build time, the SQL script shipped
# as-is, and the object file compiled from repmgr_funcs.c.
MODULE_big = repmgr_funcs
DATA_built=repmgr_funcs.sql
DATA=uninstall_repmgr_funcs.sql
OBJS=repmgr_funcs.o
# With USE_PGXS defined, ask pg_config where the PGXS makefile lives and
# include it; otherwise assume this directory is contrib/repmgr/sql inside a
# PostgreSQL source tree and use that tree's global makefiles.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/repmgr/sql
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

232
sql/repmgr_funcs.c Normal file
View File

@@ -0,0 +1,232 @@
/*
* repmgr_funcs.c
* Copyright (c) 2ndQuadrant, 2010
*
* Shared memory state management and some backend functions in SQL
*/
#include "postgres.h"
#include "fmgr.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/procarray.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/timestamp.h"
/* same definition as the one in xlog_internal.h */
#define MAXFNAMELEN 64
PG_MODULE_MAGIC;
/*
 * Global shared state
 *
 * A single instance is created in (or attached from) shared memory by
 * repmgr_shmem_startup(). The SQL-callable functions in this file read and
 * write its fields while holding "lock".
 */
typedef struct repmgrSharedState
{
LWLockId lock; /* protects location and last_updated */
char location[MAXFNAMELEN]; /* last known xlog location */
TimestampTz last_updated; /* timestamp stored by repmgr_update_last_updated() */
} repmgrSharedState;
/*
 * Links to shared memory state
 *
 * shared_state stays NULL until repmgr_shmem_startup() has run; every
 * SQL-callable function checks it before touching shared memory.
 */
static repmgrSharedState *shared_state = NULL;
/* hook value found by _PG_init(); chained in repmgr_shmem_startup() and put back by _PG_fini() */
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
/* Module load/unload callbacks */
void _PG_init(void);
void _PG_fini(void);
/* Internal helpers */
static void repmgr_shmem_startup(void);
static Size repmgr_memsize(void);
static bool repmgr_set_standby_location(char *locationstr);
/* SQL-callable functions working on the stored xlog location */
Datum repmgr_update_standby_location(PG_FUNCTION_ARGS);
Datum repmgr_get_last_standby_location(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(repmgr_update_standby_location);
PG_FUNCTION_INFO_V1(repmgr_get_last_standby_location);
/* SQL-callable functions working on the stored timestamp */
Datum repmgr_update_last_updated(PG_FUNCTION_ARGS);
Datum repmgr_get_last_updated(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(repmgr_update_last_updated);
PG_FUNCTION_INFO_V1(repmgr_get_last_updated);
/*
 * Module load callback
 *
 * Reserves the shared memory and the LWLock this module needs and installs
 * the shmem_startup hook, but only while shared_preload_libraries is being
 * processed.
 */
void
_PG_init(void)
{
	/*
	 * The shared memory area can only be created when we are loaded via
	 * shared_preload_libraries. In any other situation do nothing at all
	 * rather than raising an error: it is useful to be able to create the
	 * repmgr functions even when the module is not active, and the functions
	 * protect themselves against being called in that state.
	 */
	if (process_shared_preload_libraries_in_progress)
	{
		/*
		 * Request additional shared resources. (These are no-ops if we're
		 * not in the postmaster process.) The actual allocation or attach
		 * happens later, in repmgr_shmem_startup().
		 */
		RequestAddinShmemSpace(repmgr_memsize());
		RequestAddinLWLocks(1);

		/* Remember the current hook so it can be chained, then install ours. */
		prev_shmem_startup_hook = shmem_startup_hook;
		shmem_startup_hook = repmgr_shmem_startup;
	}
}
/*
 * Module unload callback
 *
 * Puts back the shmem_startup_hook value that _PG_init() saved in
 * prev_shmem_startup_hook.
 *
 * NOTE(review): when _PG_init() returned early without installing the hook,
 * prev_shmem_startup_hook is still NULL, so this assignment sets
 * shmem_startup_hook to NULL -- confirm that is acceptable.
 */
void
_PG_fini(void)
{
/* Uninstall hooks. */
shmem_startup_hook = prev_shmem_startup_hook;
}
/*
 * shmem_startup hook: allocate or attach to the repmgr shared memory state.
 *
 * Runs any previously installed shmem_startup hook first. The shared struct
 * is then looked up (or created) while holding AddinShmemInitLock; when it
 * is newly created, its lock is assigned and both data fields are given
 * defined starting values.
 */
static void
repmgr_shmem_startup(void)
{
	bool		found;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	/* reset in case this is a restart within the postmaster */
	shared_state = NULL;

	/* Create or attach to the shared memory state */
	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

	shared_state = ShmemInitStruct("repmgr shared state",
								   sizeof(repmgrSharedState),
								   &found);

	if (!found)
	{
		/* First time through ... */
		shared_state->lock = LWLockAssign();
		snprintf(shared_state->location,
				 sizeof(shared_state->location), "%X/%X", 0, 0);

		/*
		 * Give the timestamp a defined value too, so that
		 * repmgr_get_last_updated() does not return whatever happened to be
		 * in the segment before the first repmgr_update_last_updated() call.
		 */
		shared_state->last_updated = 0;
	}

	LWLockRelease(AddinShmemInitLock);
}
/*
 * Estimate shared memory space needed.
 *
 * Returns the size of one repmgrSharedState rounded up with MAXALIGN; this
 * is the amount _PG_init() passes to RequestAddinShmemSpace().
 */
static Size
repmgr_memsize(void)
{
return MAXALIGN(sizeof(repmgrSharedState));
}
/*
 * Store locationstr as the last known xlog location in shared memory.
 *
 * The copy is made while holding the shared state lock exclusively. Input
 * longer than the buffer is truncated, and the stored string is always
 * NUL-terminated.
 *
 * Returns false when the shared state is not available, true otherwise.
 */
static bool
repmgr_set_standby_location(char *locationstr)
{
	/* Safety check... */
	if (!shared_state)
		return false;

	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);

	/*
	 * strncpy() leaves the destination unterminated when the source fills
	 * it completely, so copy one byte less and terminate explicitly.
	 */
	strncpy(shared_state->location, locationstr,
			sizeof(shared_state->location) - 1);
	shared_state->location[sizeof(shared_state->location) - 1] = '\0';

	LWLockRelease(shared_state->lock);

	return true;
}
/* SQL Functions */
/*
 * Read last xlog location reported by this standby from shared memory.
 *
 * Returns SQL NULL when the shared state is not available; otherwise the
 * stored location as text. The value is copied into a local buffer while
 * the lock is held in shared mode, so the text conversion happens after the
 * lock has been released.
 */
Datum
repmgr_get_last_standby_location(PG_FUNCTION_ARGS)
{
	char		location[MAXFNAMELEN];

	/* Safety check... */
	if (!shared_state)
		PG_RETURN_NULL();

	LWLockAcquire(shared_state->lock, LW_SHARED);
	strncpy(location, shared_state->location, sizeof(location) - 1);
	LWLockRelease(shared_state->lock);

	/*
	 * strncpy() does not terminate the copy if the source has no NUL within
	 * the copied bytes; cstring_to_text() needs a terminated string.
	 */
	location[sizeof(location) - 1] = '\0';

	PG_RETURN_TEXT_P(cstring_to_text(location));
}
/* Set update last xlog location reported by this standby to shared memory */
Datum
repmgr_update_standby_location(PG_FUNCTION_ARGS)
{
text *location = PG_GETARG_TEXT_P(0);
char *locationstr;
/* Safety check... */
if (!shared_state)
PG_RETURN_BOOL(false);
locationstr = text_to_cstring(location);
PG_RETURN_BOOL(repmgr_set_standby_location(locationstr));
}
/*
 * Store the current timestamp as "last updated" and return it.
 *
 * Returns SQL NULL when the shared state is not available. The lock is
 * taken in exclusive mode because the shared field is being written.
 */
Datum
repmgr_update_last_updated(PG_FUNCTION_ARGS)
{
	TimestampTz last_updated = GetCurrentTimestamp();

	/* Safety check... */
	if (!shared_state)
		PG_RETURN_NULL();

	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
	shared_state->last_updated = last_updated;
	LWLockRelease(shared_state->lock);

	PG_RETURN_TIMESTAMPTZ(last_updated);
}
/*
 * Return the "last updated" timestamp held in shared memory.
 *
 * Returns SQL NULL when the shared state is not available. The field is
 * only read here, so a shared lock is sufficient and lets readers run
 * concurrently, matching repmgr_get_last_standby_location().
 */
Datum
repmgr_get_last_updated(PG_FUNCTION_ARGS)
{
	TimestampTz last_updated;

	/* Safety check... */
	if (!shared_state)
		PG_RETURN_NULL();

	LWLockAcquire(shared_state->lock, LW_SHARED);
	last_updated = shared_state->last_updated;
	LWLockRelease(shared_state->lock);

	PG_RETURN_TIMESTAMPTZ(last_updated);
}

23
sql/repmgr_funcs.sql.in Normal file
View File

@@ -0,0 +1,23 @@
/*
* repmgr_funcs.sql
* Copyright (c) 2ndQuadrant, 2010-2014
*
*/
-- SET SEARCH_PATH TO 'repmgr';
-- Stores the given xlog location in shared memory; returns false when the
-- module's shared state is not available.
CREATE FUNCTION repmgr_update_standby_location(text) RETURNS boolean
AS 'MODULE_PATHNAME', 'repmgr_update_standby_location'
LANGUAGE C STRICT;
-- Returns the xlog location last stored in shared memory, or NULL when the
-- module's shared state is not available.
CREATE FUNCTION repmgr_get_last_standby_location() RETURNS text
AS 'MODULE_PATHNAME', 'repmgr_get_last_standby_location'
LANGUAGE C STRICT;
-- Stores the current timestamp in shared memory and returns it, or NULL when
-- the module's shared state is not available.
CREATE FUNCTION repmgr_update_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
AS 'MODULE_PATHNAME', 'repmgr_update_last_updated'
LANGUAGE C STRICT;
-- Returns the timestamp last stored in shared memory, or NULL when the
-- module's shared state is not available.
CREATE FUNCTION repmgr_get_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
AS 'MODULE_PATHNAME', 'repmgr_get_last_updated'
LANGUAGE C STRICT;

View File

@@ -0,0 +1,11 @@
/*
* uninstall_repmgr_funcs.sql
* Copyright (c) 2ndQuadrant, 2010-2014
*
*/
DROP FUNCTION repmgr_update_standby_location(text);
DROP FUNCTION repmgr_get_last_standby_location();
DROP FUNCTION repmgr_update_last_updated();
DROP FUNCTION repmgr_get_last_updated();

View File

@@ -1,7 +1,7 @@
/* /*
* strutil.c * strutil.c
* *
* Copyright (C) 2ndQuadrant, 2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -25,29 +25,21 @@
#include "log.h" #include "log.h"
#include "strutil.h" #include "strutil.h"
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap); static int
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
/* Add strnlen on platforms that don't have it, like OS X */ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
#ifndef strnlen
size_t
strnlen(const char *s, size_t n)
{
const char *end = (const char *) memchr(s, '\0', n);
return(end ? end - s : n);
}
#endif
static int static int
xvsnprintf(char *str, size_t size, const char *format, va_list ap) xvsnprintf(char *str, size_t size, const char *format, va_list ap)
{ {
int retval; int retval;
retval = vsnprintf(str, size, format, ap); retval = vsnprintf(str, size, format, ap);
if (retval >= size) if (retval >= (int) size)
{ {
log_err(_("Buffer of size not large enough to format entire string '%s'\n"), log_err(_("Buffer of size not large enough to format entire string '%s'\n"),
str); str);
exit(ERR_STR_OVERFLOW); exit(ERR_STR_OVERFLOW);
} }
@@ -56,10 +48,10 @@ xvsnprintf(char *str, size_t size, const char *format, va_list ap)
int int
xsnprintf(char *str, size_t size, const char *format, ...) xsnprintf(char *str, size_t size, const char *format,...)
{ {
va_list arglist; va_list arglist;
int retval; int retval;
va_start(arglist, format); va_start(arglist, format);
retval = xvsnprintf(str, size, format, arglist); retval = xvsnprintf(str, size, format, arglist);
@@ -70,7 +62,7 @@ xsnprintf(char *str, size_t size, const char *format, ...)
int int
sqlquery_snprintf(char *str, const char *format, ...) sqlquery_snprintf(char *str, const char *format,...)
{ {
va_list arglist; va_list arglist;
int retval; int retval;
@@ -83,7 +75,8 @@ sqlquery_snprintf(char *str, const char *format, ...)
} }
int maxlen_snprintf(char *str, const char *format, ...) int
maxlen_snprintf(char *str, const char *format,...)
{ {
va_list arglist; va_list arglist;
int retval; int retval;

View File

@@ -1,6 +1,6 @@
/* /*
* strutil.h * strutil.h
* Copyright (C) 2ndQuadrant, 2010-2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
@@ -22,7 +22,7 @@
#define _STRUTIL_H_ #define _STRUTIL_H_
#include <stdlib.h> #include <stdlib.h>
#include <errcode.h> #include "errcode.h"
#define QUERY_STR_LEN 8192 #define QUERY_STR_LEN 8192
#define MAXLEN 1024 #define MAXLEN 1024
@@ -31,13 +31,16 @@
#define MAXCONNINFO 1024 #define MAXCONNINFO 1024
extern int xsnprintf(char *str, size_t size, const char *format, ...); extern int
extern int sqlquery_snprintf(char *str, const char *format, ...); xsnprintf(char *str, size_t size, const char *format,...)
extern int maxlen_snprintf(char *str, const char *format, ...); __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
/* Add strnlen on platforms that don't have it, like OS X */ extern int
#ifndef strnlen sqlquery_snprintf(char *str, const char *format,...)
extern size_t strnlen(const char *s, size_t n); __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
#endif
#endif /* _STRUTIL_H_ */ extern int
maxlen_snprintf(char *str, const char *format,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
#endif /* _STRUTIL_H_ */

View File

@@ -1,7 +1,7 @@
/* /*
* uninstall_repmgr.sql * uninstall_repmgr.sql
* *
* Copyright (C) 2ndQuadrant, 2010-2011 * Copyright (C) 2ndQuadrant, 2010-2014
* *
*/ */

View File

@@ -1,4 +1,6 @@
#ifndef _VERSION_H_ #ifndef _VERSION_H_
#define _VERSION_H_ #define _VERSION_H_
#define REPMGR_VERSION "1.2.0"
#define REPMGR_VERSION "2.0.2"
#endif #endif