Compare commits

...

86 Commits

Author SHA1 Message Date
Christian Kruse
9c3d79147b now version.h contains the right version 2014-02-07 21:47:39 +01:00
Christian Kruse
ca470647cb cleanup of usage text
Now it properly aligns and breaks at 78 characters.
2014-01-30 14:26:17 +01:00
Christian Kruse
62ee287e3f updated TODO 2014-01-30 14:10:14 +01:00
Christian Kruse
729a1b848a release notes for 2.0 stable 2014-01-30 13:59:17 +01:00
Christian Kruse
701cf043fd fix: seems as if I misread -hackers 2014-01-23 16:46:49 +01:00
Christian Kruse
bbb67c55f6 simple past of set is set 2014-01-23 10:50:37 +01:00
Christian Kruse
c2c48a9fe6 removed already finished TODO tasks 2014-01-23 10:48:04 +01:00
Christian Kruse
9d6ac2ebf9 fixed documentation and line endings 2014-01-23 10:39:21 +01:00
Christian Kruse
680f23fb1d copyright push 2014-01-23 10:37:49 +01:00
Christian Kruse
1159113c58 ignore the dynamic shared memory directory, too 2014-01-23 10:02:32 +01:00
Christian Kruse
f25a709454 added an explicit type cast to avoid compiler warnings 2014-01-22 15:17:47 +01:00
Christian Kruse
897daddcc7 removed not needed arguments to avoid compiler warnings 2014-01-22 15:17:28 +01:00
Christian Kruse
0fdcce0477 use if instead of switch and avoid a warning 2014-01-22 15:12:29 +01:00
Christian Kruse
de58eff7c1 added a chdir() for proper daemonizing 2014-01-22 14:30:38 +01:00
Christian Kruse
f2a0b31a20 more log format fixes 2014-01-22 14:30:24 +01:00
Christian Kruse
e007a55967 fix: do not use fsync()
We do not need fsync(), the fflush() is enough to avoid concurrent
logs.
2014-01-22 11:47:50 +01:00
Christian Kruse
d235c696af fix: do not newline at the start of a log line
This breaks the log file format since it will have a line break directly
after the timestamp
2014-01-22 11:47:02 +01:00
Christian Kruse
4ef6fbb5fe do not close stderr but reopen it to /dev/null
We want stderr to be always a valid file descriptor
2014-01-21 16:25:57 +01:00
Christian Kruse
2e61d7b156 refactoring: daemonizing is now a function 2014-01-21 16:19:49 +01:00
Christian Kruse
4496a0761e we now use a function and are more sophisticated
Refactoring part: we now use a function to generate the PID
file. Sophistication: we now check if the PID contained in the file is a
valid PID. We ignore the file if it isn't.
2014-01-21 16:18:15 +01:00
Christian Kruse
3978ead184 use a second fork to avoid a terminal
after the setsid() we are the process leader. And as a process leader we
are able to open a new terminal, even if we currently don't own one. So
we do another fork and do not call setsid() and not become a process
leader to avoid that.
2014-01-21 15:51:33 +01:00
Christian Kruse
b36dbf61fe reopening stdin and stdout to /dev/null now
stdin, stdout and stderr should always be valid file handles. Thus we
don't close them but reopen them to /dev/null
2014-01-21 15:31:38 +01:00
Christian Kruse
84466ecca5 log_crit() is more appropriate 2014-01-21 15:23:20 +01:00
Christian Kruse
649086e5e4 use unlink() instead of remove()
`remove()` will do a rmdir if necessary - we don't want that. So we use `unlink()`
2014-01-21 15:22:31 +01:00
Christian Kruse
7cf2eb440d renamed config options to a much more descriptive name 2014-01-21 15:19:50 +01:00
Christian Kruse
388bbfb773 split install target into install_prog and install_ext
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:23:33 +01:00
Christian Kruse
a89aa02c68 fix: make pg_config be settable from outside the makefile
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:22:59 +01:00
Christian Kruse
c81793b63f fix: added forgotten options.priority value
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:18:12 +01:00
Christian Kruse
b4e83cf188 Add format attribute checking for printf() like functions
Patch by Marco Nenciarini <mnencia@debian.org>
2014-01-21 14:14:36 +01:00
Christian Kruse
1db61ce277 fix: fail when repmgr_funcs is not pre-loaded
when repmgr_funcs is not pre-loaded `repmgr_update_standby_location()`
will return false and `repmgr_get_last_standby_location()` will return
an empty string. Thus we may end in an endless loop. To avoid that we fail.
2014-01-21 13:54:10 +01:00
Christian Kruse
41abf9a7ef fix: flushing and fsync()ing the log file
When not flushing and fsync()ing it the output may be garbled due to
concurrent writes to the file (system() spawns a child process with
stdin/stdout/stderr inherited from its parent)
2014-01-21 13:52:27 +01:00
Christian Kruse
abebc53ddc fix: sscanf() does not set variables to 0 on error 2014-01-21 13:48:41 +01:00
Christian Kruse
5fc4a0382f added config options sleep_delay and sleep_monitor
sleep_monitor replaces the old SLEEP_MONITOR define and makes it
configurable; this is the interval in which we monitor

sleep_delay replaces the old sleep(300) when waiting for the master to
recover.
2014-01-17 14:35:50 +01:00
Christian Kruse
a7d3c9b93a fix: also close stderr when using syslog logging 2014-01-17 12:14:26 +01:00
Christian Kruse
ee9dc9e247 do not use exit()
We avoid using exit() to be able to clean up when we have to
terminate. This includes removal of the PID file as well as closing
database connections.
2014-01-17 11:28:55 +01:00
Christian Kruse
94cb5b94e7 fix: reopen log file on SIGHUP 2014-01-16 17:16:45 +01:00
Christian Kruse
a08aa50f92 fix: close stdin and stdout only in repmgrd
closing stdin and stdout might cause problems when using system(), so we
avoid it.
2014-01-16 16:01:58 +01:00
Christian Kruse
9563877fbb new config option, stdout/stdin closed
Now stdin and stdout get closed. Additionally stderr gets closed and
reopened to the new config option „logfile“ if specified
2014-01-16 15:22:34 +01:00
Christian Kruse
4f3bd6612c do not exit in getMasterConnection() 2014-01-16 15:07:15 +01:00
Christian Kruse
192ee3cdb0 do not exit in get_cluster_size 2014-01-16 15:07:06 +01:00
Christian Kruse
6f149ead8f do not exit in guc_setted and guc_setted_typed 2014-01-16 14:48:46 +01:00
Christian Kruse
77aa6aa326 do not exit in pg_version 2014-01-16 14:48:42 +01:00
Christian Kruse
18206b3a64 do not exit() in is_witness 2014-01-16 14:28:56 +01:00
Christian Kruse
91446bcf93 fix: do not try to reconnect infinitely 2014-01-10 17:26:02 +01:00
Christian Kruse
dcdf8788ae fix: handle connection loss to standby
We do basically the same as we do for the master since connections drop
from time to time
2014-01-10 17:12:03 +01:00
Christian Kruse
4fabfbbbd0 fix: do not exit in is_standby()
Instead we now return an int with 0 meaning „not a standby,“ 1 meaning
„is a standby“ and -1 meaning „connection dropped“
2014-01-10 17:11:16 +01:00
Christian Kruse
c41030b40e Merge branch 'REL2_0_STABLE'
Conflicts:
	HISTORY
	dbutils.h
	repmgr.c
	repmgrd.c
	version.h
2014-01-10 16:07:33 +01:00
Christian Kruse
a0fdadd5d2 this way it is much cleaner 2014-01-09 15:35:44 +01:00
Christian Kruse
4c3d7f80ed now code compiles with -ansi -pedantic and has less warnings 2014-01-09 14:45:07 +01:00
Christian Kruse
6e3fe059d8 added config options pg_bindir and pg_ctl_options 2014-01-09 14:44:34 +01:00
Christian Kruse
9f26254ac3 fix: added some missing initializers to avoid compiler warning 2014-01-09 13:33:22 +01:00
Christian Kruse
0e8ff1730e added handling of a PID file 2014-01-09 13:04:40 +01:00
Christian Kruse
634fdff303 fix: do not call setup_event_handlers() on WIN32
If we put setup_event_handlers() in #ifdef WIN32, we have to do it for
the call and the declaration, too
2014-01-09 12:57:16 +01:00
Christian Kruse
cbce29f009 fixed typos 2014-01-08 11:55:03 +01:00
Christian Kruse
920f925e4b added a new cli option --daemonize
This option forks the process and generates a new session. This
effectively detaches it from the shell. Don't forget to redirect stderr
or use syslog for logging!
2014-01-08 11:53:15 +01:00
Christian Kruse
9fe2d6886e white space cleanup 2014-01-07 16:42:06 +01:00
Christian Kruse
0068dd573a fix: do not compare pointers but the strings 2014-01-07 15:52:29 +01:00
Christian Kruse
d0f3cb59c7 fix: create data directory after sanity check 2014-01-07 14:42:55 +01:00
Christian Kruse
7428e92e10 fix: correctly check the return value of PQexec()
not only check if return value is not NULL but also check that the
returned result is a PGRES_COMMAND_OK (e.g. the INSERT was successful)
2014-01-07 14:27:31 +01:00
Christian Kruse
a97065113d fix: remove own node earlier if force is set
We have to remove our own node before we check for a new master if force
is set; else master register would fail on the second time since there
already is a master (ourselves), even if we specify -F
2014-01-07 14:16:58 +01:00
Christian Kruse
9e2f276fcf fix: do not exit after pg_start_backup() w/o pg_stop_backup() 2014-01-07 14:02:29 +01:00
Christian Kruse
b0cd2b5e43 fix: do not exit() in create_pgdir()
This could leave the database in a locked state (pg_start_backup()).
And since all calls to create_pgdir() handle the return value correctly
we simply replace the exit() by a return false
2014-01-07 14:01:46 +01:00
Jaime Casanova
079a7c9f16 In a failover situation get the nodes in a well defined order.
When deciding which node will be the new master, we should get the
nodes in a well defined order otherwise two standbys could process
nodes with the same priority in different order and end up with
a two master situation.
2013-07-26 00:59:50 -05:00
Jaime Casanova
bdf957ca52 Add a missing ')'. This is a typo introduced in commit
2bc8044fda

Per complaint from Carlos Chapi when compiling for a customer.
2013-07-13 12:39:13 -05:00
Jaime Casanova
67b451aa45 If PQgetCancel() returns NULL we should also return false.
Noted by Andres Freund.
2013-07-12 08:03:36 -05:00
Jaime Casanova
0a70d907ae Improve messages in wait_connection_availability, so we know what
error causes the failover procedure to start

By gripe from Andres Freund
2013-07-12 08:03:25 -05:00
Jaime Casanova
ab1d380843 If PQcancel() fails, consider it as if the master is failing.
Because PQcancel() establishes a new synchronous connection to the
database, if it fails it means something wrong has happened with
master. So instead of just ignore the failure, CancelQuery() now
reports a failure condition so we can detect master's death in
that situation.

This is very important especially when only postmaster crashes but
other children/backend connections are still there. Because the
children's connections won't fail and CancelQuery() failure is our
only indication of something wrong happening.
Currently we just ignore the PQcancel() failure which leads us to
a situation in which we just loop forever
trying to cancel the async query.

Reported by: Martin Euser <martin.euser@nl.abnamro.com>
Problem analyzed and bug spotted by: Andres Freund <andres@2ndquadrant.com>
Patch by: Jaime Casanova <jaime@2ndquadrant.com>
2013-07-10 10:21:51 -05:00
Jaime Casanova
672b237c4e Options -F -W -I -v don't accept arguments, which means that in
getopt_long they shouldn't be marked with the colon (:) character.

This has been wrong since day one, so backpatching all the way until
1.1
2013-01-13 16:32:56 -05:00
Jaime Casanova
7d94151494 If the node is a witness don't bother asking its position, it always
will be 0/0. We just need to check that we can connect to it to determine
if we are in the majority.
2013-01-11 03:44:50 -05:00
Jaime Casanova
81b8a944de Fix a problem that caused a standby to promote itself without going to
voting procedure.

This is because of a race condition inside CheckPrimaryConnection().

This has been independently reported by Alex Railean and Dumitru, and Frank Jördens.
Analyzed and fixed by Cédric Villemain.

The fix has been verified to work by Frank
2012-12-19 11:45:58 -05:00
Jaime Casanova
1b69282df9 Formatting code using astyle 2012-12-11 11:47:59 -05:00
Jaime Casanova
06dd252f69 To select new master it needs to know which standby has received more
xlog records from master, so standbys should use pg_last_xlog_receive_location()
to report their positions. This solves a possible situation in which
a standby that is considered as new master when promoted is no longer
the best option.
2012-12-03 09:27:12 -05:00
Jaime Casanova
d6bd5aa381 Add an option for STANDBY FOLLOW to wait for a master to appear.
This is important for autofailover to do the right thing when
standbys detected master death at different times.

While this is a new option, seems important for the autofailover
to work properly so i will consider the lack of it a bug and
will backpatch to 2.0 where autofailover was introduced.

For gripe from Alex Railean, about a standby not finding the new
master because the new master hasn't finished promoting.
2012-11-14 15:07:59 -05:00
Gabriele Bartolini
bbdcffa813 Fixed typos notified by lintian 2012-11-09 18:09:43 +01:00
Jaime Casanova
5f33d9d715 Fix node decision logic when priorities are involved. Currently if
two nodes with different priorities are equally good to be promoted
the second one (with a lower priority, considering them
in descending order) will win.

Per report from Brailean Dumitru
2012-09-16 02:38:28 -05:00
Jaime Casanova
877f4cf82e Add a comment 2012-09-16 02:23:16 -05:00
Jaime Casanova
949f5ee498 Keep compiler quiet. Noted when compiling in FreeBSD in which i
get a warning for an uninitialized variable.

Also, define InvalidXLogRecPtr. We don't really need it but using
it makes the initialization future proof (considering that in 9.3
XLogRecPtr will change its structure).
2012-09-16 02:10:02 -05:00
Jaime Casanova
eb2f7efb4a When we have more command-line arguments than we should have we
need to show that last value and we should use only optind for that
instead of optind+1
2012-09-15 17:39:10 -05:00
Jaime Casanova
85ff3ec286 Fix documentation to always use -h syntax to refer to the node we
want to clone or connect to, instead of relying on the fact that
for some time putting that argument at last worked.
2012-09-15 17:38:42 -05:00
Jaime Casanova
2803bb92a8 Make repmgr compatible with FreeBSD.
We need to add an #include and make it use a different path for the
"true" binary.

Maybe we need to make these changes for all BSD systems but having no
evidence of that I prefer to make this only for systems with __FreeBSD__
2012-09-15 17:32:38 -05:00
Jaime Casanova
16fe41eecf Improve sample of commands for promote and follow 2012-09-11 15:53:57 -05:00
Jaime Casanova
d365a309fc Fix HISTORY to show from newest to oldest 2012-07-27 11:29:07 -05:00
Jaime Casanova
d5a41bb587 Fix tabs in HISTORY 2012-07-27 11:22:04 -05:00
Jaime Casanova
474d3217b4 Fix typos in RELEASE NOTES 2012-07-27 11:21:49 -05:00
Jaime Casanova
7a00d5a9a4 Now that we can have no monitoring we need to check all nodes at failover
not only those in repl_monitor
2012-07-21 17:53:15 -05:00
Jaime Casanova
5683b905dd New development branch is 2.1dev 2012-07-21 12:22:04 -05:00
25 changed files with 942 additions and 564 deletions

View File

@@ -1,4 +1,4 @@
Copyright (c) 2010-2012, 2ndQuadrant Limited Copyright (c) 2010-2014, 2ndQuadrant Limited
All rights reserved. All rights reserved.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify

11
HISTORY
View File

@@ -1,3 +1,14 @@
2.0stable 2014-01-30
Documentation fixes (Christian)
General refactoring, code quality improvements and stabilization work (Christian)
Added proper daemonizing (-d/--daemonize) (Christian)
Added PID file handling (-p/--pid-file) (Christian)
New config option: monitor_interval_secs (Christian)
New config option: retry_promote_interval (Christian)
New config option: logfile (Christian)
New config option: pg_bindir (Christian)
New config option: pgctl_options (Christian)
2.0beta2 2013-12-19 2.0beta2 2013-12-19
Improve autofailover logic and algorithms (Jaime, Andres) Improve autofailover logic and algorithms (Jaime, Andres)
Ignore pg_log when cloning (Jaime) Ignore pg_log when cloning (Jaime)

View File

@@ -1,6 +1,6 @@
# #
# Makefile # Makefile
# Copyright (c) 2ndQuadrant, 2010-2012 # Copyright (c) 2ndQuadrant, 2010-2014
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
@@ -21,7 +21,8 @@ repmgr: $(repmgr_OBJS)
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr $(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
ifdef USE_PGXS ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs) PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS) include $(PGXS)
else else
subdir = contrib/repmgr subdir = contrib/repmgr
@@ -32,9 +33,13 @@ endif
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now # XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
# is overriding pgxs install. # is overriding pgxs install.
install: install: install_prog install_ext
install_prog:
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)' $(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)'
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)' $(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)'
install_ext:
$(MAKE) -C sql install $(MAKE) -C sql install
ifneq (,$(DATA)$(DATA_built)) ifneq (,$(DATA)$(DATA_built))

View File

@@ -1085,7 +1085,7 @@ License and Contributions
========================= =========================
repmgr is licensed under the GPL v3. All of its code and documentation is repmgr is licensed under the GPL v3. All of its code and documentation is
Copyright 2010-2012, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for Copyright 2010-2014, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
details. details.
Main sponsorship of repmgr has been from 2ndQuadrant customers. Main sponsorship of repmgr has been from 2ndQuadrant customers.

19
TODO
View File

@@ -1,21 +1,18 @@
Known issues in repmgr Known issues in repmgr
====================== ======================
* The check for whether ``wal_keep_segments`` is considered large enough
does a string comparison rather than an integer one. It can give both
false positive (setting is large enough but flagged as too small) and
false negative (setting is too small but not noted as such) errors.
* When running repmgr against a remote machine, operations that start * When running repmgr against a remote machine, operations that start
the database server using the ``pg_ctl`` command may accidentally the database server using the ``pg_ctl`` command may accidentally
terminate after their associated ssh session ends. terminate after their associated ssh session ends.
* After running repmgrd as a regular foreground application, hitting
control-C causes the program to crash.
Planned feature improvements Planned feature improvements
============================ ============================
* Before running ``pg_start_backup()``, a sanity check that there is a * Timeline increases when promoting a standby
a working ssh connection to the destination would help find
configuration errors before disturbing the database. * A better check which standby did receive most of the data
* Make the fact that a standby may be delayed a factor in the voting
algorithm
* include support for delayed standbys

View File

@@ -13,13 +13,16 @@ Installation
For convenience, we define: For convenience, we define:
* node1 is the hostname fully qualified of the Master server, IP 192.168.1.10 **node1**
* node2 is the hostname fully qualified of the Standby server, IP 192.168.1.11 is the hostname fully qualified of the Master server, IP 192.168.1.10
* witness is the hostname fully qualified of the server used for witness, IP 192.168.1.12 **node2**
is the hostname fully qualified of the Standby server, IP 192.168.1.11
**witness**
is the hostname fully qualified of the server used for witness, IP 192.168.1.12
:Note: It is not recommanded to use name defining status of a server like «masterserver», **Note:** It is not recommanded to use name defining status of a server like «masterserver»,
this is a name leading to confusion once a failover take place and the Master is this is a name leading to confusion once a failover take place and the Master is
now on the «standbyserver». now on the «standbyserver».
Summary Summary
------- -------
@@ -30,17 +33,24 @@ and a witness-repmgrd is installed in a third server where it uses a PostgreSQL
cluster to communicate with other repmgrd daemons. cluster to communicate with other repmgrd daemons.
1. Install PostgreSQL in all the servers involved (including the server used for 1. Install PostgreSQL in all the servers involved (including the server used for
witness) witness)
2. Install repmgr in all the servers involved (including the server used for witness) 2. Install repmgr in all the servers involved (including the server used for witness)
3. Configure the Master PostreSQL 3. Configure the Master PostreSQL
4. Clone the Master to the Standby using "repmgr standby clone" command 4. Clone the Master to the Standby using "repmgr standby clone" command
5. Configure repmgr in all the servers involved (including the server used for witness) 5. Configure repmgr in all the servers involved (including the server used for witness)
6. Register Master and Standby nodes 6. Register Master and Standby nodes
7. Initiate witness server 7. Initiate witness server
8. Start the repmgrd daemons in all nodes 8. Start the repmgrd daemons in all nodes
:Note: A complete Hight-Availability design need at least 3 servers to still have **Note** A complete Hight-Availability design need at least 3 servers to still have
a backup node after a first failure. a backup node after a first failure.
Install PostgreSQL Install PostgreSQL
------------------ ------------------
@@ -76,8 +86,8 @@ Edit the file pg_hba.conf and add lines for the replication::
host repmgr repmgr 192.168.1.10/30 trust host repmgr repmgr 192.168.1.10/30 trust
host replication all 192.168.1.10/30 trust host replication all 192.168.1.10/30 trust
:Note: It is also possible to use a password authentication (md5), .pgpass file **Note:** It is also possible to use a password authentication (md5), .pgpass file
should be edited to allow connection between each node. should be edited to allow connection between each node.
Create the user and database to manage replication:: Create the user and database to manage replication::
@@ -137,16 +147,26 @@ Log in each server and configure repmgr by editing the file
promote_command='promote_command.sh' promote_command='promote_command.sh'
follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf' follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf'
* *cluster* is the name of the current replication. **cluster**
* *node* is the number of the current node (1, 2 or 3 in the current example). is the name of the current replication.
* *node_name* is an identifier for every node. **node**
* *conninfo* is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration it is needed to add a 'port=5499' to the conninfo. is the number of the current node (1, 2 or 3 in the current example).
* *master_response_timeout* is the maximum amount of time we are going to wait before deciding the master has died and start failover procedure. **node_name**
* *reconnect_attempts* is the number of times we will try to reconnect to master after a failure has been detected and before start failover procedure. is an identifier for every node.
* *reconnect_interval* is the amount of time between retries to reconnect to master after a failure has been detected and before start failover procedure. **conninfo**
* *failover* configure behavior : *manual* or *automatic*. is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration it is needed to add a 'port=5499' to the conninfo.
* *promote_command* the command executed to do the failover (including the PostgreSQL failover itself). The command must return 0 on success. **master_response_timeout**
* *follow_command* the command executed to address the current standby to another Master. The command must return 0 on success. is the maximum amount of time we are going to wait before deciding the master has died and start failover procedure.
**reconnect_attempts**
is the number of times we will try to reconnect to master after a failure has been detected and before start failover procedure.
**reconnect_interval**
is the amount of time between retries to reconnect to master after a failure has been detected and before start failover procedure.
**failover**
configure behavior: *manual* or *automatic*.
**promote_command**
the command executed to do the failover (including the PostgreSQL failover itself). The command must return 0 on success.
**follow_command**
the command executed to address the current standby to another Master. The command must return 0 on success.
Register Master and Standby Register Master and Standby
--------------------------- ---------------------------
@@ -158,9 +178,7 @@ Register the node as Master::
su - postgres su - postgres
repmgr -f /etc/repmgr/repmgr.conf master register repmgr -f /etc/repmgr/repmgr.conf master register
Log in node2. Log in node2. Register it as a standby::
Register the node as Standby::
su - postgres su - postgres
repmgr -f /etc/repmgr/repmgr.conf standby register repmgr -f /etc/repmgr/repmgr.conf standby register
@@ -183,10 +201,10 @@ Start the repmgrd daemons
Log in node2 and witness. Log in node2 and witness.
su - postgres su - postgres
repmgrd -f /etc/repmgr/repmgr.conf > /var/log/postgresql/repmgr.log 2>&1 repmgrd -f /etc/repmgr/repmgr.conf > /var/log/postgresql/repmgr.log 2>&1
:Note: The Master does not need a repmgrd daemon. **Note:** The Master does not need a repmgrd daemon.
Suspend Automatic behavior Suspend Automatic behavior
@@ -201,12 +219,6 @@ Then, signal repmgrd daemon::
su - postgres su - postgres
kill -HUP `pidoff repmgrd` kill -HUP `pidoff repmgrd`
TODO : -HUP configuration update is not implemented and it should check its
configuration file against its configuration in DB, updating
accordingly the SQL conf (especialy the failover manual or auto)
this allow witness-standby and standby-not-promotable features
and simpler usage of the tool ;)
Usage Usage
===== =====

View File

@@ -1,6 +1,6 @@
/* /*
* check_dir.c - Directories management functions * check_dir.c - Directories management functions
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -225,12 +225,12 @@ is_pg_dir(char *dir)
struct stat sb; struct stat sb;
int r; int r;
// test pgdata /* test pgdata */
xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); xsnprintf(path, buf_sz, "%s/PG_VERSION", dir);
if (stat(path, &sb) == 0) if (stat(path, &sb) == 0)
return true; return true;
// test tablespace dir /* test tablespace dir */
sprintf(path, "ls %s/PG_*/ -I*", dir); sprintf(path, "ls %s/PG_*/ -I*", dir);
r = system(path); r = system(path);
if (r == 0) if (r == 0)
@@ -256,7 +256,7 @@ create_pgdir(char *dir, bool force)
{ {
log_err(_("couldn't create directory \"%s\"...\n"), log_err(_("couldn't create directory \"%s\"...\n"),
dir); dir);
exit(ERR_BAD_CONFIG); return false;
} }
break; break;
case 1: case 1:
@@ -268,7 +268,7 @@ create_pgdir(char *dir, bool force)
{ {
log_err(_("could not change permissions of directory \"%s\": %s\n"), log_err(_("could not change permissions of directory \"%s\": %s\n"),
dir, strerror(errno)); dir, strerror(errno));
exit(ERR_BAD_CONFIG); return false;
} }
break; break;
case 2: case 2:
@@ -293,7 +293,7 @@ create_pgdir(char *dir, bool force)
"If you are sure you want to clone here, " "If you are sure you want to clone here, "
"please check there is no PostgreSQL server " "please check there is no PostgreSQL server "
"running and use the --force option\n")); "running and use the --force option\n"));
exit(ERR_BAD_CONFIG); return false;
} }
return false; return false;
@@ -301,7 +301,7 @@ create_pgdir(char *dir, bool force)
/* Trouble accessing directory */ /* Trouble accessing directory */
log_err(_("could not access directory \"%s\": %s\n"), log_err(_("could not access directory \"%s\": %s\n"),
dir, strerror(errno)); dir, strerror(errno));
exit(ERR_BAD_CONFIG); return false;
} }
return true; return true;
} }

View File

@@ -1,6 +1,6 @@
/* /*
* check_dir.h * check_dir.h
* Copyright (c) 2ndQuadrant, 2010-2012 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by

View File

@@ -1,6 +1,6 @@
/* /*
* config.c - Functions to parse the config file * config.c - Functions to parse the config file
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -42,6 +42,8 @@ parse_config(const char *config_file, t_configuration_options *options)
memset(options->follow_command, 0, sizeof(options->follow_command)); memset(options->follow_command, 0, sizeof(options->follow_command));
memset(options->rsync_options, 0, sizeof(options->rsync_options)); memset(options->rsync_options, 0, sizeof(options->rsync_options));
memset(options->ssh_options, 0, sizeof(options->ssh_options)); memset(options->ssh_options, 0, sizeof(options->ssh_options));
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
memset(options->pgctl_options, 0, sizeof(options->pgctl_options));
/* if nothing has been provided defaults to 60 */ /* if nothing has been provided defaults to 60 */
options->master_response_timeout = 60; options->master_response_timeout = 60;
@@ -50,6 +52,9 @@ parse_config(const char *config_file, t_configuration_options *options)
options->reconnect_attempts = 6; options->reconnect_attempts = 6;
options->reconnect_intvl = 10; options->reconnect_intvl = 10;
options->monitor_interval_secs = 2;
options->retry_promote_interval_secs = 300;
/* /*
* Since some commands don't require a config file at all, not * Since some commands don't require a config file at all, not
* having one isn't necessarily a problem. * having one isn't necessarily a problem.
@@ -114,6 +119,16 @@ parse_config(const char *config_file, t_configuration_options *options)
options->reconnect_attempts = atoi(value); options->reconnect_attempts = atoi(value);
else if (strcmp(name, "reconnect_interval") == 0) else if (strcmp(name, "reconnect_interval") == 0)
options->reconnect_intvl = atoi(value); options->reconnect_intvl = atoi(value);
else if (strcmp(name, "pg_bindir") == 0)
strncpy (options->pg_bindir, value, MAXLEN);
else if (strcmp(name, "pg_ctl_options") == 0)
strncpy (options->pgctl_options, value, MAXLEN);
else if (strcmp(name, "logfile") == 0)
strncpy(options->logfile, value, MAXLEN);
else if (strcmp(name, "monitor_interval_secs") == 0)
options->monitor_interval_secs = atoi(value);
else if (strcmp(name, "retry_promote_interval_secs") == 0)
options->retry_promote_interval_secs = atoi(value);
else else
log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value); log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value);
} }
@@ -151,6 +166,12 @@ parse_config(const char *config_file, t_configuration_options *options)
log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n")); log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n"));
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
if (*options->pg_bindir == '\0')
{
log_err(_("pg_bindir config value not found. Check the configuration file.\n"));
exit(ERR_BAD_CONFIG);
}
} }
@@ -221,49 +242,49 @@ reload_configuration(char *config_file, t_configuration_options *orig_options)
parse_config(config_file, &new_options); parse_config(config_file, &new_options);
if (new_options.node == -1) if (new_options.node == -1)
{ {
log_warning(_("\nCannot load new configuration, will keep current one.\n")); log_warning(_("Cannot load new configuration, will keep current one.\n"));
return false; return false;
} }
if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0) if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
{ {
log_warning(_("\nCannot change cluster name, will keep current configuration.\n")); log_warning(_("Cannot change cluster name, will keep current configuration.\n"));
return false; return false;
} }
if (new_options.node != orig_options->node) if (new_options.node != orig_options->node)
{ {
log_warning(_("\nCannot change node number, will keep current configuration.\n")); log_warning(_("Cannot change node number, will keep current configuration.\n"));
return false; return false;
} }
if (new_options.node_name != orig_options->node_name) if (strcmp(new_options.node_name, orig_options->node_name) != 0)
{ {
log_warning(_("\nCannot change standby name, will keep current configuration.\n")); log_warning(_("Cannot change standby name, will keep current configuration.\n"));
return false; return false;
} }
if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER) if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
{ {
log_warning(_("\nNew value for failover is not valid. Should be MANUAL or AUTOMATIC.\n")); log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n"));
return false; return false;
} }
if (new_options.master_response_timeout <= 0) if (new_options.master_response_timeout <= 0)
{ {
log_warning(_("\nNew value for master_response_timeout is not valid. Should be greater than zero.\n")); log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n"));
return false; return false;
} }
if (new_options.reconnect_attempts < 0) if (new_options.reconnect_attempts < 0)
{ {
log_warning(_("\nNew value for reconnect_attempts is not valid. Should be greater or equal than zero.\n")); log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
return false; return false;
} }
if (new_options.reconnect_intvl < 0) if (new_options.reconnect_intvl < 0)
{ {
log_warning(_("\nNew value for reconnect_interval is not valid. Should be greater or equal than zero.\n")); log_warning(_("New value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
return false; return false;
} }
@@ -271,7 +292,7 @@ reload_configuration(char *config_file, t_configuration_options *orig_options)
conn = establishDBConnection(new_options.conninfo, false); conn = establishDBConnection(new_options.conninfo, false);
if (!conn || (PQstatus(conn) != CONNECTION_OK)) if (!conn || (PQstatus(conn) != CONNECTION_OK))
{ {
log_warning(_("\nconninfo string is not valid, will keep current configuration.\n")); log_warning(_("conninfo string is not valid, will keep current configuration.\n"));
return false; return false;
} }
PQfinish(conn); PQfinish(conn);

View File

@@ -1,6 +1,6 @@
/* /*
* config.h * config.h
* Copyright (c) 2ndQuadrant, 2010-2012 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -40,8 +40,15 @@ typedef struct
int master_response_timeout; int master_response_timeout;
int reconnect_attempts; int reconnect_attempts;
int reconnect_intvl; int reconnect_intvl;
char pg_bindir[MAXLEN];
char pgctl_options[MAXLEN];
char logfile[MAXLEN];
int monitor_interval_secs;
int retry_promote_interval_secs;
} t_configuration_options; } t_configuration_options;
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", 0, 0 }
void parse_config(const char *config_file, t_configuration_options *options); void parse_config(const char *config_file, t_configuration_options *options);
void parse_line(char *buff, char *name, char *value); void parse_line(char *buff, char *name, char *value);
char *trim(char *s); char *trim(char *s);

View File

@@ -1,6 +1,6 @@
/* /*
* dbutils.c - Database connection/management functions * dbutils.c - Database connection/management functions
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -71,25 +71,22 @@ establishDBConnectionByParams(const char *keywords[], const char *values[],const
return conn; return conn;
} }
bool int
is_standby(PGconn *conn) is_standby(PGconn *conn)
{ {
PGresult *res; PGresult *res;
bool result = false; int result = 0;
res = PQexec(conn, "SELECT pg_is_in_recovery()"); res = PQexec(conn, "SELECT pg_is_in_recovery()");
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("Can't query server mode: %s"), log_err(_("Can't query server mode: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res); result = -1;
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0) result = 1;
result = true;
PQclear(res); PQclear(res);
return result; return result;
@@ -97,11 +94,11 @@ is_standby(PGconn *conn)
bool int
is_witness(PGconn *conn, char *schema, char *cluster, int node_id) is_witness(PGconn *conn, char *schema, char *cluster, int node_id)
{ {
PGresult *res; PGresult *res;
bool result = false; int result = 0;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
sqlquery_snprintf(sqlquery, "SELECT witness from %s.repl_nodes where cluster = '%s' and id = %d", sqlquery_snprintf(sqlquery, "SELECT witness from %s.repl_nodes where cluster = '%s' and id = %d",
@@ -110,13 +107,10 @@ is_witness(PGconn *conn, char *schema, char *cluster, int node_id)
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("Can't query server mode: %s"), PQerrorMessage(conn)); log_err(_("Can't query server mode: %s"), PQerrorMessage(conn));
PQclear(res); result = -1;
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0) result = 1;
result = true;
PQclear(res); PQclear(res);
return result; return result;
@@ -138,7 +132,7 @@ is_pgup(PGconn *conn, int timeout)
{ {
if (twice) if (twice)
return false; return false;
PQreset(conn); // reconnect PQreset(conn); /* reconnect */
twice = true; twice = true;
} }
else else
@@ -164,10 +158,10 @@ is_pgup(PGconn *conn, int timeout)
break; break;
failed: failed:
// we need to retry, because we might just have loose the connection once /* we need to retry, because we might just have loose the connection once */
if (twice) if (twice)
return false; return false;
PQreset(conn); // reconnect PQreset(conn); /* reconnect */
twice = true; twice = true;
} }
} }
@@ -198,8 +192,7 @@ pg_version(PGconn *conn, char* major_version)
log_err(_("Version check PQexec failed: %s"), log_err(_("Version check PQexec failed: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res); PQclear(res);
PQfinish(conn); return NULL;
exit(ERR_DB_QUERY);
} }
major_version1 = atoi(PQgetvalue(res, 0, 0)); major_version1 = atoi(PQgetvalue(res, 0, 0));
@@ -220,12 +213,13 @@ pg_version(PGconn *conn, char* major_version)
} }
bool int
guc_setted(PGconn *conn, const char *parameter, const char *op, guc_set(PGconn *conn, const char *parameter, const char *op,
const char *value) const char *value)
{ {
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
int retval = 1;
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
" WHERE name = '%s' AND setting %s '%s'", " WHERE name = '%s' AND setting %s '%s'",
@@ -236,30 +230,29 @@ guc_setted(PGconn *conn, const char *parameter, const char *op,
{ {
log_err(_("GUC setting check PQexec failed: %s"), log_err(_("GUC setting check PQexec failed: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res); retval = -1;
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
if (PQntuples(res) == 0) else if (PQntuples(res) == 0)
{ {
PQclear(res); retval = 0;
return false;
} }
PQclear(res); PQclear(res);
return true; return retval;
} }
/** /**
* Just like guc_setted except with an extra parameter containing the name of * Just like guc_set except with an extra parameter containing the name of
* the pg datatype so that the comparison can be done properly. * the pg datatype so that the comparison can be done properly.
*/ */
bool int
guc_setted_typed(PGconn *conn, const char *parameter, const char *op, guc_set_typed(PGconn *conn, const char *parameter, const char *op,
const char *value, const char *datatype) const char *value, const char *datatype)
{ {
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
int retval = 1;
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
" WHERE name = '%s' AND setting::%s %s '%s'::%s", " WHERE name = '%s' AND setting::%s %s '%s'::%s",
@@ -270,18 +263,16 @@ guc_setted_typed(PGconn *conn, const char *parameter, const char *op,
{ {
log_err(_("GUC setting check PQexec failed: %s"), log_err(_("GUC setting check PQexec failed: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res); retval = -1;
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
if (PQntuples(res) == 0) else if (PQntuples(res) == 0)
{ {
PQclear(res); retval = 0;
return false;
} }
PQclear(res); PQclear(res);
return true; return retval;
} }
@@ -289,7 +280,7 @@ const char *
get_cluster_size(PGconn *conn) get_cluster_size(PGconn *conn)
{ {
PGresult *res; PGresult *res;
const char *size; const char *size = NULL;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
sqlquery_snprintf( sqlquery_snprintf(
@@ -302,11 +293,12 @@ get_cluster_size(PGconn *conn)
{ {
log_err(_("Get cluster size PQexec failed: %s"), log_err(_("Get cluster size PQexec failed: %s"),
PQerrorMessage(conn)); PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
exit(ERR_DB_QUERY);
} }
size = PQgetvalue(res, 0, 0); else
{
size = PQgetvalue(res, 0, 0);
}
PQclear(res); PQclear(res);
return size; return size;
} }
@@ -367,8 +359,7 @@ getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
log_err(_("Can't get nodes info: %s\n"), log_err(_("Can't get nodes info: %s\n"),
PQerrorMessage(standby_conn)); PQerrorMessage(standby_conn));
PQclear(res1); PQclear(res1);
PQfinish(standby_conn); return NULL;
exit(ERR_DB_QUERY);
} }
for (i = 0; i < PQntuples(res1); i++) for (i = 0; i < PQntuples(res1); i++)
@@ -443,7 +434,7 @@ wait_connection_availability(PGconn *conn, int timeout)
{ {
if (PQconsumeInput(conn) == 0) if (PQconsumeInput(conn) == 0)
{ {
log_warning(_("wait_connection_availability: could not receive data from master. %s\n"), log_warning(_("wait_connection_availability: could not receive data from connection. %s\n"),
PQerrorMessage(conn)); PQerrorMessage(conn));
return 0; return 0;
} }

View File

@@ -1,6 +1,6 @@
/* /*
* dbutils.h * dbutils.h
* Copyright (c) 2ndQuadrant, 2010-2012 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -26,14 +26,15 @@ PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
PGconn *establishDBConnectionByParams(const char *keywords[], PGconn *establishDBConnectionByParams(const char *keywords[],
const char *values[], const char *values[],
const bool exit_on_error); const bool exit_on_error);
bool is_standby(PGconn *conn); int is_standby(PGconn *conn);
bool is_witness(PGconn *conn, char *schema, char *cluster, int node_id); int is_witness(PGconn *conn, char *schema, char *cluster, int node_id);
bool is_pgup(PGconn *conn, int timeout); bool is_pgup(PGconn *conn, int timeout);
char *pg_version(PGconn *conn, char* major_version); char *pg_version(PGconn *conn, char* major_version);
bool guc_setted(PGconn *conn, const char *parameter, const char *op, int guc_set(PGconn *conn, const char *parameter, const char *op,
const char *value); const char *value);
bool guc_setted_typed(PGconn *conn, const char *parameter, const char *op, int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
const char *value, const char *datatype); const char *value, const char *datatype);
const char *get_cluster_size(PGconn *conn); const char *get_cluster_size(PGconn *conn);
PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster, PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
int *master_id, char *master_conninfo_out); int *master_id, char *master_conninfo_out);

View File

@@ -1,6 +1,6 @@
/* /*
* errcode.h * errcode.h
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -35,5 +35,6 @@
#define ERR_STR_OVERFLOW 10 #define ERR_STR_OVERFLOW 10
#define ERR_FAILOVER_FAIL 11 #define ERR_FAILOVER_FAIL 11
#define ERR_BAD_SSH 12 #define ERR_BAD_SSH 12
#define ERR_SYS_FAILURE 13
#endif /* _ERRCODE_H_ */ #endif /* _ERRCODE_H_ */

9
log.c
View File

@@ -1,6 +1,6 @@
/* /*
* log.c - Logging methods * log.c - Logging methods
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This module is a set of methods for logging (currently only syslog) * This module is a set of methods for logging (currently only syslog)
* *
@@ -68,7 +68,7 @@ static int detect_log_facility(const char* facility);
int log_type = REPMGR_STDERR; int log_type = REPMGR_STDERR;
int log_level = LOG_NOTICE; int log_level = LOG_NOTICE;
bool logger_init(const char* ident, const char* level, const char* facility) bool logger_init(t_configuration_options *opts, const char* ident, const char* level, const char* facility)
{ {
int l; int l;
@@ -140,6 +140,11 @@ bool logger_init(const char* ident, const char* level, const char* facility)
#endif #endif
if (*opts->logfile)
{
freopen(opts->logfile, "a", stderr);
}
return true; return true;
} }

6
log.h
View File

@@ -1,6 +1,6 @@
/* /*
* log.h * log.h
* Copyright (c) 2ndQuadrant, 2010-2012 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -25,7 +25,7 @@
#define REPMGR_SYSLOG 1 #define REPMGR_SYSLOG 1
#define REPMGR_STDERR 2 #define REPMGR_STDERR 2
void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...); void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 4)));
/* Standard error logging */ /* Standard error logging */
#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__) #define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
@@ -114,7 +114,7 @@ void stderr_log_with_level(const char *level_name, int level, const char *fmt, .
/* Logger initialisation and shutdown */ /* Logger initialisation and shutdown */
bool logger_shutdown(void); bool logger_shutdown(void);
bool logger_init(const char* ident, const char* level, const char* facility); bool logger_init(t_configuration_options *opts, const char* ident, const char* level, const char* facility);
void logger_min_verbose(int minimum); void logger_min_verbose(int minimum);
extern int log_type; extern int log_type;

331
repmgr.c
View File

@@ -1,6 +1,6 @@
/* /*
* repmgr.c - Command interpreter for the repmgr * repmgr.c - Command interpreter for the repmgr
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This module is a command-line utility to easily setup a cluster of * This module is a command-line utility to easily setup a cluster of
* hot standby servers for an HA environment * hot standby servers for an HA environment
@@ -85,8 +85,8 @@ bool need_a_node = true;
bool require_password = false; bool require_password = false;
/* Initialization of runtime options */ /* Initialization of runtime options */
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }; t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER;
t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1 }; t_configuration_options options = T_CONFIGURATION_OPTIONS_INITIALIZER;
static char *server_mode = NULL; static char *server_mode = NULL;
static char *server_cmd = NULL; static char *server_cmd = NULL;
@@ -268,11 +268,8 @@ main(int argc, char **argv)
} }
} }
switch (optind < argc) if (optind < argc)
{ {
case 0:
break;
default:
log_err(_("%s: too many command-line arguments (first extra is \"%s\")\n"), log_err(_("%s: too many command-line arguments (first extra is \"%s\")\n"),
progname, argv[optind]); progname, argv[optind]);
usage(); usage();
@@ -322,7 +319,7 @@ main(int argc, char **argv)
* at, but it often requires detailed logging to troubleshoot * at, but it often requires detailed logging to troubleshoot
* problems. * problems.
*/ */
logger_init(progname, options.loglevel, options.logfacility); logger_init(&options, progname, options.loglevel, options.logfacility);
if (runtime_options.verbose) if (runtime_options.verbose)
logger_min_verbose(LOG_INFO); logger_min_verbose(LOG_INFO);
@@ -486,29 +483,35 @@ do_master_register(void)
{ {
PGconn *conn; PGconn *conn;
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN], *ret_ver;
bool schema_exists = false; bool schema_exists = false;
char schema_quoted[MAXLEN]; char schema_quoted[MAXLEN];
char master_version[MAXVERSIONSTR]; char master_version[MAXVERSIONSTR];
int ret;
conn = establishDBConnection(options.conninfo, true); conn = establishDBConnection(options.conninfo, true);
/* master should be v9 or better */ /* master should be v9 or better */
log_info(_("%s connecting to master database\n"), progname); log_info(_("%s connecting to master database\n"), progname);
pg_version(conn, master_version); ret_ver = pg_version(conn, master_version);
if (strcmp(master_version, "") == 0) if (ret_ver == NULL || strcmp(master_version, "") == 0)
{ {
PQfinish(conn); PQfinish(conn);
log_err( _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); if (ret_ver != NULL)
log_err( _("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
return; return;
} }
/* Check we are a master */ /* Check we are a master */
log_info(_("%s connected to master, checking its state\n"), progname); log_info(_("%s connected to master, checking its state\n"), progname);
if (is_standby(conn)) ret = is_standby(conn);
if (ret)
{ {
log_err(_("Trying to register a standby node as a master\n")); log_err(_(ret == 1 ? "Trying to register a standby node as a master\n" :
"Connection to node lost!\n"));
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -565,6 +568,22 @@ do_master_register(void)
PGconn *master_conn; PGconn *master_conn;
int id; int id;
if (runtime_options.force)
{
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
" WHERE id = %d",
repmgr_schema, options.node);
log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery))
{
log_warning(_("Cannot delete node details, %s\n"),
PQerrorMessage(conn));
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
}
/* Ensure there isn't any other master already registered */ /* Ensure there isn't any other master already registered */
master_conn = getMasterConnection(conn, repmgr_schema, master_conn = getMasterConnection(conn, repmgr_schema,
options.cluster_name, &id,NULL); options.cluster_name, &id,NULL);
@@ -577,21 +596,6 @@ do_master_register(void)
} }
/* Now register the master */ /* Now register the master */
if (runtime_options.force)
{
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
" WHERE id = %d",
repmgr_schema, options.node);
log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery))
{
log_warning(_("Cannot delete node details, %s\n"),
PQerrorMessage(conn));
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
}
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, name, conninfo, priority) " sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, name, conninfo, priority) "
"VALUES (%d, '%s', '%s', '%s', %d)", "VALUES (%d, '%s', '%s', '%s', %d)",
@@ -619,10 +623,10 @@ do_standby_register(void)
{ {
PGconn *conn; PGconn *conn;
PGconn *master_conn; PGconn *master_conn;
int master_id; int master_id, ret;
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN], *ret_ver;
char schema_quoted[MAXLEN]; char schema_quoted[MAXLEN];
char master_version[MAXVERSIONSTR]; char master_version[MAXVERSIONSTR];
@@ -635,18 +639,22 @@ do_standby_register(void)
/* should be v9 or better */ /* should be v9 or better */
log_info(_("%s connected to standby, checking its state\n"), progname); log_info(_("%s connected to standby, checking its state\n"), progname);
pg_version(conn, standby_version); ret_ver = pg_version(conn, standby_version);
if (strcmp(standby_version, "") == 0) if (ret_ver == NULL || strcmp(standby_version, "") == 0)
{ {
PQfinish(conn); PQfinish(conn);
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); if (ret_ver != NULL)
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* Check we are a standby */ /* Check we are a standby */
if (!is_standby(conn)) ret = is_standby(conn);
if (ret == 0 || ret == -1)
{ {
log_err(_("repmgr: This node should be a standby (%s)\n"), options.conninfo); log_err(_(ret == 0 ? "repmgr: This node should be a standby (%s)\n" :
"repmgr: connection to node (%s) lost\n"), options.conninfo);
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -697,12 +705,13 @@ do_standby_register(void)
/* master should be v9 or better */ /* master should be v9 or better */
log_info(_("%s connected to master, checking its state\n"), progname); log_info(_("%s connected to master, checking its state\n"), progname);
pg_version(master_conn, master_version); ret_ver = pg_version(master_conn, master_version);
if (strcmp(master_version, "") == 0) if (ret_ver == NULL || strcmp(master_version, "") == 0)
{ {
PQfinish(conn); PQfinish(conn);
PQfinish(master_conn); PQfinish(master_conn);
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); if (ret_ver != NULL)
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -742,7 +751,8 @@ do_standby_register(void)
options.conninfo, options.priority); options.conninfo, options.priority);
log_debug(_("standby register: %s\n"), sqlquery); log_debug(_("standby register: %s\n"), sqlquery);
if (!PQexec(master_conn, sqlquery)) res = PQexec(master_conn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot insert node details, %s\n"), log_err(_("Cannot insert node details, %s\n"),
PQerrorMessage(master_conn)); PQerrorMessage(master_conn));
@@ -765,10 +775,11 @@ do_standby_clone(void)
{ {
PGconn *conn; PGconn *conn;
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN], *ret;
const char *cluster_size;
int r = 0; int r = 0, retval = SUCCESS;
int i; int i, is_standby_retval;
bool flag_success = false; bool flag_success = false;
bool test_mode = false; bool test_mode = false;
@@ -819,45 +830,60 @@ do_standby_clone(void)
/* primary should be v9 or better */ /* primary should be v9 or better */
log_info(_("%s connected to master, checking its state\n"), progname); log_info(_("%s connected to master, checking its state\n"), progname);
pg_version(conn, master_version); ret = pg_version(conn, master_version);
if (strcmp(master_version, "") == 0) if (ret == NULL || strcmp(master_version, "") == 0)
{ {
PQfinish(conn); PQfinish(conn);
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); if (ret != NULL)
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* Check we are cloning a primary node */ /* Check we are cloning a primary node */
if (is_standby(conn)) is_standby_retval = is_standby(conn);
if (is_standby_retval)
{ {
log_err(_(is_standby_retval == 1 ? "The command should clone a primary node\n" :
"Connection to node lost!\n"));
PQfinish(conn); PQfinish(conn);
log_err(_("\nThe command should clone a primary node\n"));
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* And check if it is well configured */ /* And check if it is well configured */
if (!guc_setted(conn, "wal_level", "=", "hot_standby")) i = guc_set(conn, "wal_level", "=", "hot_standby");
if (i == 0 || i == -1)
{ {
PQfinish(conn); PQfinish(conn);
log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); if (i == 0)
log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
if (!guc_setted_typed(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments, "integer"))
i = guc_set_typed(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments, "integer");
if (i == 0 || i == -1)
{ {
PQfinish(conn); PQfinish(conn);
log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option or edit the postgresql.conf of the PostgreSQL master.)\n"), progname, runtime_options.wal_keep_segments); if (i == 0)
log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option or edit the postgresql.conf of the PostgreSQL master.)\n"), progname, runtime_options.wal_keep_segments);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
if (!guc_setted(conn, "archive_mode", "=", "on"))
i = guc_set(conn, "archive_mode", "=", "on");
if (i == 0 || i == -1)
{ {
PQfinish(conn); PQfinish(conn);
log_err(_("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname); if (i == 0)
log_err(_("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
if (!guc_setted(conn, "hot_standby", "=", "on"))
i = guc_set(conn, "hot_standby", "=", "on");
if (i == 0 || i == -1)
{ {
PQfinish(conn); PQfinish(conn);
log_err(_("%s needs parameter 'hot_standby' to be set to 'on'\n"), progname); if (i == 0)
log_err(_("%s needs parameter 'hot_standby' to be set to 'on'\n"), progname);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -952,7 +978,10 @@ do_standby_clone(void)
} }
PQclear(res); PQclear(res);
log_info(_("Successfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); cluster_size = get_cluster_size(conn);
if (cluster_size == NULL)
exit(ERR_DB_QUERY);
log_info(_("Successfully connected to primary. Current installation size is %s\n"), cluster_size);
/* /*
* XXX master_xlog_directory should be discovered from master configuration * XXX master_xlog_directory should be discovered from master configuration
@@ -1036,6 +1065,8 @@ do_standby_clone(void)
{ {
log_err(_("%s: couldn't use directory %s ...\nUse --force option to force\n"), log_err(_("%s: couldn't use directory %s ...\nUse --force option to force\n"),
progname, local_data_directory); progname, local_data_directory);
r = ERR_BAD_CONFIG;
retval = ERR_BAD_CONFIG;
goto stop_backup; goto stop_backup;
} }
@@ -1175,7 +1206,7 @@ stop_backup:
log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn)); log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn));
PQclear(res); PQclear(res);
PQfinish(conn); PQfinish(conn);
exit(ERR_STOP_BACKUP); exit(retval);
} }
last_wal_segment = PQgetvalue(res, 0, 0); last_wal_segment = PQgetvalue(res, 0, 0);
@@ -1243,13 +1274,13 @@ do_standby_promote(void)
{ {
PGconn *conn; PGconn *conn;
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN], *ret;
char script[MAXLEN]; char script[MAXLEN];
PGconn *old_master_conn; PGconn *old_master_conn;
int old_master_id; int old_master_id;
int r; int r, retval;
char data_dir[MAXLEN]; char data_dir[MAXLEN];
char recovery_file_path[MAXFILENAME]; char recovery_file_path[MAXFILENAME];
char recovery_done_path[MAXFILENAME]; char recovery_done_path[MAXFILENAME];
@@ -1262,18 +1293,22 @@ do_standby_promote(void)
/* we need v9 or better */ /* we need v9 or better */
log_info(_("%s connected to master, checking its state\n"), progname); log_info(_("%s connected to master, checking its state\n"), progname);
pg_version(conn, standby_version); ret = pg_version(conn, standby_version);
if (strcmp(standby_version, "") == 0) if (ret == NULL || strcmp(standby_version, "") == 0)
{ {
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); if (ret != NULL)
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* Check we are in a standby node */ /* Check we are in a standby node */
if (!is_standby(conn)) retval = is_standby(conn);
if (retval == 0 || retval == -1)
{ {
log_err(_("%s: The command should be executed on a standby node\n"), progname); log_err(_(retval == 0 ? "%s: The command should be executed on a standby node\n" :
"%s: connection to node lost!\n"), progname);
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -1313,13 +1348,12 @@ do_standby_promote(void)
rename(recovery_file_path, recovery_done_path); rename(recovery_file_path, recovery_done_path);
/* /*
* We assume the pg_ctl script is in the PATH. Restart and wait for * Restart and wait for the server to finish starting, so that the check
* the server to finish starting, so that the check below will * below will find an active server rather than one starting up. This may
* find an active server rather than one starting up. This may
* hang for up the default timeout (60 seconds). * hang for up the default timeout (60 seconds).
*/ */
log_notice(_("%s: restarting server using pg_ctl\n"), progname); log_notice(_("%s: restarting server using %s/pg_ctl\n"), progname, options.pg_bindir);
maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir); maxlen_snprintf(script, "%s/pg_ctl %s -D %s -w -m fast restart", options.pg_bindir, options.pgctl_options, data_dir);
r = system(script); r = system(script);
if (r != 0) if (r != 0)
{ {
@@ -1330,13 +1364,15 @@ do_standby_promote(void)
/* reconnect to check we got promoted */ /* reconnect to check we got promoted */
log_info(_("%s connecting to now restarted database\n"), progname); log_info(_("%s connecting to now restarted database\n"), progname);
conn = establishDBConnection(options.conninfo, true); conn = establishDBConnection(options.conninfo, true);
if (is_standby(conn)) retval = is_standby(conn);
if (retval)
{ {
log_err(_("\n%s: STANDBY PROMOTE failed, this is still a standby node.\n"), progname); log_err(_(retval == 1 ? "%s: STANDBY PROMOTE failed, this is still a standby node.\n" :
"%s: connection to node lost!\n"), progname);
} }
else else
{ {
log_err(_("\n%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"), progname); log_err(_("%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"), progname);
} }
PQfinish(conn); PQfinish(conn);
return; return;
@@ -1348,13 +1384,13 @@ do_standby_follow(void)
{ {
PGconn *conn; PGconn *conn;
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN], *ret;
char script[MAXLEN]; char script[MAXLEN];
char master_conninfo[MAXLEN]; char master_conninfo[MAXLEN];
PGconn *master_conn; PGconn *master_conn;
int master_id; int master_id;
int r; int r, retval;
char data_dir[MAXLEN]; char data_dir[MAXLEN];
char master_version[MAXVERSIONSTR]; char master_version[MAXVERSIONSTR];
@@ -1366,18 +1402,22 @@ do_standby_follow(void)
/* Check we are in a standby node */ /* Check we are in a standby node */
log_info(_("%s connected to standby, checking its state\n"), progname); log_info(_("%s connected to standby, checking its state\n"), progname);
if (!is_standby(conn)) retval = is_standby(conn);
if (retval == 0 || retval == -1)
{ {
log_err(_("\n%s: The command should be executed in a standby node\n"), progname); log_err(_(retval == 0 ? "%s: The command should be executed in a standby node\n" :
"%s: connection to node lost!\n"), progname);
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* should be v9 or better */ /* should be v9 or better */
pg_version(conn, standby_version); ret = pg_version(conn, standby_version);
if (strcmp(standby_version, "") == 0) if (ret == NULL || strcmp(standby_version, "") == 0)
{ {
log_err(_("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname); if (ret != NULL)
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
PQfinish(conn); PQfinish(conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -1390,6 +1430,11 @@ do_standby_follow(void)
do do
{ {
if (!is_pgup(conn, options.master_response_timeout))
{
conn = establishDBConnection(options.conninfo, true);
}
master_conn = getMasterConnection(conn, repmgr_schema, master_conn = getMasterConnection(conn, repmgr_schema,
options.cluster_name, &master_id,(char *) &master_conninfo); options.cluster_name, &master_id,(char *) &master_conninfo);
} }
@@ -1403,9 +1448,12 @@ do_standby_follow(void)
} }
/* Check we are going to point to a master */ /* Check we are going to point to a master */
if (is_standby(master_conn)) retval = is_standby(master_conn);
if (retval)
{ {
log_err(_("%s: The node to follow should be a master\n"), progname); log_err(_(retval == 1 ? "%s: The node to follow should be a master\n" :
"%s: connection to node lost!\n"), progname);
PQfinish(conn); PQfinish(conn);
PQfinish(master_conn); PQfinish(master_conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
@@ -1413,10 +1461,11 @@ do_standby_follow(void)
/* should be v9 or better */ /* should be v9 or better */
log_info(_("%s connected to master, checking its state\n"), progname); log_info(_("%s connected to master, checking its state\n"), progname);
pg_version(master_conn, master_version); ret = pg_version(master_conn, master_version);
if (strcmp(master_version, "") == 0) if (ret == NULL || strcmp(master_version, "") == 0)
{ {
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); if (ret != NULL)
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
PQfinish(conn); PQfinish(conn);
PQfinish(master_conn); PQfinish(master_conn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
@@ -1441,7 +1490,7 @@ do_standby_follow(void)
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN); strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
PQfinish(master_conn); PQfinish(master_conn);
log_info(_("%s Changing standby's master"),progname); log_info(_("%s Changing standby's master\n"),progname);
/* Get the data directory full path */ /* Get the data directory full path */
sqlquery_snprintf(sqlquery, "SELECT setting " sqlquery_snprintf(sqlquery, "SELECT setting "
@@ -1464,8 +1513,7 @@ do_standby_follow(void)
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
/* Finally, restart the service */ /* Finally, restart the service */
/* We assume the pg_ctl script is in the PATH */ maxlen_snprintf(script, "%s/pg_ctl %s -w -D %s -m fast restart", options.pg_bindir, options.pgctl_options, data_dir);
maxlen_snprintf(script, "pg_ctl -w -D %s -m fast restart", data_dir);
r = system(script); r = system(script);
if (r != 0) if (r != 0)
{ {
@@ -1483,27 +1531,19 @@ do_witness_create(void)
PGconn *masterconn; PGconn *masterconn;
PGconn *witnessconn; PGconn *witnessconn;
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN], *ret;
char script[MAXLEN]; char script[MAXLEN];
char buf[MAXLEN]; char buf[MAXLEN];
FILE *pg_conf = NULL; FILE *pg_conf = NULL;
int r = 0; int r = 0, retval;
int i; int i;
char master_version[MAXVERSIONSTR]; char master_version[MAXVERSIONSTR];
char master_hba_file[MAXLEN]; char master_hba_file[MAXLEN];
/* Check this directory could be used as a PGDATA dir */
if (!create_pgdir(runtime_options.dest_dir, runtime_options.force))
{
log_err(_("witness create: couldn't create data directory (\"%s\") for witness"),
runtime_options.dest_dir);
exit(ERR_BAD_CONFIG);
}
/* Connection parameters for master only */ /* Connection parameters for master only */
keywords[0] = "host"; keywords[0] = "host";
values[0] = runtime_options.host; values[0] = runtime_options.host;
@@ -1519,18 +1559,22 @@ do_witness_create(void)
} }
/* primary should be v9 or better */ /* primary should be v9 or better */
pg_version(masterconn, master_version); ret = pg_version(masterconn, master_version);
if (strcmp(master_version, "") == 0) if (ret == NULL || strcmp(master_version, "") == 0)
{ {
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); if (ret != NULL)
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
PQfinish(masterconn); PQfinish(masterconn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* Check we are connecting to a primary node */ /* Check we are connecting to a primary node */
if (is_standby(masterconn)) retval = is_standby(masterconn);
if (retval)
{ {
log_err(_("The command should not run on a standby node\n")); log_err(_(retval == 1 ? "The command should not run on a standby node\n" :
"Connection to node lost!\n"));
PQfinish(masterconn); PQfinish(masterconn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -1545,6 +1589,15 @@ do_witness_create(void)
exit(ERR_BAD_SSH); exit(ERR_BAD_SSH);
} }
/* Check this directory could be used as a PGDATA dir */
if (!create_pgdir(runtime_options.dest_dir, runtime_options.force))
{
log_err(_("witness create: couldn't create data directory (\"%s\") for witness"),
runtime_options.dest_dir);
exit(ERR_BAD_CONFIG);
}
/* /*
* To create a witness server we need to: * To create a witness server we need to:
* 1) initialize the cluster * 1) initialize the cluster
@@ -1553,8 +1606,7 @@ do_witness_create(void)
*/ */
/* Create the cluster for witness */ /* Create the cluster for witness */
/* We assume the pg_ctl script is in the PATH */ sprintf(script, "%s/pg_ctl %s -D %s init -o \"-W\"", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir);
sprintf(script, "pg_ctl -D %s init -o \"-W\"", runtime_options.dest_dir);
log_info("Initialize cluster for witness: %s.\n", script); log_info("Initialize cluster for witness: %s.\n", script);
r = system(script); r = system(script);
@@ -1573,7 +1625,7 @@ do_witness_create(void)
pg_conf = fopen(buf, "a"); pg_conf = fopen(buf, "a");
if (pg_conf == NULL) if (pg_conf == NULL)
{ {
log_err(_("\n%s: could not open \"%s\" for adding extra config: %s\n"), progname, buf, strerror(errno)); log_err(_("%s: could not open \"%s\" for adding extra config: %s\n"), progname, buf, strerror(errno));
PQfinish(masterconn); PQfinish(masterconn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -1627,7 +1679,7 @@ do_witness_create(void)
} }
/* start new instance */ /* start new instance */
sprintf(script, "pg_ctl -w -D %s start", runtime_options.dest_dir); sprintf(script, "%s/pg_ctl %s -w -D %s start", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir);
log_info(_("Start cluster for witness: %s"), script); log_info(_("Start cluster for witness: %s"), script);
r = system(script); r = system(script);
if (r != 0) if (r != 0)
@@ -1640,7 +1692,7 @@ do_witness_create(void)
/* register ourselves in the master */ /* register ourselves in the master */
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, name, conninfo, priority, witness) " sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, name, conninfo, priority, witness) "
"VALUES (%d, '%s', '%s', '%s', %d, true)", "VALUES (%d, '%s', '%s', '%s', %d, true)",
repmgr_schema, options.node, options.cluster_name, options.node_name, options.conninfo); repmgr_schema, options.node, options.cluster_name, options.node_name, options.conninfo, options.priority);
log_debug(_("witness create: %s"), sqlquery); log_debug(_("witness create: %s"), sqlquery);
if (!PQexec(masterconn, sqlquery)) if (!PQexec(masterconn, sqlquery))
@@ -1680,8 +1732,8 @@ do_witness_create(void)
static void static void
usage(void) usage(void)
{ {
log_err(_("\n\n%s: Replicator manager \n"), progname); fprintf(stderr, _("\n\n%s: Replicator manager \n"), progname);
log_err(_("Try \"%s --help\" for more information.\n"), progname); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
} }
@@ -1691,39 +1743,44 @@ help(const char *progname)
{ {
printf(_("\n%s: Replicator manager \n"), progname); printf(_("\n%s: Replicator manager \n"), progname);
printf(_("Usage:\n")); printf(_("Usage:\n"));
printf(_(" %s [OPTIONS] master {register}\n"), progname); printf(_(" %s [OPTIONS] master {register}\n"), progname);
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
progname); progname);
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname); printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
printf(_("\nGeneral options:\n")); printf(_("\nGeneral options:\n"));
printf(_(" --help show this help, then exit\n")); printf(_(" --help show this help, then exit\n"));
printf(_(" --version output version information, then exit\n")); printf(_(" --version output version information, then exit\n"));
printf(_(" --verbose output verbose activity information\n")); printf(_(" --verbose output verbose activity information\n"));
printf(_("\nConnection options:\n")); printf(_("\nConnection options:\n"));
printf(_(" -d, --dbname=DBNAME database to connect to\n")); printf(_(" -d, --dbname=DBNAME database to connect to\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port\n")); printf(_(" -p, --port=PORT database server port\n"));
printf(_(" -U, --username=USERNAME database user name to connect as\n")); printf(_(" -U, --username=USERNAME database user name to connect as\n"));
printf(_("\nConfiguration options:\n")); printf(_("\nConfiguration options:\n"));
printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
printf(_(" -l, --local-port=PORT standby or witness server local port\n")); " copied to\n"));
printf(_(" -f, --config_file=PATH path to the configuration file\n")); printf(_(" -l, --local-port=PORT standby or witness server local port\n"));
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); printf(_(" -f, --config_file=PATH path to the configuration file\n"));
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n")); printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC\n" \
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n")); " wal_keep_segments (default: 5000)\n"));
printf(_(" -F, --force force potentially dangerous operations to happen\n")); printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
printf(_(" -W, --wait wait for a master to appear")); printf(_(" -k, --keep-history=VALUE keeps indicated number of days of\n" \
" history\n"));
printf(_(" -F, --force force potentially dangerous operations\n" \
" to happen\n"));
printf(_(" -W, --wait wait for a master to appear\n"));
printf(_("\n%s performs some tasks like clone a node, promote it "), progname); printf(_("\n%s performs some tasks like clone a node, promote it or making follow\n"), progname);
printf(_("or making follow another node and then exits.\n")); printf(_("another node and then exits.\n\n"));
printf(_("COMMANDS:\n")); printf(_("COMMANDS:\n"));
printf(_(" master register - registers the master in a cluster\n")); printf(_(" master register - registers the master in a cluster\n"));
printf(_(" standby register - registers a standby in a cluster\n")); printf(_(" standby register - registers a standby in a cluster\n"));
printf(_(" standby clone [node] - allows creation of a new standby\n")); printf(_(" standby clone [node] - allows creation of a new standby\n"));
printf(_(" standby promote - allows manual promotion of a specific standby into a ")); printf(_(" standby promote - allows manual promotion of a specific standby into\n" \
printf(_("new master in the event of a failover\n")); " a new master in the event of a failover\n"));
printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); printf(_(" standby follow - allows the standby to re-point itself to a new\n" \
" master\n"));
printf(_(" cluster show - print node information\n")); printf(_(" cluster show - print node information\n"));
printf(_(" cluster cleanup - cleans monitor's history\n")); printf(_(" cluster cleanup - cleans monitor's history\n"));
} }
@@ -1790,7 +1847,7 @@ test_ssh_connection(char *host, char *remote_user)
else else
maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s", options.ssh_options, host, remote_user, TRUEBIN_PATH); maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s", options.ssh_options, host, remote_user, TRUEBIN_PATH);
log_debug(_("command is: %s"), script); log_debug(_("command is: %s\n"), script);
r = system(script); r = system(script);
if (r != 0) if (r != 0)
log_info(_("Can not connect to the remote host (%s)\n"), host); log_info(_("Can not connect to the remote host (%s)\n"), host);
@@ -1857,7 +1914,7 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
log_info(_("rsync partial transfer warning ignored\n")); log_info(_("rsync partial transfer warning ignored\n"));
} }
else else
log_warning( _("\nrsync completed with return code 24: " log_warning( _("rsync completed with return code 24: "
"\"Partial transfer due to vanished source files\".\n" "\"Partial transfer due to vanished source files\".\n"
"This can happen because of normal operation " "This can happen because of normal operation "
"on the master server, but it may indicate an " "on the master server, but it may indicate an "

View File

@@ -34,3 +34,29 @@ loglevel=NOTICE
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER # Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
# Default: STDERR # Default: STDERR
logfacility=STDERR logfacility=STDERR
# path to the directory containing the pg_ctl executable
pg_bindir=/usr/bin/
#
# you may add command line arguments for pg_ctl
#
# pg_ctl_options='-s'
#
# redirect stderr to a logfile
#
# logfile='/var/log/repmgr.log'
#
# change monitoring interval; default is 2s
#
# monitor_interval_secs=2
#
# change wait time for master; before we bail out and exit when the
# master disappears, we wait 6 * retry_promote_interval_secs seconds;
# by default this would be half an hour (since the default value of
# retry_promote_interval_secs is 300)
#
# retry_promote_interval_secs=300

View File

@@ -1,6 +1,6 @@
/* /*
* repmgr.h * repmgr.h
* Copyright (c) 2ndQuadrant, 2010-2012 * Copyright (c) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -69,6 +69,6 @@ typedef struct
int keep_history; int keep_history;
} t_runtime_options; } t_runtime_options;
#define SLEEP_MONITOR 2 #define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }
#endif #endif

View File

@@ -1,7 +1,7 @@
/* /*
* repmgr.sql * repmgr.sql
* *
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
*/ */

400
repmgrd.c
View File

@@ -1,6 +1,6 @@
/* /*
* repmgrd.c - Replication manager daemon * repmgrd.c - Replication manager daemon
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This module connects to the nodes of a replication cluster and monitors * This module connects to the nodes of a replication cluster and monitors
* how far are they from master * how far are they from master
@@ -22,6 +22,9 @@
#include <signal.h> #include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
@@ -65,6 +68,7 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0};
a.xrecoff = uxrecoff a.xrecoff = uxrecoff
#endif #endif
/* /*
* Struct to keep info about the nodes, used in the voting process in * Struct to keep info about the nodes, used in the voting process in
* do_failover() * do_failover()
@@ -103,20 +107,22 @@ char repmgr_schema[MAXLEN];
bool failover_done = false; bool failover_done = false;
char *pid_file = NULL;
/* /*
* should initialize with {0} to be ANSI compliant ? but this raises * should initialize with {0} to be ANSI compliant ? but this raises
* error with gcc -Wall * error with gcc -Wall
*/ */
t_configuration_options config = {}; t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
static void help(const char* progname); static void help(const char* progname);
static void usage(void); static void usage(void);
static void checkClusterConfiguration(PGconn *conn, PGconn *primary); static void checkClusterConfiguration(PGconn *conn);
static void checkNodeConfiguration(char *conninfo); static void checkNodeConfiguration(void);
static void StandbyMonitor(void); static void StandbyMonitor(void);
static void WitnessMonitor(void); static void WitnessMonitor(void);
static bool CheckPrimaryConnection(void); static bool CheckConnection(PGconn *conn, const char *type);
static void update_shared_memory(char *last_wal_standby_applied); static void update_shared_memory(char *last_wal_standby_applied);
static void update_registration(void); static void update_registration(void);
static void do_failover(void); static void do_failover(void);
@@ -131,7 +137,15 @@ static volatile sig_atomic_t got_SIGHUP = false;
static void handle_sighup(SIGNAL_ARGS); static void handle_sighup(SIGNAL_ARGS);
static void handle_sigint(SIGNAL_ARGS); static void handle_sigint(SIGNAL_ARGS);
static void terminate(int retval);
#ifndef WIN32
static void setup_event_handlers(void); static void setup_event_handlers(void);
#endif
static void do_daemonize();
static void check_and_create_pid_file(const char *pid_file);
#define CloseConnections() \ #define CloseConnections() \
if (PQisBusy(primaryConn) == 1) \ if (PQisBusy(primaryConn) == 1) \
@@ -150,13 +164,16 @@ main(int argc, char **argv)
{"config", required_argument, NULL, 'f'}, {"config", required_argument, NULL, 'f'},
{"verbose", no_argument, NULL, 'v'}, {"verbose", no_argument, NULL, 'v'},
{"monitoring-history", no_argument, NULL, 'm'}, {"monitoring-history", no_argument, NULL, 'm'},
{"daemonize", no_argument, NULL, 'd'},
{"pid-file", required_argument, NULL, 'p'},
{NULL, 0, NULL, 0} {NULL, 0, NULL, 0}
}; };
int optindex; int optindex;
int c; int c, ret;
bool daemonize = false;
char standby_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR], *ret_ver;
progname = get_progname(argv[0]); progname = get_progname(argv[0]);
@@ -174,7 +191,7 @@ main(int argc, char **argv)
} }
} }
while ((c = getopt_long(argc, argv, "f:v:m", long_options, &optindex)) != -1) while ((c = getopt_long(argc, argv, "f:v:mdp:", long_options, &optindex)) != -1)
{ {
switch (c) switch (c)
{ {
@@ -187,13 +204,31 @@ main(int argc, char **argv)
case 'm': case 'm':
monitoring_history = true; monitoring_history = true;
break; break;
case 'd':
daemonize = true;
break;
case 'p':
pid_file = optarg;
break;
default: default:
usage(); usage();
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
} }
if (daemonize)
{
do_daemonize();
}
if (pid_file)
{
check_and_create_pid_file(pid_file);
}
#ifndef WIN32
setup_event_handlers(); setup_event_handlers();
#endif
/* /*
* Read the configuration file: repmgr.conf * Read the configuration file: repmgr.conf
@@ -203,13 +238,21 @@ main(int argc, char **argv)
{ {
log_err(_("Node information is missing. " log_err(_("Node information is missing. "
"Check the configuration file, or provide one if you have not done so.\n")); "Check the configuration file, or provide one if you have not done so.\n"));
exit(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
logger_init(progname, local_options.loglevel, local_options.logfacility); freopen("/dev/null", "r", stdin);
freopen("/dev/null", "w", stdout);
logger_init(&local_options, progname, local_options.loglevel, local_options.logfacility);
if (verbose) if (verbose)
logger_min_verbose(LOG_INFO); logger_min_verbose(LOG_INFO);
if (log_type == REPMGR_SYSLOG)
{
freopen("/dev/null", "w", stderr);
}
snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name);
log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo); log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo);
@@ -217,12 +260,12 @@ main(int argc, char **argv)
/* should be v9 or better */ /* should be v9 or better */
log_info(_("%s Connected to database, checking its state\n"), progname); log_info(_("%s Connected to database, checking its state\n"), progname);
pg_version(myLocalConn, standby_version); ret_ver = pg_version(myLocalConn, standby_version);
if (strcmp(standby_version, "") == 0) if (ret_ver == NULL || strcmp(standby_version, "") == 0)
{ {
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); if(ret_ver != NULL)
PQfinish(myLocalConn); log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
exit(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
@@ -239,12 +282,25 @@ main(int argc, char **argv)
* Set my server mode, establish a connection to primary * Set my server mode, establish a connection to primary
* and start monitor * and start monitor
*/ */
if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node)) ret = is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node);
if (ret == 1)
myLocalMode = WITNESS_MODE; myLocalMode = WITNESS_MODE;
else if (is_standby(myLocalConn)) else if (ret == 0)
myLocalMode = STANDBY_MODE; {
else /* is the master */ ret = is_standby(myLocalConn);
myLocalMode = PRIMARY_MODE;
if (ret == 1)
myLocalMode = STANDBY_MODE;
else if (ret == 0) /* is the master */
myLocalMode = PRIMARY_MODE;
}
/* XXX we did this before changing is_standby() to return int; we
* should not exit at this point, but for now we do until we have a
* better strategy */
if (ret == -1)
terminate(1);
switch (myLocalMode) switch (myLocalMode)
{ {
@@ -253,8 +309,8 @@ main(int argc, char **argv)
strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
primaryConn = myLocalConn; primaryConn = myLocalConn;
checkClusterConfiguration(myLocalConn, primaryConn); checkClusterConfiguration(myLocalConn);
checkNodeConfiguration(local_options.conninfo); checkNodeConfiguration();
if (reload_configuration(config_file, &local_options)) if (reload_configuration(config_file, &local_options))
{ {
@@ -269,26 +325,26 @@ main(int argc, char **argv)
/* Check that primary is still alive, and standbies are sending info */ /* Check that primary is still alive, and standbies are sending info */
/* /*
* Every SLEEP_MONITOR seconds, do master checks * Every local_options.monitor_interval_secs seconds, do master checks
* XXX * XXX
* Check that standbies are sending info * Check that standbies are sending info
*/ */
do do
{ {
if (CheckPrimaryConnection()) if (CheckConnection(primaryConn, "master"))
{ {
/* /*
CheckActiveStandbiesConnections(); CheckActiveStandbiesConnections();
CheckInactiveStandbies(); CheckInactiveStandbies();
*/ */
sleep(SLEEP_MONITOR); sleep(local_options.monitor_interval_secs);
} }
else else
{ {
/* XXX /* XXX
* May we do something more verbose ? * May we do something more verbose ?
*/ */
exit(1); terminate(1);
} }
if (got_SIGHUP) if (got_SIGHUP)
@@ -299,6 +355,12 @@ main(int argc, char **argv)
PQfinish(myLocalConn); PQfinish(myLocalConn);
myLocalConn = establishDBConnection(local_options.conninfo, true); myLocalConn = establishDBConnection(local_options.conninfo, true);
primaryConn = myLocalConn; primaryConn = myLocalConn;
if (*local_options.logfile)
{
freopen(local_options.logfile, "a", stderr);
}
update_registration(); update_registration();
} }
got_SIGHUP = false; got_SIGHUP = false;
@@ -315,12 +377,11 @@ main(int argc, char **argv)
&primary_options.node, NULL); &primary_options.node, NULL);
if (primaryConn == NULL) if (primaryConn == NULL)
{ {
CloseConnections(); terminate(ERR_BAD_CONFIG);
exit(ERR_BAD_CONFIG);
} }
checkClusterConfiguration(myLocalConn, primaryConn); checkClusterConfiguration(myLocalConn);
checkNodeConfiguration(local_options.conninfo); checkNodeConfiguration();
if (reload_configuration(config_file, &local_options)) if (reload_configuration(config_file, &local_options))
{ {
@@ -330,7 +391,7 @@ main(int argc, char **argv)
} }
/* /*
* Every SLEEP_MONITOR seconds, do checks * Every local_options.monitor_interval_secs seconds, do checks
*/ */
if (myLocalMode == WITNESS_MODE) if (myLocalMode == WITNESS_MODE)
{ {
@@ -347,7 +408,7 @@ main(int argc, char **argv)
WitnessMonitor(); WitnessMonitor();
else if (myLocalMode == STANDBY_MODE) else if (myLocalMode == STANDBY_MODE)
StandbyMonitor(); StandbyMonitor();
sleep(SLEEP_MONITOR); sleep(local_options.monitor_interval_secs);
if (got_SIGHUP) if (got_SIGHUP)
{ {
@@ -396,7 +457,7 @@ WitnessMonitor(void)
* Check if the master is still available, if after 5 minutes of retries * Check if the master is still available, if after 5 minutes of retries
* we cannot reconnect, return false. * we cannot reconnect, return false.
*/ */
CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds CheckConnection(primaryConn, "master"); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */
if (PQstatus(primaryConn) != CONNECTION_OK) if (PQstatus(primaryConn) != CONNECTION_OK)
{ {
@@ -404,8 +465,7 @@ WitnessMonitor(void)
* If we can't reconnect, just exit... * If we can't reconnect, just exit...
* XXX we need to make witness connect to the new master * XXX we need to make witness connect to the new master
*/ */
PQfinish(myLocalConn); terminate(0);
exit(0);
} }
/* Fast path for the case where no history is requested */ /* Fast path for the case where no history is requested */
@@ -475,13 +535,19 @@ StandbyMonitor(void)
unsigned long long int lsn_standby_received; unsigned long long int lsn_standby_received;
unsigned long long int lsn_standby_applied; unsigned long long int lsn_standby_applied;
int connection_retries; int connection_retries, ret;
bool did_retry = false;
/* /*
* Check if the master is still available, if after 5 minutes of retries * Check if the master is still available, if after 5 minutes of retries
* we cannot reconnect, try to get a new master. * we cannot reconnect, try to get a new master.
*/ */
CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds CheckConnection(primaryConn, "master"); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */
if (!CheckConnection(myLocalConn, "standby"))
{
terminate(1);
}
if (PQstatus(primaryConn) != CONNECTION_OK) if (PQstatus(primaryConn) != CONNECTION_OK)
{ {
@@ -501,15 +567,17 @@ StandbyMonitor(void)
else else
{ {
log_err(_("We haven't found a new master, waiting before retry...\n")); log_err(_("We haven't found a new master, waiting before retry...\n"));
/* wait 5 minutes before retries, after 6 failures (30 minutes) we stop trying */ /* wait local_options.retry_promote_interval_secs seconds before retries,
sleep(300); * after 6 failures (6 * local_options.retry_promote_interval_secs
* seconds) we stop trying */
sleep(local_options.retry_promote_interval_secs);
} }
} }
if (PQstatus(primaryConn) != CONNECTION_OK) if (PQstatus(primaryConn) != CONNECTION_OK)
{ {
log_err(_("We couldn't reconnect for long enough, exiting...\n")); log_err(_("We couldn't reconnect for long enough, exiting...\n"));
exit(ERR_DB_CON); terminate(ERR_DB_CON);
} }
} }
else if (local_options.failover == AUTOMATIC_FAILOVER) else if (local_options.failover == AUTOMATIC_FAILOVER)
@@ -524,11 +592,32 @@ StandbyMonitor(void)
} }
/* Check if we still are a standby, we could have been promoted */ /* Check if we still are a standby, we could have been promoted */
if (!is_standby(myLocalConn)) do {
ret = is_standby(myLocalConn);
switch (ret)
{
case 0:
log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
terminate(1);
break;
case -1:
log_err(_("Standby node disappeared, trying to reconnect...\n"));
did_retry = true;
if (!CheckConnection(myLocalConn, "standby"))
{
terminate(0);
}
break;
}
} while(ret == -1);
if (did_retry)
{ {
log_err(_("It seems like we have been promoted, so exit from monitoring...\n")); log_info(_("standby connection got back up again!\n"));
CloseConnections();
exit(ERR_PROMOTED);
} }
/* Fast path for the case where no history is requested */ /* Fast path for the case where no history is requested */
@@ -652,8 +741,7 @@ do_failover(void)
{ {
log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(myLocalConn)); log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(myLocalConn));
PQclear(res); PQclear(res);
PQfinish(myLocalConn); terminate(ERR_DB_QUERY);
exit(ERR_DB_QUERY);
} }
/* /*
@@ -672,18 +760,21 @@ do_failover(void)
/* Initialize on false so if we can't reach this node we know that later */ /* Initialize on false so if we can't reach this node we know that later */
nodes[i].is_visible = false; nodes[i].is_visible = false;
nodes[i].is_ready = false; nodes[i].is_ready = false;
XLAssignValue(nodes[i].xlog_location, 0, 0); XLAssignValue(nodes[i].xlog_location, 0, 0);
log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"), log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"),
progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? "true" : "false"); progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? "true" : "false");
nodeConn = establishDBConnection(nodes[i].conninfostr, false); nodeConn = establishDBConnection(nodes[i].conninfostr, false);
/* if we can't see the node just skip it */ /* if we can't see the node just skip it */
if (PQstatus(nodeConn) != CONNECTION_OK) if (PQstatus(nodeConn) != CONNECTION_OK)
continue; continue;
visible_nodes++; visible_nodes++;
nodes[i].is_visible = true; nodes[i].is_visible = true;
PQfinish(nodeConn); PQfinish(nodeConn);
} }
PQclear(res); PQclear(res);
@@ -699,7 +790,7 @@ do_failover(void)
log_err(_("Can't reach most of the nodes.\n" log_err(_("Can't reach most of the nodes.\n"
"Let the other standby servers decide which one will be the primary.\n" "Let the other standby servers decide which one will be the primary.\n"
"Manual action will be needed to readd this node to the cluster.\n")); "Manual action will be needed to readd this node to the cluster.\n"));
exit(ERR_FAILOVER_FAIL); terminate(ERR_FAILOVER_FAIL);
} }
/* Query all the nodes to determine which ones are ready */ /* Query all the nodes to determine which ones are ready */
@@ -720,9 +811,12 @@ do_failover(void)
if (PQstatus(nodeConn) != CONNECTION_OK) if (PQstatus(nodeConn) != CONNECTION_OK)
{ {
log_err(_("It seems new problems are arising, manual intervention is needed\n")); log_err(_("It seems new problems are arising, manual intervention is needed\n"));
exit(ERR_FAILOVER_FAIL); terminate(ERR_FAILOVER_FAIL);
} }
uxlogid = 0;
uxrecoff = 0;
sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
res = PQexec(nodeConn, sqlquery); res = PQexec(nodeConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -731,7 +825,7 @@ do_failover(void)
log_info(_("Connection details: %s\n"), nodes[i].conninfostr); log_info(_("Connection details: %s\n"), nodes[i].conninfostr);
PQclear(res); PQclear(res);
PQfinish(nodeConn); PQfinish(nodeConn);
exit(ERR_FAILOVER_FAIL); terminate(ERR_FAILOVER_FAIL);
} }
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
@@ -746,7 +840,7 @@ do_failover(void)
PQclear(res); PQclear(res);
PQfinish(nodeConn); PQfinish(nodeConn);
log_info(_("InvalidXLogRecPtr detected in a standby\n")); log_info(_("InvalidXLogRecPtr detected in a standby\n"));
exit(ERR_FAILOVER_FAIL); terminate(ERR_FAILOVER_FAIL);
} }
XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff); XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff);
@@ -761,11 +855,10 @@ do_failover(void)
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn)); log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn));
PQfinish(myLocalConn);
PQclear(res); PQclear(res);
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
update_shared_memory(last_wal_standby_applied); update_shared_memory(last_wal_standby_applied);
exit(ERR_DB_QUERY); terminate(ERR_DB_QUERY);
} }
/* write last location in shared memory */ /* write last location in shared memory */
@@ -810,6 +903,9 @@ do_failover(void)
break; break;
} }
uxlogid = 0;
uxrecoff = 0;
sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema);
res = PQexec(nodeConn, sqlquery); res = PQexec(nodeConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -817,12 +913,22 @@ do_failover(void)
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(nodeConn)); log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(nodeConn));
PQclear(res); PQclear(res);
PQfinish(nodeConn); PQfinish(nodeConn);
exit(ERR_DB_QUERY); terminate(ERR_DB_QUERY);
} }
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
{
log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0));
/* we can't do anything but fail at this point... */
if (*PQgetvalue(res, 0, 0) == '\0')
{
log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n");
exit(ERR_BAD_CONFIG);
}
}
PQclear(res); PQclear(res);
PQfinish(nodeConn); PQfinish(nodeConn);
/* If position is 0/0, keep checking */ /* If position is 0/0, keep checking */
@@ -891,7 +997,7 @@ do_failover(void)
if (best_candidate.is_witness) if (best_candidate.is_witness)
{ {
log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"), progname); log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"), progname);
exit(ERR_FAILOVER_FAIL); terminate(ERR_FAILOVER_FAIL);
} }
/* wait */ /* wait */
@@ -901,11 +1007,17 @@ do_failover(void)
log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"), log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"),
progname); progname);
log_debug(_("promote command is: \"%s\"\n"), local_options.promote_command); log_debug(_("promote command is: \"%s\"\n"), local_options.promote_command);
if (log_type == REPMGR_STDERR && *local_options.logfile)
{
fflush(stderr);
}
r = system(local_options.promote_command); r = system(local_options.promote_command);
if (r != 0) if (r != 0)
{ {
log_err(_("%s: promote command failed. You could check and try it manually.\n"), progname); log_err(_("%s: promote command failed. You could check and try it manually.\n"), progname);
exit(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
} }
else if (find_best) else if (find_best)
@@ -921,17 +1033,22 @@ do_failover(void)
* New Primary need some time to be promoted. * New Primary need some time to be promoted.
* The follow command should take care of that. * The follow command should take care of that.
*/ */
if (log_type == REPMGR_STDERR && *local_options.logfile)
{
fflush(stderr);
}
r = system(local_options.follow_command); r = system(local_options.follow_command);
if (r != 0) if (r != 0)
{ {
log_err(_("%s: follow command failed. You could check and try it manually.\n"), progname); log_err(_("%s: follow command failed. You could check and try it manually.\n"), progname);
exit(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
} }
else else
{ {
log_err(_("%s: Did not find candidates. You should check and try manually.\n"), progname); log_err(_("%s: Did not find candidates. You should check and try manually.\n"), progname);
exit(ERR_FAILOVER_FAIL); terminate(ERR_FAILOVER_FAIL);
} }
/* to force it to re-calculate mode and master node */ /* to force it to re-calculate mode and master node */
@@ -943,7 +1060,7 @@ do_failover(void)
static bool static bool
CheckPrimaryConnection(void) CheckConnection(PGconn *conn, const char *type)
{ {
int connection_retries; int connection_retries;
@@ -955,10 +1072,11 @@ CheckPrimaryConnection(void)
*/ */
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++) for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
{ {
if (!is_pgup(primaryConn, local_options.master_response_timeout)) if (!is_pgup(conn, local_options.master_response_timeout))
{ {
log_warning(_("%s: Connection to master has been lost, trying to recover... %i seconds before failover decision\n"), log_warning(_("%s: Connection to %s has been lost, trying to recover... %i seconds before failover decision\n"),
progname, progname,
type,
(local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries))); (local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries)));
/* wait local_options.reconnect_intvl seconds between retries */ /* wait local_options.reconnect_intvl seconds between retries */
sleep(local_options.reconnect_intvl); sleep(local_options.reconnect_intvl);
@@ -967,12 +1085,12 @@ CheckPrimaryConnection(void)
{ {
if ( connection_retries > 0) if ( connection_retries > 0)
{ {
log_info(_("%s: Connection to master has been restored.\n"), progname); log_info(_("%s: Connection to %s has been restored.\n"), progname, type);
} }
return true; return true;
} }
} }
if (!is_pgup(primaryConn, local_options.master_response_timeout)) if (!is_pgup(conn, local_options.master_response_timeout))
{ {
log_err(_("%s: We couldn't reconnect for long enough, exiting...\n"), progname); log_err(_("%s: We couldn't reconnect for long enough, exiting...\n"), progname);
/* XXX Anything else to do here? */ /* XXX Anything else to do here? */
@@ -983,7 +1101,7 @@ CheckPrimaryConnection(void)
static void static void
checkClusterConfiguration(PGconn *conn, PGconn *primary) checkClusterConfiguration(PGconn *conn)
{ {
PGresult *res; PGresult *res;
@@ -997,8 +1115,7 @@ checkClusterConfiguration(PGconn *conn, PGconn *primary)
{ {
log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn)); log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn));
PQclear(res); PQclear(res);
CloseConnections(); terminate(ERR_DB_QUERY);
exit(ERR_DB_QUERY);
} }
/* /*
@@ -1012,15 +1129,14 @@ checkClusterConfiguration(PGconn *conn, PGconn *primary)
{ {
log_err(_("The replication cluster is not configured\n")); log_err(_("The replication cluster is not configured\n"));
PQclear(res); PQclear(res);
CloseConnections(); terminate(ERR_BAD_CONFIG);
exit(ERR_BAD_CONFIG);
} }
PQclear(res); PQclear(res);
} }
static void static void
checkNodeConfiguration(char *conninfo) checkNodeConfiguration(void)
{ {
PGresult *res; PGresult *res;
@@ -1039,8 +1155,7 @@ checkNodeConfiguration(char *conninfo)
{ {
log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn)); log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn));
PQclear(res); PQclear(res);
CloseConnections(); terminate(ERR_BAD_CONFIG);
exit(ERR_BAD_CONFIG);
} }
/* /*
@@ -1055,8 +1170,7 @@ checkNodeConfiguration(char *conninfo)
if (myLocalMode == WITNESS_MODE) if (myLocalMode == WITNESS_MODE)
{ {
log_err(_("The witness is not configured\n")); log_err(_("The witness is not configured\n"));
CloseConnections(); terminate(ERR_BAD_CONFIG);
exit(ERR_BAD_CONFIG);
} }
/* Adding the node */ /* Adding the node */
@@ -1073,8 +1187,7 @@ checkNodeConfiguration(char *conninfo)
{ {
log_err(_("Cannot insert node details, %s\n"), log_err(_("Cannot insert node details, %s\n"),
PQerrorMessage(primaryConn)); PQerrorMessage(primaryConn));
CloseConnections(); terminate(ERR_BAD_CONFIG);
exit(ERR_BAD_CONFIG);
} }
} }
else else
@@ -1116,6 +1229,8 @@ void help(const char *progname)
printf(_(" --verbose output verbose activity information\n")); printf(_(" --verbose output verbose activity information\n"));
printf(_(" --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n")); printf(_(" --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n"));
printf(_(" -f, --config_file=PATH configuration file\n")); printf(_(" -f, --config_file=PATH configuration file\n"));
printf(_(" -d, --daemonize detach process from foreground\n"));
printf(_(" -p, --pid-file=PATH write a PID file\n"));
printf(_("\n%s monitors a cluster of servers.\n"), progname); printf(_("\n%s monitors a cluster of servers.\n"), progname);
} }
@@ -1124,9 +1239,7 @@ void help(const char *progname)
static void static void
handle_sigint(SIGNAL_ARGS) handle_sigint(SIGNAL_ARGS)
{ {
CloseConnections(); terminate(0);
logger_shutdown();
exit(1);
} }
/* SIGHUP: set flag to re-read config file at next convenient time */ /* SIGHUP: set flag to re-read config file at next convenient time */
@@ -1141,9 +1254,24 @@ setup_event_handlers(void)
{ {
pqsignal(SIGHUP, handle_sighup); pqsignal(SIGHUP, handle_sighup);
pqsignal(SIGINT, handle_sigint); pqsignal(SIGINT, handle_sigint);
pqsignal(SIGTERM, handle_sigint);
} }
#endif #endif
/*
 * terminate: single exit path for the daemon.
 *
 * Closes the connections via CloseConnections(), shuts the logger down,
 * removes the PID file when pid_file is set, and exits the process with
 * the status passed in retval.  This function does not return.
 */
static void
terminate(int retval)
{
	/* tear down connections before the logger goes away */
	CloseConnections();
	logger_shutdown();

	/* pid_file unset means there is nothing to remove */
	if (pid_file != NULL)
	{
		unlink(pid_file);
	}

	exit(retval);
}
static void static void
update_shared_memory(char *last_wal_standby_applied) update_shared_memory(char *last_wal_standby_applied)
@@ -1160,6 +1288,13 @@ update_shared_memory(char *last_wal_standby_applied)
log_warning(_("Cannot update this standby's shared memory: %s\n"), PQerrorMessage(myLocalConn)); log_warning(_("Cannot update this standby's shared memory: %s\n"), PQerrorMessage(myLocalConn));
/* XXX is this enough reason to terminate this repmgrd? */ /* XXX is this enough reason to terminate this repmgrd? */
} }
else if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
{
/* this surely is more than enough reason to exit */
log_crit(_("Cannot update this standby's shared memory, maybe shared_preload_libraries=repmgr_funcs is not set?\n"));
exit(ERR_BAD_CONFIG);
}
PQclear(res); PQclear(res);
} }
@@ -1178,8 +1313,115 @@ update_registration(void)
if (PQresultStatus(res) != PGRES_COMMAND_OK) if (PQresultStatus(res) != PGRES_COMMAND_OK)
{ {
log_err(_("Cannot update registration: %s\n"), PQerrorMessage(primaryConn)); log_err(_("Cannot update registration: %s\n"), PQerrorMessage(primaryConn));
CloseConnections(); terminate(ERR_DB_CON);
exit(ERR_DB_CON);
} }
PQclear(res); PQclear(res);
} }
/*
 * do_daemonize: detach the process from the foreground.
 *
 * Sequence:
 *   1. fork(); the original process exits with status 0.
 *   2. the child calls setsid() to start a new session.
 *   3. fork() again; the session leader exits with status 0, so the
 *      surviving process can no longer open a controlling terminal.
 *   4. the surviving process changes its working directory to the
 *      directory part of config_file (everything before the last '/'),
 *      or to "/" when config_file contains no '/' past its first
 *      character.
 *
 * A failing fork() or setsid() is logged and ends the process with
 * ERR_SYS_FAILURE.  The same happens when the directory part of
 * config_file does not fit into the local MAXLEN buffer.  A failing
 * chdir() is logged but is not fatal; the process keeps its current
 * working directory in that case.
 */
static void
do_daemonize(void)
{
	char		path[MAXLEN];
	const char *last_slash;
	size_t		dir_len;
	pid_t		pid;

	pid = fork();
	if (pid == (pid_t) -1)
	{
		log_err("Error in fork(): %s\n", strerror(errno));
		exit(ERR_SYS_FAILURE);
	}
	if (pid != 0)
	{
		/* original (foreground) process */
		exit(0);
	}

	/* first child: become session leader */
	if (setsid() == (pid_t) -1)
	{
		log_err("Error in setsid(): %s\n", strerror(errno));
		exit(ERR_SYS_FAILURE);
	}

	/* ensure that we are no longer able to open a terminal */
	pid = fork();
	if (pid == (pid_t) -1)
	{
		log_err("Error in fork(): %s\n", strerror(errno));
		exit(ERR_SYS_FAILURE);
	}
	if (pid != 0)
	{
		/* session leader is done, the grandchild carries on */
		exit(0);
	}

	/*
	 * Work out the directory part of config_file.  A '/' at position 0
	 * alone (or no '/' at all) yields an empty directory part, for which
	 * we fall back to "/".
	 */
	last_slash = strrchr(config_file, '/');
	dir_len = (last_slash != NULL) ? (size_t) (last_slash - config_file) : 0;

	if (dir_len >= sizeof(path))
	{
		/* refuse to overflow path[] rather than chdir() to a truncated name */
		log_err("Directory part of configuration file path is too long: %s\n",
				config_file);
		exit(ERR_SYS_FAILURE);
	}

	if (dir_len == 0)
	{
		path[0] = '/';
		path[1] = '\0';
	}
	else
	{
		memcpy(path, config_file, dir_len);
		path[dir_len] = '\0';
	}

	if (chdir(path) != 0)
	{
		/* not fatal: we simply stay in the current working directory */
		log_err("Error in chdir(\"%s\"): %s\n", path, strerror(errno));
	}
}
/*
 * check_and_create_pid_file: refuse to start when another instance seems
 * to be alive, otherwise write our own PID to pid_file.
 *
 * If pid_file can be stat()ed, its contents are read and parsed as a
 * decimal PID.  When that PID is positive and kill(pid, 0) shows that the
 * process exists, an error is logged and the process exits with
 * ERR_BAD_CONFIG.  A kill() failure with EPERM also counts as "exists":
 * the process is there, we are just not allowed to signal it.  A file
 * with no usable PID, or one naming a process that is gone, is treated
 * as stale and overwritten.
 *
 * pid_file: path of the PID file to check and (re)create.
 *
 * Exits with ERR_BAD_CONFIG when an existing file cannot be opened for
 * reading, or when the new file cannot be opened or written.
 */
static void
check_and_create_pid_file(const char *pid_file)
{
	struct stat st;
	FILE	   *fp;
	char		buff[MAXLEN];
	size_t		nread;
	long		old_pid;

	if (stat(pid_file, &st) != -1)
	{
		fp = fopen(pid_file, "r");
		if (fp == NULL)
		{
			log_err("PID file %s exists but could not opened for reading. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file);
			exit(ERR_BAD_CONFIG);
		}

		/*
		 * Read byte-wise (size 1, count MAXLEN - 1): a PID file is far
		 * shorter than the buffer, and a short read of a single large
		 * element would leave the buffer contents indeterminate.
		 */
		nread = fread(buff, 1, sizeof(buff) - 1, fp);
		buff[nread] = '\0';
		fclose(fp);

		errno = 0;
		old_pid = strtol(buff, NULL, 10);

		/* only a positive value names a single process */
		if (errno == 0 && old_pid > 0)
		{
			if (kill((pid_t) old_pid, 0) == 0 || errno == EPERM)
			{
				log_err("PID file %s exists and seems to contain a valid PID. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file);
				exit(ERR_BAD_CONFIG);
			}
		}
	}

	fp = fopen(pid_file, "w");
	if (fp == NULL)
	{
		log_err("Could not open PID file %s!\n", pid_file);
		exit(ERR_BAD_CONFIG);
	}

	if (fprintf(fp, "%d", (int) getpid()) < 0)
	{
		log_err("Could not write PID file %s: %s\n", pid_file, strerror(errno));
		fclose(fp);
		exit(ERR_BAD_CONFIG);
	}

	/* fclose() flushes; a failure here means the PID never hit the file */
	if (fclose(fp) != 0)
	{
		log_err("Could not write PID file %s: %s\n", pid_file, strerror(errno));
		exit(ERR_BAD_CONFIG);
	}
}

View File

@@ -9,7 +9,8 @@ DATA=uninstall_repmgr_funcs.sql
OBJS=repmgr_funcs.o OBJS=repmgr_funcs.o
ifdef USE_PGXS ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs) PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS) include $(PGXS)
else else
subdir = contrib/repmgr/sql subdir = contrib/repmgr/sql

View File

@@ -1,7 +1,7 @@
/* /*
* strutil.c * strutil.c
* *
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -25,7 +25,7 @@
#include "log.h" #include "log.h"
#include "strutil.h" #include "strutil.h"
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap); static int xvsnprintf(char *str, size_t size, const char *format, va_list ap) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 0)));
/* Add strnlen on platforms that don't have it, like OS X */ /* Add strnlen on platforms that don't have it, like OS X */
#ifndef strnlen #ifndef strnlen
@@ -44,7 +44,7 @@ xvsnprintf(char *str, size_t size, const char *format, va_list ap)
retval = vsnprintf(str, size, format, ap); retval = vsnprintf(str, size, format, ap);
if (retval >= size) if (retval >= (int)size)
{ {
log_err(_("Buffer of size not large enough to format entire string '%s'\n"), log_err(_("Buffer of size not large enough to format entire string '%s'\n"),
str); str);

View File

@@ -1,6 +1,6 @@
/* /*
* strutil.h * strutil.h
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
@@ -31,9 +31,9 @@
#define MAXCONNINFO 1024 #define MAXCONNINFO 1024
extern int xsnprintf(char *str, size_t size, const char *format, ...); extern int xsnprintf(char *str, size_t size, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 4)));
extern int sqlquery_snprintf(char *str, const char *format, ...); extern int sqlquery_snprintf(char *str, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 2, 3)));
extern int maxlen_snprintf(char *str, const char *format, ...); extern int maxlen_snprintf(char *str, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 2, 3)));
/* Add strnlen on platforms that don't have it, like OS X */ /* Add strnlen on platforms that don't have it, like OS X */
#ifndef strnlen #ifndef strnlen

View File

@@ -1,7 +1,7 @@
/* /*
* uninstall_repmgr.sql * uninstall_repmgr.sql
* *
* Copyright (C) 2ndQuadrant, 2010-2012 * Copyright (C) 2ndQuadrant, 2010-2014
* *
*/ */

View File

@@ -1,4 +1,5 @@
#ifndef _VERSION_H_ #ifndef _VERSION_H_
#define _VERSION_H_ #define _VERSION_H_
#define REPMGR_VERSION "2.0beta1"
#define REPMGR_VERSION "2.0RC1"
#endif #endif