Files
repmgr/repmgr.c
Greg Smith 3a950c9f8b Squashed commit of the following:
commit e7ef17117efe6679e154a4905d587c808b48df50
Merge: cd3a280... 43268f2...
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 01:40:08 2011 -0400

    Merge commit 'origin/master' into autofailover

    Conflicts:
    	repmgr.c

commit cd3a280804a01c5270c5c743e5822c7beb9ac77a
Merge: 72ad378... 8200b68...
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 00:52:42 2011 -0400

    Merge commit 'origin/master' into autofailover

    Conflicts:
    	config.c

commit 72ad378bed21d74dab743fec411fe10b19007481
Merge: 17bafa1... 367d0b1...
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 00:38:01 2011 -0400

    Merge commit 'origin/master' into autofailover

    Conflicts:
    	config.c
    	dbutils.c
    	repmgr.c
    	repmgrd.c

commit 17bafa1ca509c1f6614810bab2538e570ebc599e
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Tue Jun 7 00:31:28 2011 -0400

    Run astyle to fix recent changes

commit a5fbbaecce8fe86bc17c0ebeb1324f9262967316
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue May 10 00:46:58 2011 +0200

    Fix a critical bug in the decision process

    If the postgresql on the first node returned by the query to find
    candidates in do_failover is down then the initialization of the
    bestCandidate is done with non assigned variables.

    Fix the situation by moving the initialization in the loop above.
    And loop until we have a find_best. Added a log message if no candidate
    is found

commit 42b21475ac248db8f0e50f5956ef96808e92c68c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon May 9 22:39:21 2011 +0200

    Add test_ssh_connection

    The feature was written by Jaime and reworked to fix
    https://github.com/greg2ndQuadrant/repmgr/issues/5

commit 86f01afae631e9541600af6578e649d88c3ece98
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon May 9 21:39:42 2011 +0200

    Improve log output

commit db2f29fc1c8ea03a8ff85717873f8a876846b844
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon May 9 01:41:34 2011 -0500

    Only compare getenv("USER") when it's actually set, otherwise it
    will segfault

commit ea4f3f20747e2e0294551d5e61869bdde6d3cd7b
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon May 9 01:03:39 2011 -0500

    Fix a message to only show when log_info is requested and the verbose
    flag is set.
    This is because it needs a calculation that is only done when the
    verbose flag is set, so if i have requested log INFO level but haven't
    set the flag it shows a null

commit 35a53bac7e341cfdbb64d2c15fa77c9c4e18bd40
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon May 9 01:00:54 2011 -0500

    Use log_* functions in do_witness_create()

commit 8c526f758a46ad53b4d391fc76360561d4ff8bdd
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun May 8 19:30:34 2011 -0500

    Add a fallback_application_name parameter to the conninfo identify
    the connection if application_name is not set

commit 01057fc12cbc1fb656d619f483044f28a5f08d37
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri May 6 23:57:27 2011 +0200

    Fix the best_candidate loop

    There was an overflow in the loop; it had already been fixed, but the fix was lost during a merge.

commit e80effa3daf56f08005704fc1a5bbe69c1324212
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri May 6 23:55:15 2011 +0200

    Fix check in do_failover (merge failure)

    And also remove an unused variable as I was here.

commit 79ba37e2933f4e87523a77375dfda1d96150e7d3
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu May 5 21:15:46 2011 +0200

    Fix compile error

commit 67c7b5d68c95a60bb4cd0cfb750b4c8d047fa2a0
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 23:27:57 2011 +0200

    And apply astyle  ....

commit 9a321722537d96983b8162227ff629a267b6ed67
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 23:27:09 2011 +0200

    Cosmetic change to reduce diff with master

commit 09037efea3fa2c31896b5dc78b0340516a743ba6
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 22:26:03 2011 +0200

    Apply astyle

commit 7c4786f662943558be967be4a8dad976f52155dd
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 24 02:22:12 2011 +0200

    Improve the standby clone action

    By default, all config files and directories are cloned from the master in the
    same place in the slave.
    If a destination directory is provided (-D), everything is copied into the
    provided dir, and if the master has tablespaces repmgr exits without cloning.

commit a6d7f765b9403a2cff7e2e1df8ae45a5a7ee1665
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 23:31:09 2011 +0200

    Add success message for repmgr standby register

commit 26bf3b08e661137dd3f3c0d3c00fd6b3b90b08b3
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:51:28 2011 +0200

    Change the exit to a return in config.c

commit 1bd8f4c119e1dbf9a94b2eaec884abce96eeb174
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:32:57 2011 +0200

    Reduce duplicate code

commit db553fab45ca075f95f09bdb2147de68948b60c8
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:24:04 2011 +0200

    Some cosmetic

commit f19d0ad714ebcf7df7726772e887c873d005d350
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Apr 22 22:23:06 2011 +0200

    Move a function declaration into header file

commit 1f328bc438c896a9f2067069d756f901b58d41f2
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Mon Apr 11 00:38:30 2011 -0500

    We don't use conninfo as a separate variable anymore

commit f6ade0d63b8a5dd43377f546f5311b4a151b2bfb
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Apr 10 20:53:22 2011 -0500

    Fix a few typos

commit ceca9fa983c8dbde61a7a78da29a1e1871756d8c
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Apr 10 19:32:57 2011 -0500

    Fix code to allow the code to compile:
    - some log_* had problems with parenthesis
    - some uses of variables without the runtime_options prefix

commit 73431f955afd77560bca5370924e09329566c4b7
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:21:37 2011 +0200

    Fix the debian package name

commit 688eab371110083ae8715b35f414e29c6d87e1ac
Merge: 5c23375... 7995c42...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:17:58 2011 +0200

    Merge branch 'autofailover' of git.2ndquadrant.it:repmgr into autofailover

commit 5c23375f88a53ed469e9d13934d618f7a74669be
Merge: cc3315c... c4ae574...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:08:36 2011 +0200

    Merge branch 'master' into autofailover

    Conflicts:
    	repmgr.c

commit 7995c428161566cfc54a67eb16f9134c859e7381
Merge: 788ff98... 1303e49...
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Apr 10 16:14:30 2011 -0500

    Merge branch 'autofailover' of git+ssh://git.2ndquadrant.it/git/repmgr into autofailover

commit cc3315ce235b898711c34fd1f2fa1116dbee4e16
Merge: 1303e49... d77186c...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 23:03:11 2011 +0200

    Merge commit 'd77186c90444b9c5ca2de201651841f56a7ded02' into autofailover

commit 1303e49852705046e15ef64f5f7ab739a1689431
Merge: 7ff621b... 4c792c8...
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sun Apr 10 22:28:08 2011 +0200

    Merge commit '4c792c8013f5713589f53dbdb47721febf139a85' into autofailover

commit 788ff98e94311a33e3e6f7d85a303cbc61288e5f
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Tue Mar 15 19:00:56 2011 -0500

    A few fixes after the merge to repair what the merge broke, and to make
    the new logging system more consistent throughout the system

commit 7ff621b96784dfaf40baab4f0f8e7857b4aed6ce
Author: Dan Farina <drfarina@acm.org>
Date:   Tue Dec 7 21:30:44 2010 -0800

    Install install/uninstall SQL also.

    Signed-off-by: Dan Farina <drfarina@acm.org>
    Signed-off-by: Peter van Hardenberg <pvh@heroku.com>

commit c9147dad8223eff20bf5d52ced8a35eed6d82110
Author: Dan Farina <drfarina@acm.org>
Date:   Tue Dec 7 21:30:20 2010 -0800

    Split up install/uninstall actions more like a standard contrib

    Signed-off-by: Dan Farina <drfarina@acm.org>
    Signed-off-by: Peter van Hardenberg <pvh@heroku.com>

commit c8028780b50f2c7fb4384cb9891796647f356e19
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sat Feb 12 13:29:32 2011 +0100

    Fixing SLEEP times and RETRY

commit 39a1bf3d29f3e33fbf0e1b066a311e8a72f2dc38
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Sat Feb 12 01:17:37 2011 +0100

    Add a pause after update_shared_memory() in do_failover

    we pause for SLEEP_MONITOR+1 to let other nodes update themselves.

commit 527af2baa945e3b640352c01c6dd181d93c9529a
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 21:14:22 2011 +0100

    change the debian package filename too

commit c8cb27c7039b2b3a838554187a8add850a42027a
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 15:14:40 2011 +0100

    Change package name for the automatic fail-over branch of repmgr

commit 7427988628f754e57069453d65a71f79117c3a3d
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:28:03 2011 +0100

    Exit 1 when SIGINT

commit af366fe731b70e24ead056e50b69269392bd15a1
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:27:46 2011 +0100

    Improve log output when reloading configuration

commit 6cc18ce081d7bf55ba9993e9d87567879da35c4d
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:20:36 2011 +0100

    Add reload conf on (re)start

commit 4259e2c410fd0ef1273c7d1b4ab8fcf1e778e968
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 14:01:37 2011 +0100

    avoid double free on repmgrd exit as master
    Per commit from Charles Duffy <charles@tippr.com>
    and failure to cherry-pick it correctly.

    Conflicts:

    	repmgrd.c

commit 431e27b1c005e000f9a346d982419979b4363d77
Author: Greg Smith <greg@2ndQuadrant.com>
Date:   Thu Feb 10 15:09:18 2011 -0500

    Tweak .gitignore to ignore more doc build artifacts

commit b725fa9ae65c7bd5fea7a4e944db5685dee2e8bd
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sun Mar 13 15:16:27 2011 -0500

    Delete a paragraph that appears twice, because a merge problem

commit d990c77b327a282c1903b7a339f35a22b6a89958
Author: trbs <trbs@trbs.net>
Date:   Tue Jan 11 18:24:17 2011 +0100

    added note about postgresql-server-dev-9.0 and use libxslt-dev instead of version specific package name

commit 69bc1cd3772103b529598978160327e1f9025157
Author: trbs <trbs@trbs.net>
Date:   Fri Jan 7 01:32:31 2011 +0100

    fix line

commit f7b1d1e5e3764c85cec7afa81c164fac3679e1ea
Author: trbs <trbs@trbs.net>
Date:   Thu Dec 23 15:02:23 2010 +0100

    Updated README with Debian/Ubuntu install information

commit 77d28960ff78c3936be0e1029305b0b578e260a9
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 13:34:49 2011 +0100

    Create the function used for shared memory access in create_schema, note that this is incompatible with current master

commit 4a73043f232f0a143ede898841530f4d7442c95b
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Fri Feb 11 10:00:34 2011 +0100

    improve log output

commit 62c90a4e86b2cd56ec14255adcfef564945d0769
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Feb 11 00:40:05 2011 -0500

    Close local connection on witness before exit on error of primary

commit e5156865e05670fa9944d74d472127082556d0a0
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Feb 11 00:34:25 2011 -0500

    Remove a semicolon which is just a typo

commit 7586a09bc321241932adacf6a1431029964dc46f
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Feb 11 00:07:02 2011 -0500

    Fix the computation of quorum, we need to count master and the
    division should not be an integer division

commit a19c0ad2059a00e9e7415fc6ea280c109c809c9c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 10 23:54:35 2011 +0100

    move the functions back into public schema

commit 19fc8ffb1dc0fd9acddad5d22bf5c01704687474
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Thu Feb 10 00:48:00 2011 -0500

    A few more fixes.
    Make repmgr functions exists in repmgr schema and fix a typo that
    caused a seg fault.

commit c6d2b8c6421f93074d7d616980feb0175ee4ef36
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 17:56:44 2011 -0500

    A few places where i forgot to update the priority field

commit 0ff0bb8d981b868693c6a751e7e80473b25f2399
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 14:24:43 2011 -0500

    Fix a few bugs from last commit and make reload configuration also
    update registration on repl_nodes

commit 508c34e9dfb2bfb7e47d5c6836ead7992e6112fe
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 13:45:20 2011 -0500

    Add a way for the user to indicate its preference about which node
    should be promoted in case of a conflict (ie: two nodes with the
    same wal location).

    This will be provided as a parameter in repmgr.conf called priority,
    and will be registered in the repl_nodes table.

commit 6005f1bbf90de61b4c5ebc34302307fa05b019a7
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 11:15:30 2011 -0500

    Add a heartbeat for the witness, this should write to repl_monitor
    table so we can see the witness in repl_status and monitor if it
    is working.

    Also close connection at the end of do_witness_create in repmgr.c

commit ac1c6367ab689aeae2eff3dda22db42337f300c1
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Feb 9 01:26:41 2011 -0500

    Add a sighup handler to reload the configuration

commit 7df2fb7b74a3c5287319e56112840d9c2a3e7d5b
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 3 18:42:36 2011 +0100

    Change the is_pgup () check test

    remove spurious 'return'

commit 7e58e6aa91ab3f681854a44fe282b44da81768fa
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 3 16:53:17 2011 +0100

    Add constant for the sleep times and retry, rework monitor functions

    Rename MonitorExecute() to StandbyMonitor()
    Add    WitnessMonitor() # very simple version to start service mode isolation

commit 1b270dab2e2c3c60527b86a33cd0fc9c0d11c08c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Feb 3 16:23:01 2011 +0100

    Improve PrimaryCheck

    add a function "bool is_pgup()"

    Now, repmgrd-master can work.

commit c6f07229713c8f2b77596459c06184edddd8d77e
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 19:31:06 2011 +0100

    Fix strcmp in config parser, now failover parameter should be set correctly

commit 0b690698a0d9aa87d3e8f1e462ee0771aa2ae9e8
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 16:23:50 2011 +0100

    fix sprintf extra param

commit 6050da315824048661be9c425ae6005576e5870f
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 13:53:29 2011 +0100

    Add some other files to ignore

commit a146dd581b46ea0e26b7b56b087d6b0d4ae15d44
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 13:53:04 2011 +0100

    Fix SQL query

commit 8f5db0f9c0f68ce2519afda72b6a778536427eab
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 00:51:54 2011 +0100

    Some more minor fix and remove TODO

commit c9299ad74e8f929bdc24804a6a834f24b66b7074
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Wed Feb 2 00:39:18 2011 +0100

    fix some memory leak and fix testlogic for is_standby is_witness

    * is_standby() must be tested *after* is_witness else we think we are in a master
    * remove SELECT * in favor of SELECT witness

commit cc5d06ea8bf1dcde4c264e95eb90f7fb1e821af3
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 23:40:15 2011 +0100

    Forgot to remove a param from fprintf

commit 426e22fa8dfd78f0c256bda1b166a31807de9ec6
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 22:47:58 2011 +0100

    Restore previous usage of --force and rsync tablespace before data_dir

    The --force option is used to reduce the time needed to restore a failed
    node: it will overwrite existing files thanks to rsync --delete option

    The tablespaces need to be copied first, because there are symlinks to
    them from the data_directory

commit 1937973fced703d14159e6aae1cbdabb5619accb
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 21:09:12 2011 +0100

    Improve message of repmgrd

commit 035a9bcc1eea55cd95790bc72276727cc492694a
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 21:08:38 2011 +0100

    Fix (bool *)PQgetval

commit bf9181654213f898949e9c8f094b974915f82258
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 01:54:49 2011 +0100

    Fix pg_hba on witness and connection

    * Copy the pg_hba.conf file from master to witness server
    * createdb and createuser in witness if they are different from getenv(USER)

commit a2d8dcb2fd105d8f02bd76856969aca6605c66fa
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Tue Feb 1 01:01:43 2011 +0100

    Improve initialization of repmgr (+ critical bug and minor fixes)

    * standby clone now *clone* the master files and dir to the *same*
      place on the standby if destination_directory is not provided
    * add preload library to the witness configuration
    * sleep 2 seconds after starting the witness postgresql to let it
      start enough to be able to connect to it.
    * Fix rsync files
    * Fix insert configuration into witness

commit bc1a265d272e4805ac7859c208b51b57edd10fc7
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 12:25:20 2011 +0100

    Fix some error message new line

commit e087bd5de5ab43ffac90c6a20df6ef3fb19eed6d
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 11:37:08 2011 +0100

    Guess data_directory from master in 'standby clone' and remove --force for dir

    --force does not overwrite directories anymore (it was not working very well anyway)
    dest_dir is the same as the master's one by default.
    Move down the tablespace check directories process

commit 0a961e7ef05f26c87af1946b8141a639076fc488
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 11:21:40 2011 +0100

    Add new function: create_pgdir (and fix 2 bugs in the process).
    It also fix function create_schema.

    Reduce repmgr code

commit 7e5958dcc1daa9b54cb6f295af96fbef750c7952
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 10:34:58 2011 +0100

    Improve an ERROR message

commit f3a66a65a361f919727fc2d0ff9bf9544a10a822
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Mon Jan 31 10:25:45 2011 +0100

    Improve error message about 'wal_keep_segments'

commit 150dbcc0fe53ce4eff08797210fd2e9e4dd0e17a
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Sat Jan 29 23:35:00 2011 -0500

    Add witness server support

commit 6281e22a9c467da883ad960567f8ab6bdbc155ba
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Jan 27 21:32:11 2011 +0100

    Build all at once and update debian makefile to include the sql/

commit 50d752bf1ead7c9343900d4b494844284b7aac6c
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Jan 27 02:10:31 2011 +0100

    Adding information for debian and --version test

commit 16d56dbfa05314eea69869ee2a7a705636432ad9
Author: Cédric Villemain <cedric@2ndQuadrant.fr>
Date:   Thu Jan 27 02:03:20 2011 +0100

    Add a hint at the end of the standby clone
    and minor typo and message shuffle

commit 6404ba247de1e2e3b995f30b6e7626e459849136
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Jan 26 06:13:30 2011 -0500

    Fix compiler warning about variables being used uninitialized

commit a4f48993d5fe3b22bdd2aaefcff315115f8764b7
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Jan 21 21:09:03 2011 -0500

    Fix a new typo

commit 904e61c9edcbbce6b1027c80ff77317d7cbd4919
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Fri Jan 21 19:30:56 2011 -0500

    Use a function to make the call to repmgr_update_standby_location()
    so i avoid typos like the one i fixed in a previous commit. It also
    makes the code cleaner.

commit 4ed388726f4bc0a52cc88d044d1f81697f348a7c
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Jan 19 09:17:16 2011 -0500

    Fix a typo when calling the sql function that writes shared memory

commit d9232266561306eabef90e13c084c051a0e7f458
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Tue Jan 18 01:25:23 2011 -0500

    Define the variable that we are using to test the result status of
    the system() call.

commit 4d131c212b91e40ca027f76637c182456ab12514
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Tue Jan 18 01:04:12 2011 -0500

    Makes repmgrd warn if promote_command or follow_command fails, add
    a "still alive" check for primary.
    Add a few messages and fix a bug in do_failover() in which we were
    using a closed PGresult.

commit a5189e68cf4c8cf84259ea667a35e96de56fa4c9
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Thu Jan 13 15:45:50 2011 -0500

    Initial attempt to get autofailover

commit d0e09010a9d4610997c900b62ea1df2a71b01015
Author: Jaime Casanova <jaime@2ndQuadrant.com>
Date:   Wed Jan 12 14:40:29 2011 -0500

    Add options failover, promote_command and follow_command
    to repmgr.conf, in pass also rename sample repmgr.conf to
    repmgr.conf.sample
    promote_command and follow_command allows to use a custom script
    for those actions.
2011-06-07 01:42:15 -04:00

2003 lines
59 KiB
C

/*
* repmgr.c - Command interpreter for the repmgr
* Copyright (C) 2ndQuadrant, 2010-2011
*
* This module is a command-line utility to easily setup a cluster of
* hot standby servers for an HA environment
*
* Commands implemented are:
* MASTER REGISTER
* STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, STANDBY PROMOTE
* WITNESS CREATE
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "repmgr.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "log.h"
#include "config.h"
#include "check_dir.h"
#include "strutil.h"
/*
 * Names of the recovery-related files used by this module (their use is
 * outside this excerpt).
 */
#define RECOVERY_FILE "recovery.conf"
#define RECOVERY_DONE_FILE "recovery.done"
/*
 * Action codes. main() maps the MODE and COMMAND command-line arguments to
 * one of these values and dispatches to the matching do_*() handler.
 */
#define NO_ACTION 0 /* Not a real action, just to initialize */
#define MASTER_REGISTER 1
#define STANDBY_REGISTER 2
#define STANDBY_CLONE 3
#define STANDBY_PROMOTE 4
#define STANDBY_FOLLOW 5
#define WITNESS_CREATE 6
/* Forward declarations of file-local helpers */
static bool create_recovery_file(const char *data_dir, char *master_conninfo);
static int test_ssh_connection(char *host, char *remote_user);
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
char *local_path, bool is_directory);
static bool check_parameters_for_action(const int action);
static bool create_schema(PGconn *conn);
static bool copy_configuration(PGconn *masterconn, PGconn *witnessconn);
/* One handler per action code; called from the dispatch switch in main() */
static void do_master_register(void);
static void do_standby_register(void);
static void do_standby_clone(void);
static void do_standby_promote(void);
static void do_standby_follow(void);
static void do_witness_create(void);
static void usage(void);
static void help(const char *progname);
/* Global variables */
/* Program name, set in main() from argv[0] via get_progname() */
static const char *progname;
/*
 * Parallel keyword/value arrays of connection parameters. main() fills
 * slots 2-5 (user, dbname, application_name and the NULL terminator);
 * slots 0-1 are not set in main() -- presumably host/port filled in by the
 * action handlers, TODO confirm.
 */
static const char *keywords[6];
static const char *values[6];
/*
 * Schema name built in main() as DEFAULT_REPMGR_SCHEMA_PREFIX followed by
 * the configured cluster name.
 */
char repmgr_schema[MAXLEN];
/*
 * When true, main() refuses to continue unless the configuration supplies
 * a node (options.node != -1).
 */
bool need_a_node = true;
/* XXX This should be mapped into a command line option */
bool require_password = false;
/* Initialization of runtime options */
/*
 * Both initializers below are positional: the order of the values must
 * match the field order of the struct declarations, which live in project
 * headers not visible here.
 */
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "" };
t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "" };
/* First (MODE) and second (COMMAND) positional arguments, set in main() */
static char *server_mode = NULL;
static char *server_cmd = NULL;
/*
 * main - entry point of the repmgr command-line utility.
 *
 * Processing order:
 *   1. handle --help / --version when given as the first argument;
 *   2. parse the option switches into runtime_options;
 *   3. read the "MODE COMMAND [node]" positional arguments and map them
 *      to one of the action codes;
 *   4. validate the options for that action, default the database name,
 *      read the configuration file and initialize the logger;
 *   5. dispatch to the matching do_*() handler.
 *
 * Returns 0 once the handler returns.  Usage and configuration problems
 * detected here terminate the process with ERR_BAD_CONFIG.
 */
int
main(int argc, char **argv)
{
	static struct option long_options[] =
	{
		{"dbname", required_argument, NULL, 'd'},
		{"host", required_argument, NULL, 'h'},
		{"port", required_argument, NULL, 'p'},
		{"username", required_argument, NULL, 'U'},
		{"dest-dir", required_argument, NULL, 'D'},
		{"local-port", required_argument, NULL, 'l'},
		{"config-file", required_argument, NULL, 'f'},
		{"remote-user", required_argument, NULL, 'R'},
		{"wal-keep-segments", required_argument, NULL, 'w'},
		{"force", no_argument, NULL, 'F'},
		{"ignore-rsync-warning", no_argument, NULL, 'I'},
		{"verbose", no_argument, NULL, 'v'},
		{NULL, 0, NULL, 0}
	};

	int			optindex;
	int			c;
	int			action = NO_ACTION;

	progname = get_progname(argv[0]);

	if (argc > 1)
	{
		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
		{
			help(progname);
			exit(SUCCESS);
		}
		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
		{
			printf("%s (PostgreSQL) " PG_VERSION "\n", progname);
			exit(SUCCESS);
		}
	}

	/*
	 * -F, -I and -v are plain flags: they must not carry a ':' in the short
	 * option string, otherwise the short form would swallow the next
	 * command-line word (for instance the MODE argument) as its value.
	 *
	 * All string copies below are bounded to "size - 1": the destination
	 * buffers are zero-initialized statics and strncpy() zero-pads, so the
	 * last byte always stays NUL and the result is a terminated string even
	 * when the argument is too long.
	 */
	while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:FIv", long_options,
							&optindex)) != -1)
	{
		switch (c)
		{
			case 'd':
				strncpy(runtime_options.dbname, optarg, MAXLEN - 1);
				break;
			case 'h':
				strncpy(runtime_options.host, optarg, MAXLEN - 1);
				break;
			case 'p':
				/* non-numeric or non-positive ports are silently ignored */
				if (atoi(optarg) > 0)
					strncpy(runtime_options.masterport, optarg, MAXLEN - 1);
				break;
			case 'U':
				strncpy(runtime_options.username, optarg, MAXLEN - 1);
				break;
			case 'D':
				strncpy(runtime_options.dest_dir, optarg, MAXFILENAME - 1);
				break;
			case 'l':
				if (atoi(optarg) > 0)
					strncpy(runtime_options.localport, optarg, MAXLEN - 1);
				break;
			case 'f':
				strncpy(runtime_options.config_file, optarg, MAXLEN - 1);
				break;
			case 'R':
				strncpy(runtime_options.remote_user, optarg, MAXLEN - 1);
				break;
			case 'w':
				if (atoi(optarg) > 0)
					strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN - 1);
				break;
			case 'F':
				runtime_options.force = true;
				break;
			case 'I':
				runtime_options.ignore_rsync_warn = true;
				break;
			case 'v':
				runtime_options.verbose = true;
				break;
			default:
				usage();
				exit(ERR_BAD_CONFIG);
		}
	}

	/*
	 * Now we need to obtain the action, this comes in one of these forms:
	 * MASTER REGISTER |
	 * STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} |
	 * WITNESS CREATE
	 *
	 * the node part is optional, if we receive it then we shouldn't
	 * have received a -h option
	 */
	if (optind < argc)
	{
		server_mode = argv[optind++];
		if (strcasecmp(server_mode, "STANDBY") != 0 &&
			strcasecmp(server_mode, "MASTER") != 0 &&
			strcasecmp(server_mode, "WITNESS") != 0)
		{
			usage();
			exit(ERR_BAD_CONFIG);
		}
	}

	/* server_mode is necessarily set here: a command implies a mode */
	if (optind < argc)
	{
		server_cmd = argv[optind++];

		/* check possibilities for all server modes */
		if (strcasecmp(server_mode, "MASTER") == 0)
		{
			if (strcasecmp(server_cmd, "REGISTER") == 0)
				action = MASTER_REGISTER;
		}
		else if (strcasecmp(server_mode, "STANDBY") == 0)
		{
			if (strcasecmp(server_cmd, "REGISTER") == 0)
				action = STANDBY_REGISTER;
			else if (strcasecmp(server_cmd, "CLONE") == 0)
				action = STANDBY_CLONE;
			else if (strcasecmp(server_cmd, "PROMOTE") == 0)
				action = STANDBY_PROMOTE;
			else if (strcasecmp(server_cmd, "FOLLOW") == 0)
				action = STANDBY_FOLLOW;
		}
		else if (strcasecmp(server_mode, "WITNESS") == 0)
		{
			if (strcasecmp(server_cmd, "CREATE") == 0)
				action = WITNESS_CREATE;
		}
	}

	/* Missing mode, missing command or unknown command */
	if (action == NO_ACTION)
	{
		usage();
		exit(ERR_BAD_CONFIG);
	}

	/* For some actions we still can receive a last argument */
	if (action == STANDBY_CLONE)
	{
		if (optind < argc)
		{
			if (runtime_options.host[0])
			{
				log_err(_("Conflicting parameters: you can't use -h while providing a node separately.\n"));
				usage();
				exit(ERR_BAD_CONFIG);
			}
			strncpy(runtime_options.host, argv[optind++], MAXLEN - 1);
		}
	}

	/*
	 * Anything still left on the command line is an error.  The first
	 * unconsumed word is argv[optind] itself.
	 */
	if (optind < argc)
	{
		log_err(_("%s: too many command-line arguments (first extra is \"%s\")\n"),
				progname, argv[optind]);
		usage();
		exit(ERR_BAD_CONFIG);
	}

	if (!check_parameters_for_action(action))
		exit(ERR_BAD_CONFIG);

	/* Default the database name: PGDATABASE, then PGUSER, then built-in */
	if (!runtime_options.dbname[0])
	{
		if (getenv("PGDATABASE"))
			strncpy(runtime_options.dbname, getenv("PGDATABASE"), MAXLEN - 1);
		else if (getenv("PGUSER"))
			strncpy(runtime_options.dbname, getenv("PGUSER"), MAXLEN - 1);
		else
			strncpy(runtime_options.dbname, DEFAULT_DBNAME, MAXLEN - 1);
	}

	/* Read the configuration file, normally repmgr.conf */
	if (!runtime_options.config_file[0])
		strncpy(runtime_options.config_file, DEFAULT_CONFIG_FILE, MAXLEN - 1);

	if (runtime_options.verbose)
		printf(_("Opening configuration file: %s\n"), runtime_options.config_file);

	/*
	 * XXX Do not read config files for action where it is not required (clone
	 * for example).
	 */
	parse_config(runtime_options.config_file, &options);

	/* Slots 0 and 1 of keywords/values are not filled in here */
	keywords[2] = "user";
	values[2] = (runtime_options.username[0]) ? runtime_options.username : NULL;
	keywords[3] = "dbname";
	values[3] = runtime_options.dbname;
	keywords[4] = "application_name";
	values[4] = (char *) progname;
	keywords[5] = NULL;
	values[5] = NULL;

	/*
	 * Initialize the logger.  If verbose command line parameter was
	 * input, make sure that the log level is at least INFO.  This
	 * is mainly useful for STANDBY CLONE.  That doesn't require a
	 * configuration file where a logging level might be specified
	 * at, but it often requires detailed logging to troubleshoot
	 * problems.
	 */
	logger_init(progname, options.loglevel, options.logfacility);
	if (runtime_options.verbose)
		logger_min_verbose(LOG_INFO);

	/*
	 * Node configuration information is not needed for all actions,
	 * with STANDBY CLONE being the main exception.
	 */
	if (need_a_node)
	{
		if (options.node == -1)
		{
			log_err(_("Node information is missing. "
					  "Check the configuration file.\n"));
			exit(ERR_BAD_CONFIG);
		}
	}

	/* Prepare the repmgr schema variable */
	snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, options.cluster_name);

	switch (action)
	{
		case MASTER_REGISTER:
			do_master_register();
			break;
		case STANDBY_REGISTER:
			do_standby_register();
			break;
		case STANDBY_CLONE:
			do_standby_clone();
			break;
		case STANDBY_PROMOTE:
			do_standby_promote();
			break;
		case STANDBY_FOLLOW:
			do_standby_follow();
			break;
		case WITNESS_CREATE:
			do_witness_create();
			break;
		default:
			usage();
			exit(ERR_BAD_CONFIG);
	}

	logger_shutdown();

	return 0;
}
static void
do_master_register(void)
{
PGconn *conn;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
bool schema_exists = false;
char schema_quoted[MAXLEN];
char master_version[MAXVERSIONSTR];
conn = establishDBConnection(options.conninfo, true);
/* master should be v9 or better */
log_info(_("%s connecting to master database\n"), progname);
pg_version(conn, master_version);
if (strcmp(master_version, "") == 0)
{
PQfinish(conn);
log_err( _("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
return;
}
/* Check we are a master */
log_info(_("%s connected to master, checking its state\n"), progname);
if (is_standby(conn))
{
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
/* Assemble a quoted schema name
* XXX This is not currently used due to a merge conflict, but
* probably should be */
if (false)
{
char *identifier = PQescapeIdentifier(conn, repmgr_schema,
strlen(repmgr_schema));
maxlen_snprintf(schema_quoted, "%s", identifier);
PQfreemem(identifier);
}
/* Check if there is a schema for this cluster */
sqlquery_snprintf(sqlquery,
"SELECT 1 FROM pg_namespace "
"WHERE nspname = '%s'", repmgr_schema);
log_debug(_("master register: %s\n"), sqlquery);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_err(_("Can't get info about schemas: %s\n"), PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
if (PQntuples(res) > 0) /* schema exists */
{
if (!runtime_options.force) /* and we are not forcing so error */
{
log_notice(_("Schema %s already exists.\n"), repmgr_schema);
PQclear(res);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
schema_exists = true;
}
PQclear(res);
if (!schema_exists)
{
log_info(_("master register: creating database objects inside the %s schema\n"), repmgr_schema);
/* ok, create the schema */
if (!create_schema(conn))
return;
}
else
{
PGconn *master_conn;
int id;
/* Ensure there isn't any other master already registered */
master_conn = getMasterConnection(conn, repmgr_schema, options.node,
options.cluster_name, &id,NULL);
if (master_conn != NULL)
{
PQfinish(master_conn);
log_warning(_("There is a master already in cluster %s\n"), options.cluster_name);
exit(ERR_BAD_CONFIG);
}
}
/* Now register the master */
if (runtime_options.force)
{
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
" WHERE id = %d",
repmgr_schema, options.node);
log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery))
{
log_warning(_("Cannot delete node details, %s\n"),
PQerrorMessage(conn));
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
}
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, conninfo, priority) "
"VALUES (%d, '%s', '%s', %d)",
repmgr_schema, options.node, options.cluster_name, options.conninfo, options.priority);
log_debug(_("master register: %s\n"), sqlquery);
if (!PQexec(conn, sqlquery))
{
log_warning(_("Cannot insert node details, %s\n"),
PQerrorMessage(conn));
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
PQfinish(conn);
log_notice(_("Master node correctly registered for cluster %s with id %d (conninfo: %s)\n"),
options.cluster_name, options.node, options.conninfo);
return;
}
static void
do_standby_register(void)
{
PGconn *conn;
PGconn *master_conn;
int master_id;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
char schema_quoted[MAXLEN];
char master_version[MAXVERSIONSTR];
char standby_version[MAXVERSIONSTR];
/* XXX: A lot of copied code from do_master_register! Refactor */
log_info(_("%s connecting to standby database\n"), progname);
conn = establishDBConnection(options.conninfo, true);
/* should be v9 or better */
log_info(_("%s connected to standby, checking its state\n"), progname);
pg_version(conn, standby_version);
if (strcmp(standby_version, "") == 0)
{
PQfinish(conn);
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
exit(ERR_BAD_CONFIG);
}
/* Check we are a standby */
if (!is_standby(conn))
{
log_err(_("repmgr: This node should be a standby (%s)\n"), options.conninfo);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
/* Assemble a quoted schema name
* XXX This is not currently used due to a merge conflict, but
* probably should be */
if (false)
{
char *identifier = PQescapeIdentifier(conn, repmgr_schema,
strlen(repmgr_schema));
maxlen_snprintf(schema_quoted, "%s", identifier);
PQfreemem(identifier);
}
/* Check if there is a schema for this cluster */
sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema);
log_debug(_("standby register: %s\n"), sqlquery);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_err(_("Can't get info about tablespaces: %s\n"), PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
if (PQntuples(res) == 0)
{
/* schema doesn't exist */
log_err(_("Schema %s doesn't exists.\n"), repmgr_schema);
PQclear(res);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
PQclear(res);
/* check if there is a master in this cluster */
log_info(_("%s connecting to master database\n"), progname);
master_conn = getMasterConnection(conn, repmgr_schema, options.node, options.cluster_name,
&master_id, NULL);
if (!master_conn)
{
log_err(_("A master must be defined before configuring a slave\n"));
exit(ERR_BAD_CONFIG);
}
/* master should be v9 or better */
log_info(_("%s connected to master, checking its state\n"), progname);
pg_version(master_conn, master_version);
if (strcmp(master_version, "") == 0)
{
PQfinish(conn);
PQfinish(master_conn);
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
exit(ERR_BAD_CONFIG);
}
/* master and standby version should match */
if (strcmp(master_version, standby_version) != 0)
{
PQfinish(conn);
PQfinish(master_conn);
log_err(_("%s needs versions of both master (%s) and standby (%s) to match.\n"),
progname, master_version, standby_version);
exit(ERR_BAD_CONFIG);
}
/* Now register the standby */
log_info(_("%s registering the standby\n"), progname);
if (runtime_options.force)
{
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
" WHERE id = %d",
repmgr_schema, options.node);
log_debug(_("standby register: %s\n"), sqlquery);
if (!PQexec(master_conn, sqlquery))
{
log_err(_("Cannot delete node details, %s\n"),
PQerrorMessage(master_conn));
PQfinish(master_conn);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
}
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, conninfo, priority) "
"VALUES (%d, '%s', '%s', %d)",
repmgr_schema, options.node, options.cluster_name, options.conninfo, options.priority);
log_debug(_("standby register: %s\n"), sqlquery);
if (!PQexec(master_conn, sqlquery))
{
log_err(_("Cannot insert node details, %s\n"),
PQerrorMessage(master_conn));
PQfinish(master_conn);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
log_info(_("%s registering the standby complete\n"), progname);
PQfinish(master_conn);
PQfinish(conn);
log_notice(_("Standby node correctly registered for cluster %s with id %d (conninfo: %s)\n"),
options.cluster_name, options.node, options.conninfo);
return;
}
/*
 * do_standby_clone()
 *
 * Copy a master's data directory, tablespaces and configuration files to
 * the local machine so that it can be started as a standby.
 *
 * Outline:
 *   1. Connect to the master (host/port taken from runtime_options) and
 *      verify version, role and the wal_level, wal_keep_segments and
 *      archive_mode settings.
 *   2. Prepare local tablespace directories.  Tablespaces are refused
 *      when a destination directory was given ("test mode").
 *   3. Read the master's file locations from pg_settings and check that
 *      the master host is reachable with test_ssh_connection().
 *   4. Call pg_start_backup(), copy everything with copy_remote_files(),
 *      then always call pg_stop_backup() (label stop_backup).
 *   5. Create the local pg_xlog directory and write recovery.conf.
 *
 * Failures before pg_start_backup() exit directly with ERR_BAD_CONFIG.
 * Failures after it jump to stop_backup so the master is never left in
 * backup mode; the process then exits with ERR_BAD_RSYNC when a copy
 * failed, or ERR_BAD_CONFIG for any other incomplete clone.  The function
 * never returns: it ends in exit(r).
 */
static void
do_standby_clone(void)
{
	PGconn	   *conn;
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];
	int			r = 0;
	int			i;
	bool		flag_success = false;
	bool		test_mode = false;
	char		tblspc_dir[MAXFILENAME];
	char		master_data_directory[MAXFILENAME] = "";
	char		local_data_directory[MAXFILENAME];
	char		master_xlog_directory[MAXFILENAME];
	char		local_xlog_directory[MAXFILENAME];
	char		master_stats_temp_directory[MAXFILENAME] = "";
	char		local_stats_temp_directory[MAXFILENAME];
	char		master_control_file[MAXFILENAME];
	char		local_control_file[MAXFILENAME];
	char		master_config_file[MAXFILENAME] = "";
	char		local_config_file[MAXFILENAME];
	char		master_hba_file[MAXFILENAME] = "";
	char		local_hba_file[MAXFILENAME];
	char		master_ident_file[MAXFILENAME] = "";
	char		local_ident_file[MAXFILENAME];
	char	   *first_wal_segment = NULL;
	const char *last_wal_segment = NULL;
	char		master_version[MAXVERSIONSTR];

	/*
	 * if dest_dir has been provided, we copy everything in the same path
	 * if dest_dir is set and the master have tablespace, repmgr will stop
	 * because it is more complex to remap the path for the tablespaces and it
	 * does not look useful at the moment
	 */
	if (runtime_options.dest_dir[0])
	{
		test_mode = true;
		log_notice(_("%s Destination directory %s provided, try to clone everything in it.\n"), progname, runtime_options.dest_dir);
	}

	/* Connection parameters for master only */
	keywords[0] = "host";
	values[0] = runtime_options.host;
	keywords[1] = "port";
	values[1] = runtime_options.masterport;

	/* We need to connect to check configuration and start a backup */
	log_info(_("%s connecting to master database\n"), progname);
	conn = establishDBConnectionByParams(keywords, values, true);

	/* primary should be v9 or better */
	log_info(_("%s connected to master, checking its state\n"), progname);
	pg_version(conn, master_version);
	if (strcmp(master_version, "") == 0)
	{
		PQfinish(conn);
		log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
		exit(ERR_BAD_CONFIG);
	}

	/* Check we are cloning a primary node */
	if (is_standby(conn))
	{
		PQfinish(conn);
		log_err(_("\nThe command should clone a primary node\n"));
		exit(ERR_BAD_CONFIG);
	}

	/* And check if it is well configured */
	if (!guc_setted(conn, "wal_level", "=", "hot_standby"))
	{
		PQfinish(conn);
		log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname);
		exit(ERR_BAD_CONFIG);
	}
	if (!guc_setted(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments))
	{
		PQfinish(conn);
		log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option or edit the postgresql.conf of the PostgreSQL master.)\n"), progname, runtime_options.wal_keep_segments);
		exit(ERR_BAD_CONFIG);
	}
	if (!guc_setted(conn, "archive_mode", "=", "on"))
	{
		PQfinish(conn);
		log_err(_("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * Check if the tablespace locations exists and that we can write to
	 * them.
	 */
	sqlquery_snprintf(sqlquery,
					  "SELECT spclocation "
					  " FROM pg_tablespace "
					  "WHERE spcname NOT IN ('pg_default', 'pg_global')");
	log_debug(_("standby clone: %s\n"), sqlquery);
	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't get info about tablespaces: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}
	for (i = 0; i < PQntuples(res); i++)
	{
		if (test_mode)
		{
			log_err("Can't clone in test mode when master have tablespace\n");
			PQclear(res);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		/* snprintf always NUL-terminates, unlike strncpy */
		snprintf(tblspc_dir, sizeof(tblspc_dir), "%s", PQgetvalue(res, i, 0));

		/*
		 * Check this directory could be used for tablespace
		 * this will create the directory a bit too early
		 * XXX build an array of tablespace to create later in the backup
		 */
		if (!create_pgdir(tblspc_dir, runtime_options.force))
		{
			PQclear(res);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}
	}
	PQclear(res);

	/* Get the data directory full path and the configuration files location */
	sqlquery_snprintf(sqlquery,
					  "SELECT name, setting "
					  " FROM pg_settings "
					  " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file', 'stats_temp_directory')");
	log_debug(_("standby clone: %s\n"), sqlquery);
	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't get info about data directory and configuration files: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}
	for (i = 0; i < PQntuples(res); i++)
	{
		const char *name = PQgetvalue(res, i, 0);
		const char *setting = PQgetvalue(res, i, 1);

		if (strcmp(name, "data_directory") == 0)
			snprintf(master_data_directory, sizeof(master_data_directory), "%s", setting);
		else if (strcmp(name, "config_file") == 0)
			snprintf(master_config_file, sizeof(master_config_file), "%s", setting);
		else if (strcmp(name, "hba_file") == 0)
			snprintf(master_hba_file, sizeof(master_hba_file), "%s", setting);
		else if (strcmp(name, "ident_file") == 0)
			snprintf(master_ident_file, sizeof(master_ident_file), "%s", setting);
		else if (strcmp(name, "stats_temp_directory") == 0)
			snprintf(master_stats_temp_directory, sizeof(master_stats_temp_directory), "%s", setting);
		else
			log_warning(_("unknown parameter: %s\n"), name);
	}
	PQclear(res);

	log_info(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn));

	/*
	 * XXX master_xlog_directory should be discovered from master configuration
	 * but it is not possible via SQL. We need to use a command via ssh
	 */
	maxlen_snprintf(master_xlog_directory, "%s/pg_xlog", master_data_directory);

	if (test_mode)
	{
		snprintf(local_data_directory, sizeof(local_data_directory), "%s", runtime_options.dest_dir);
		snprintf(local_config_file, sizeof(local_config_file), "%s", runtime_options.dest_dir);
		snprintf(local_hba_file, sizeof(local_hba_file), "%s", runtime_options.dest_dir);
		snprintf(local_ident_file, sizeof(local_ident_file), "%s", runtime_options.dest_dir);
		maxlen_snprintf(local_stats_temp_directory, "%s/pg_stat_tmp", runtime_options.dest_dir);
		maxlen_snprintf(local_xlog_directory, "%s/pg_xlog", runtime_options.dest_dir);
	}
	else
	{
		snprintf(local_data_directory, sizeof(local_data_directory), "%s", master_data_directory);
		snprintf(local_config_file, sizeof(local_config_file), "%s", master_config_file);
		snprintf(local_hba_file, sizeof(local_hba_file), "%s", master_hba_file);
		snprintf(local_ident_file, sizeof(local_ident_file), "%s", master_ident_file);
		snprintf(local_stats_temp_directory, sizeof(local_stats_temp_directory), "%s", master_stats_temp_directory);
		snprintf(local_xlog_directory, sizeof(local_xlog_directory), "%s", master_xlog_directory);
	}

	/*
	 * The backup has not been started yet, so an unreachable host must not
	 * go through stop_backup: pg_stop_backup() would be called without a
	 * matching pg_start_backup().
	 */
	r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
	if (r != 0)
	{
		log_err(_("%s: Aborting, remote host %s is not reachable.\n"), progname, runtime_options.host);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	log_notice(_("Starting backup...\n"));

	/*
	 * inform the master we will start a backup and get the first XLog filename
	 * so we can say to the user we need those files
	 */
	sqlquery_snprintf(
		sqlquery,
		"SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))",
		(long) time(NULL));
	log_debug(_("standby clone: %s\n"), sqlquery);
	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't start backup: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* From here on every failure must reach stop_backup. */

	if (runtime_options.verbose)
	{
		char	   *first_wal_seg_pq = PQgetvalue(res, 0, 0);
		size_t		buf_sz = strlen(first_wal_seg_pq) + 1;

		/* keep a private copy: the PGresult is released right below */
		first_wal_segment = malloc(buf_sz);
		if (first_wal_segment == NULL)
		{
			log_err(_("%s: out of memory\n"), progname);
			PQclear(res);
			goto stop_backup;
		}
		xsnprintf(first_wal_segment, buf_sz, "%s", first_wal_seg_pq);
	}
	PQclear(res);

	/* Check the directory could be used as a PGDATA dir */
	if (!create_pgdir(local_data_directory, runtime_options.force))
	{
		log_err(_("%s: couldn't use directory %s ...\nUse --force option to force\n"),
				progname, local_data_directory);
		goto stop_backup;
	}

	/*
	 * 1) first move global/pg_control
	 *
	 * 2) then move data_directory ommiting the files we have already moved and
	 * pg_xlog content
	 *
	 * 3) finally We need to backup configuration files (that could be on other
	 * directories, debian like systems likes to do that), so look at
	 * config_file, hba_file and ident_file but we can omit
	 * external_pid_file ;)
	 *
	 * On error we need to return but before that execute pg_stop_backup()
	 */

	/* need to create the global sub directory */
	maxlen_snprintf(master_control_file, "%s/global/pg_control", master_data_directory);
	maxlen_snprintf(local_control_file, "%s/global", local_data_directory);
	if (!create_directory(local_control_file))
	{
		log_err(_("%s: couldn't create directory %s ...\n"),
				progname, local_control_file);
		goto stop_backup;
	}

	log_info(_("standby clone: master control file '%s'\n"), master_control_file);
	r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
						  master_control_file, local_control_file,
						  false);
	if (r != 0)
	{
		log_warning(_("standby clone: failed copying master control file '%s'\n"), master_control_file);
		goto stop_backup;
	}

	log_info(_("standby clone: master data directory '%s'\n"), master_data_directory);
	r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
						  master_data_directory, local_data_directory,
						  true);
	if (r != 0)
	{
		log_warning(_("standby clone: failed copying master data directory '%s'\n"), master_data_directory);
		goto stop_backup;
	}

	/*
	 * Copy tablespace locations, i'm doing this separately because i couldn't
	 * find and appropiate rsync option but besides we could someday make all
	 * these rsync happen concurrently
	 * XXX We may not do that if we are in test_mode but it does not hurt too much
	 * (except if a tablespace is created during the test)
	 */
	sqlquery_snprintf(sqlquery,
					  "SELECT spclocation "
					  " FROM pg_tablespace "
					  " WHERE spcname NOT IN ('pg_default', 'pg_global')");
	log_debug(_("standby clone: %s\n"), sqlquery);
	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't get info about tablespaces: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		goto stop_backup;
	}
	for (i = 0; i < PQntuples(res); i++)
	{
		snprintf(tblspc_dir, sizeof(tblspc_dir), "%s", PQgetvalue(res, i, 0));
		log_info(_("standby clone: master tablespace '%s'\n"), tblspc_dir);
		r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
							  tblspc_dir, tblspc_dir,
							  true);
		if (r != 0)
		{
			log_warning(_("standby clone: failed copying tablespace directory '%s'\n"), tblspc_dir);
			PQclear(res);
			goto stop_backup;
		}
	}
	PQclear(res);

	log_info(_("standby clone: master config file '%s'\n"), master_config_file);
	r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
						  master_config_file, local_config_file,
						  false);
	if (r != 0)
	{
		log_warning(_("standby clone: failed copying master config file '%s'\n"), master_config_file);
		goto stop_backup;
	}

	log_info(_("standby clone: master hba file '%s'\n"), master_hba_file);
	r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
						  master_hba_file, local_hba_file,
						  false);
	if (r != 0)
	{
		log_warning(_("standby clone: failed copying master hba file '%s'\n"), master_hba_file);
		goto stop_backup;
	}

	log_info(_("standby clone: master ident file '%s'\n"), master_ident_file);
	r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
						  master_ident_file, local_ident_file,
						  false);
	if (r != 0)
	{
		log_warning(_("standby clone: failed copying master ident file '%s'\n"), master_ident_file);
		goto stop_backup;
	}

	/* we success so far, flag that to allow a better HINT */
	flag_success = true;

stop_backup:

	/*
	 * Inform the master that we have finished the backup.
	 *
	 * Don't have this one exit if it fails, so that a more informative
	 * error message will also appear about the backup not being stopped.
	 *
	 * The connection used so far is closed before a fresh one is opened,
	 * otherwise it would be leaked when conn is overwritten.
	 */
	PQfinish(conn);
	log_info(_("%s connecting to master database to stop backup\n"), progname);
	conn = establishDBConnectionByParams(keywords, values, false);

	log_notice(_("Finishing backup...\n"));
	sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())");
	log_debug(_("standby clone: %s\n"), sqlquery);

	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		exit(ERR_STOP_BACKUP);
	}
	last_wal_segment = PQgetvalue(res, 0, 0);

	/* first_wal_segment is NULL when its allocation failed */
	if (runtime_options.verbose && first_wal_segment != NULL)
		log_info(_("%s requires primary to keep WAL files %s until at least %s\n"),
				 progname, first_wal_segment, last_wal_segment);

	/* Finished with the database connection now */
	PQclear(res);
	PQfinish(conn);

	/*
	 * Only free the first_wal_segment since it was copied out of the
	 * pqresult.
	 */
	free(first_wal_segment);
	first_wal_segment = NULL;

	/* If the rsync failed then exit */
	if (r != 0)
	{
		log_err(_("Couldn't rsync the master...\nYou have to cleanup the destination directory (%s) manually!\n"),
				local_data_directory);
		exit(ERR_BAD_RSYNC);
	}

	/*
	 * Any other failure (directory creation, tablespace query, allocation)
	 * was already reported where it happened; do not pretend the clone
	 * completed.
	 */
	if (!flag_success)
		exit(ERR_BAD_CONFIG);

	/*
	 * We need to create the pg_xlog sub directory too.
	 */
	if (!create_directory(local_xlog_directory))
	{
		log_err(_("%s: couldn't create directory %s, you will need to do it manually...\n"),
				progname, local_xlog_directory);
		r = ERR_NEEDS_XLOG;		/* continue, but eventually exit returning error */
	}

	/* Finally, write the recovery.conf file */
	if (!create_recovery_file(local_data_directory, NULL))
	{
		log_err(_("%s: couldn't write the recovery file in %s\n"),
				progname, local_data_directory);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * We don't start the service yet because we still may want to
	 * move the directory
	 */
	log_notice(_("%s standby clone complete\n"), progname);

	/* HINT message : what to do next ? */
	if (flag_success)
	{
		log_notice("HINT: You can now start your postgresql server\n");
		if (test_mode)
		{
			log_notice(_("for example : pg_ctl -D %s start\n"), local_data_directory);
		}
		else
		{
			log_notice("for example : /etc/init.d/postgresql start\n");
		}
	}

	exit(r);
}
/*
 * do_standby_promote()
 *
 * Promote the local standby (options.conninfo) to master.
 *
 * The node must report a version through pg_version(), must be a standby
 * according to is_standby(), and getMasterConnection() must not find a
 * master in the cluster.  The promotion itself consists of renaming
 * RECOVERY_FILE to RECOVERY_DONE_FILE inside the data directory and
 * restarting the server with "pg_ctl ... restart"; afterwards the node is
 * queried again to confirm it is no longer a standby.
 *
 * Exits with ERR_BAD_CONFIG when a precondition fails, when the recovery
 * file cannot be renamed or when the node is still a standby after the
 * restart, and with ERR_NO_RESTART when pg_ctl fails.  Returns normally
 * only after a confirmed promotion.
 */
static void
do_standby_promote(void)
{
	PGconn	   *conn;
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];
	char		script[MAXLEN];
	PGconn	   *old_master_conn;
	int			old_master_id;
	int			r;
	char		data_dir[MAXLEN];
	char		recovery_file_path[MAXFILENAME];
	char		recovery_done_path[MAXFILENAME];
	char		standby_version[MAXVERSIONSTR];

	/* We need to connect to check configuration */
	log_info(_("%s connecting to standby database\n"), progname);
	conn = establishDBConnection(options.conninfo, true);

	/* we need v9 or better */
	log_info(_("%s connected to standby, checking its state\n"), progname);
	pg_version(conn, standby_version);
	if (strcmp(standby_version, "") == 0)
	{
		PQfinish(conn);
		log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
		exit(ERR_BAD_CONFIG);
	}

	/* Check we are in a standby node */
	if (!is_standby(conn))
	{
		log_err(_("%s: The command should be executed on a standby node\n"), progname);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* we also need to check if there isn't any master already */
	old_master_conn = getMasterConnection(conn, repmgr_schema, options.node, options.cluster_name,
										  &old_master_id, NULL);
	if (old_master_conn != NULL)
	{
		PQfinish(old_master_conn);
		PQfinish(conn);
		log_err(_("There is a master already in this cluster\n"));
		exit(ERR_BAD_CONFIG);
	}

	log_notice(_("%s: Promoting standby\n"), progname);

	/* Get the data directory full path and the last subdirectory */
	sqlquery_snprintf(sqlquery, "SELECT setting "
					  " FROM pg_settings WHERE name = 'data_directory'");
	log_debug(_("standby promote: %s\n"), sqlquery);
	res = PQexec(conn, sqlquery);

	/* an empty result would make PQgetvalue() below return NULL */
	if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) < 1)
	{
		log_err(_("Can't get info about data directory: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* bounded copy: the setting's length is controlled by the server */
	maxlen_snprintf(data_dir, "%s", PQgetvalue(res, 0, 0));
	PQclear(res);
	PQfinish(conn);

	log_info(_("%s: Marking recovery done\n"), progname);
	maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
	maxlen_snprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE);

	/*
	 * Without the rename the restart below would bring the server up as a
	 * standby again, so there is no point in continuing when it fails.
	 */
	if (rename(recovery_file_path, recovery_done_path) != 0)
	{
		log_err(_("%s: could not rename \"%s\" to \"%s\": %s\n"),
				progname, recovery_file_path, recovery_done_path, strerror(errno));
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * We assume the pg_ctl script is in the PATH.  Restart and wait for
	 * the server to finish starting, so that the check below will
	 * find an active server rather than one starting up.  This may
	 * hang for up the default timeout (60 seconds).
	 */
	log_notice(_("%s: restarting server using pg_ctl\n"), progname);
	maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir);
	r = system(script);
	if (r != 0)
	{
		log_err(_("Can't restart PostgreSQL server\n"));
		exit(ERR_NO_RESTART);
	}

	/* reconnect to check we got promoted */
	log_info(_("%s connecting to now restarted database\n"), progname);
	conn = establishDBConnection(options.conninfo, true);
	if (is_standby(conn))
	{
		log_err(_("\n%s: STANDBY PROMOTE failed, this is still a standby node.\n"), progname);
		PQfinish(conn);
		/* report the failed promotion through the exit status as well */
		exit(ERR_BAD_CONFIG);
	}

	/* success is not an error: report it at notice level */
	log_notice(_("\n%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"), progname);
	PQfinish(conn);
	return;
}
/*
 * do_standby_follow()
 *
 * Re-point the local standby (options.conninfo) at the cluster's current
 * master.
 *
 * The local node must be a standby with a version reported by
 * pg_version().  getMasterConnection() must find a node that is not a
 * standby and whose version string equals the local one.  The master's
 * host and port are stored in runtime_options.host and
 * runtime_options.masterport before create_recovery_file() is called
 * (the original comment states they are needed to recreate
 * recovery.conf), then the server is restarted with pg_ctl.
 *
 * Exits with ERR_BAD_CONFIG on any failed precondition or when the
 * recovery file cannot be written, and with ERR_NO_RESTART when pg_ctl
 * fails.
 */
static void
do_standby_follow(void)
{
	PGconn	   *conn;
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];
	char		script[MAXLEN];
	char		master_conninfo[MAXLEN];
	PGconn	   *master_conn;
	int			master_id;
	int			r;
	char		data_dir[MAXLEN];
	char		master_version[MAXVERSIONSTR];
	char		standby_version[MAXVERSIONSTR];
	const char *master_host;
	const char *master_port;

	/* We need to connect to check configuration */
	log_info(_("%s connecting to standby database\n"), progname);
	conn = establishDBConnection(options.conninfo, true);

	/* Check we are in a standby node */
	log_info(_("%s connected to standby, checking its state\n"), progname);
	if (!is_standby(conn))
	{
		log_err(_("\n%s: The command should be executed in a standby node\n"), progname);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* should be v9 or better */
	pg_version(conn, standby_version);
	if (strcmp(standby_version, "") == 0)
	{
		PQfinish(conn);
		log_err(_("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
		exit(ERR_BAD_CONFIG);
	}

	/* we also need to check if there is any master in the cluster */
	log_info(_("%s connecting to master database\n"), progname);
	master_conn = getMasterConnection(conn, repmgr_schema, options.node,
									  options.cluster_name, &master_id, master_conninfo);
	if (master_conn == NULL)
	{
		PQfinish(conn);
		log_err(_("There isn't a master to follow in this cluster\n"));
		exit(ERR_BAD_CONFIG);
	}

	/* Check we are going to point to a master */
	if (is_standby(master_conn))
	{
		PQfinish(conn);
		PQfinish(master_conn);
		log_err(_("%s: The node to follow should be a master\n"), progname);
		exit(ERR_BAD_CONFIG);
	}

	/* should be v9 or better */
	log_info(_("%s connected to master, checking its state\n"), progname);
	pg_version(master_conn, master_version);
	if (strcmp(master_version, "") == 0)
	{
		PQfinish(conn);
		PQfinish(master_conn);
		log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
		exit(ERR_BAD_CONFIG);
	}

	/* master and standby version should match */
	if (strcmp(master_version, standby_version) != 0)
	{
		PQfinish(conn);
		PQfinish(master_conn);
		log_err(_("%s needs versions of both master (%s) and standby (%s) to match.\n"),
				progname, master_version, standby_version);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * set the host and masterport variables with the master ones
	 * before closing the connection because we will need them to
	 * recreate the recovery.conf file
	 *
	 * PQhost()/PQport() may return NULL; store an empty string then
	 * instead of handing NULL to a string function.
	 */
	master_host = PQhost(master_conn);
	master_port = PQport(master_conn);
	snprintf(runtime_options.host, MAXLEN, "%s", master_host ? master_host : "");
	snprintf(runtime_options.masterport, MAXLEN, "%s", master_port ? master_port : "");
	PQfinish(master_conn);

	log_info(_("%s Changing standby's master\n"), progname);

	/* Get the data directory full path */
	sqlquery_snprintf(sqlquery, "SELECT setting "
					  " FROM pg_settings WHERE name = 'data_directory'");
	log_debug(_("standby follow: %s\n"), sqlquery);
	res = PQexec(conn, sqlquery);

	/* an empty result would make PQgetvalue() below return NULL */
	if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) < 1)
	{
		log_err(_("Can't get info about data directory: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* bounded copy: the setting's length is controlled by the server */
	maxlen_snprintf(data_dir, "%s", PQgetvalue(res, 0, 0));
	PQclear(res);
	PQfinish(conn);

	/* write the recovery.conf file */
	if (!create_recovery_file(data_dir, NULL))
		exit(ERR_BAD_CONFIG);

	/* Finally, restart the service */
	/* We assume the pg_ctl script is in the PATH */
	maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
	r = system(script);
	if (r != 0)
	{
		log_err(_("Can't restart service\n"));
		exit(ERR_NO_RESTART);
	}

	return;
}
static void
do_witness_create(void)
{
PGconn *masterconn;
PGconn *witnessconn;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
char script[MAXLEN];
char buf[MAXLEN];
FILE *pg_conf = NULL;
int r = 0;
int i;
char master_version[MAXVERSIONSTR];
char createcommand[MAXLEN];
char master_hba_file[MAXLEN];
/* Check this directory could be used as a PGDATA dir */
if (!create_pgdir(runtime_options.dest_dir, runtime_options.force))
{
return;
}
/* Connection parameters for master only */
keywords[0] = "host";
values[0] = runtime_options.host;
keywords[1] = "port";
values[1] = runtime_options.masterport;
/* We need to connect to check configuration and copy it */
masterconn = PQconnectdbParams(keywords, values, true);
if (!masterconn)
{
log_err(_("%s: could not connect to master\n"), progname);
return;
}
/* primary should be v9 or better */
pg_version(masterconn, master_version);
if (strcmp(master_version, "") == 0)
{
PQfinish(masterconn);
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
return;
}
/* Check we are connecting to a primary node */
if (is_standby(masterconn))
{
PQfinish(masterconn);
log_err(_("The command should not run on a standby node\n"));
return;
}
log_info(_("Succesfully connected to primary.\n"));
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
if (r != 0)
{
log_err(_("%s: Aborting, remote host %s is not reachable.\n"), progname, runtime_options.host);
return;
}
/*
* To create a witness server we need to:
* 1) initialize the cluster
* 2) register the witness in repl_nodes
* 3) copy configuration from master
*/
/* Create the cluster for witness */
/* We assume the pg_ctl script is in the PATH */
sprintf(script, "pg_ctl -D %s init", runtime_options.dest_dir);
log_info("Initialize cluster for witness: %s.\n", script);
r = system(script);
if (r != 0)
{
log_err("Can't iniatialize cluster for witness server\n");
return;
}
/*
* default port for the witness is 5499,
* but user can provide a different one
*/
snprintf(buf, sizeof(buf), "%s/postgresql.conf", runtime_options.dest_dir);
pg_conf = fopen(buf, "a");
if (pg_conf == NULL)
{
log_err(_("\n%s: could not open \"%s\" for adding extra config: %s\n"), progname, buf, strerror(errno));
exit(ERR_BAD_CONFIG);
}
snprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname);
fputs(buf, pg_conf);
if (!runtime_options.localport[0])
strncpy(runtime_options.localport, "5499", MAXLEN);
snprintf(buf, sizeof(buf), "port = %s\n", runtime_options.localport);
fputs(buf, pg_conf);
snprintf(buf, sizeof(buf), "shared_preload_libraries = 'repmgr_funcs'\n") ;
fputs(buf, pg_conf);
snprintf(buf, sizeof(buf), "listen_addresses = '*'\n") ;
fputs(buf, pg_conf);
fclose(pg_conf);
/* Get the pg_hba.conf full path */
sprintf(sqlquery, "SELECT name, setting "
" FROM pg_settings "
" WHERE name IN ('hba_file')");
log_debug(_("witness create: %s"), sqlquery);
res = PQexec(masterconn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_err(_("Can't get info about pg_hba.conf: %s\n"), PQerrorMessage(masterconn));
PQclear(res);
PQfinish(masterconn);
return;
}
for (i = 0; i < PQntuples(res); i++)
{
if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0)
strcpy(master_hba_file, PQgetvalue(res, i, 1));
else
log_err(_("uknown parameter: %s"), PQgetvalue(res, i, 0));
}
PQclear(res);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
master_hba_file, runtime_options.dest_dir,
false);
if (r != 0)
{
log_err(_("Can't rsync the pg_hba.conf file from master\n"));
return;
}
/* start new instance */
sprintf(script, "pg_ctl -D %s start", runtime_options.dest_dir);
log_info(_("Start cluster for witness: %s"), script);
r = system(script);
if (r != 0)
{
log_err(_("Can't start cluster for witness server\n"));
return;
}
/* register ourselves in the master */
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, conninfo, priority, witness) "
"VALUES (%d, '%s', '%s', %d, true)",
repmgr_schema, options.node, options.cluster_name, options.conninfo);
log_debug(_("witness create: %s"), sqlquery);
if (!PQexec(masterconn, sqlquery))
{
log_err(_("Cannot insert node details, %s\n"), PQerrorMessage(masterconn));
PQfinish(masterconn);
return;
}
/* Let the server start */
sleep(2);
/*
create the local user and local db if it is not the default one
values[2] is the username we use to connect to master,
values[3] is the dbname we use to connect to master,
we suppose it is the same in the repmgr.conf (obviously it is preferable)
FIXME this is fragile and its a temporary solution
*/
if (getenv("USER"))
{
if (!(strcmp(getenv("USER"), values[2]) == 0))
{
sprintf(createcommand, "createuser -p %s -s %s", runtime_options.localport, values[2]);
log_info("creating user for witness: %s", createcommand);
r = system(createcommand);
if (r != 0)
{
log_err("Can't create local user\n");
return;
}
sprintf(createcommand, "createdb -p %s -O %s %s", runtime_options.localport, values[2], values[3]);
log_info("creating database for witness: %s", createcommand);
r = system(createcommand);
if (r != 0)
{
log_err("Can't create local db\n");
return;
}
}
}
/* establish a connection to the witness, and create the schema */
witnessconn = establishDBConnection(options.conninfo, true);
log_info(_("Starting copy of configuration from master"));
if (!create_schema(witnessconn))
{
PQfinish(masterconn);
PQfinish(witnessconn);
return;
}
/* copy configuration from master, only repl_nodes is needed */
if (!copy_configuration(masterconn, witnessconn))
{
PQfinish(masterconn);
PQfinish(witnessconn);
return;
}
PQfinish(masterconn);
PQfinish(witnessconn);
log_notice(_("Configuration has been succesfully copied to the witness\n"));
}
/*
 * usage()
 *
 * Emit a two-line hint through log_err(): the program banner followed by
 * a pointer to the --help option.  The function itself does not exit;
 * that is left to the caller.
 */
static void
usage(void)
{
log_err(_("\n\n%s: Replicator manager \n"), progname);
log_err(_("Try \"%s --help\" for more information.\n"), progname);
}
/*
 * Print the complete command-line help on stdout: synopsis, general,
 * connection and configuration options, then the list of commands.
 *
 * progname is the program name substituted into the synopsis lines.
 *
 * NOTE(review): the -l option text mentions a witness server, yet no
 * witness command is listed in the synopsis or under COMMANDS -- confirm
 * whether one should be documented here.
 */
static void
help(const char *progname)
{
	/* Synopsis */
	printf(_("\n%s: Replicator manager \n"), progname);
	fputs(_("Usage:\n"), stdout);
	printf(_(" %s [OPTIONS] master {register}\n"), progname);
	printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
		   progname);

	/* General options */
	fputs(_("\nGeneral options:\n"), stdout);
	fputs(_(" --help show this help, then exit\n"), stdout);
	fputs(_(" --version output version information, then exit\n"), stdout);
	fputs(_(" --verbose output verbose activity information\n"), stdout);

	/* Connection options */
	fputs(_("\nConnection options:\n"), stdout);
	fputs(_(" -d, --dbname=DBNAME database to connect to\n"), stdout);
	fputs(_(" -h, --host=HOSTNAME database server host or socket directory\n"), stdout);
	fputs(_(" -p, --port=PORT database server port\n"), stdout);
	fputs(_(" -U, --username=USERNAME database user name to connect as\n"), stdout);

	/* Configuration options */
	fputs(_("\nConfiguration options:\n"), stdout);
	fputs(_(" -D, --data-dir=DIR local directory where the files will be copied to\n"), stdout);
	fputs(_(" -l, --local-port=PORT standby or witness server local port\n"), stdout);
	fputs(_(" -f, --config_file=PATH path to the configuration file\n"), stdout);
	fputs(_(" -R, --remote-user=USERNAME database server username for rsync\n"), stdout);
	fputs(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"), stdout);
	fputs(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"), stdout);
	fputs(_(" -F, --force force potentially dangerous operations to happen\n"), stdout);

	/* Summary and command list */
	printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
	fputs(_("or making follow another node and then exits.\n"), stdout);
	fputs(_("COMMANDS:\n"), stdout);
	fputs(_(" master register - registers the master in a cluster\n"), stdout);
	fputs(_(" standby register - registers a standby in a cluster\n"), stdout);
	fputs(_(" standby clone [node] - allows creation of a new standby\n"), stdout);
	fputs(_(" standby promote - allows manual promotion of a specific standby into a "), stdout);
	fputs(_("new master in the event of a failover\n"), stdout);
	fputs(_(" standby follow - allows the standby to re-point itself to a new master\n"), stdout);
}
/*
 * Creates a recovery file for a standby inside data_dir.
 *
 * The file gets a "standby_mode = 'on'" line followed by one
 * primary_conninfo line:
 *
 *  - when master_conninfo is non-NULL it is written as the
 *    primary_conninfo value as-is;
 *  - otherwise the value is built from runtime_options.host and
 *    runtime_options.masterport (falling back to "5432" when empty),
 *    with the content of the PGPASSWORD environment variable appended
 *    as password= when that variable is set.
 *
 * Returns true when the file was fully written, false when it could not
 * be created or written.  Terminates the program with ERR_BAD_PASSWORD
 * when no master_conninfo is supplied, PGPASSWORD is unset and
 * require_password is set.
 *
 * XXX: Reading the environment here is pretty dirty; this should be
 * pushed up to the caller.
 */
static bool
create_recovery_file(const char *data_dir, char *master_conninfo)
{
	FILE	   *recovery_file;
	char		recovery_file_path[MAXLEN];
	char		line[MAXLEN];

	maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);

	recovery_file = fopen(recovery_file_path, "w");
	if (recovery_file == NULL)
	{
		log_err(_("could not create recovery.conf file, it could be necessary to create it manually\n"));
		return false;
	}

	maxlen_snprintf(line, "standby_mode = 'on'\n");
	if (fputs(line, recovery_file) == EOF)
	{
		log_err(_("recovery file could not be written, it could be necessary to create it manually\n"));
		fclose(recovery_file);
		return false;
	}

	if (master_conninfo != NULL)
	{
		/* A full connection string was provided: use it untouched. */
		maxlen_snprintf(line, "primary_conninfo = '%s'\n", master_conninfo);
	}
	else
	{
		const char *master_port =
			(runtime_options.masterport[0]) ? runtime_options.masterport : "5432";
		char	   *password = getenv("PGPASSWORD");

		if (password != NULL)
		{
			/* Template the password into the generated connection string. */
			maxlen_snprintf(line,
							"primary_conninfo = 'host=%s port=%s password=%s'\n",
							runtime_options.host, master_port, password);
		}
		else
		{
			if (require_password)
			{
				log_err(_("%s: PGPASSWORD not set, but having one is required\n"),
						progname);
				fclose(recovery_file);
				exit(ERR_BAD_PASSWORD);
			}

			maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n",
							runtime_options.host, master_port);
		}
	}

	if (fputs(line, recovery_file) == EOF)
	{
		log_err(_("recovery file could not be written, it could be necessary to create it manually\n"));
		fclose(recovery_file);
		return false;
	}

	/* fclose() flushes buffered data, so a failure here is a write failure. */
	if (fclose(recovery_file) != 0)
	{
		log_err(_("recovery file could not be written, it could be necessary to create it manually\n"));
		return false;
	}

	return true;
}
/*
 * Check ssh connectivity to host before trying to rsync, by running
 * /bin/true there in batch mode.  When remote_user is a non-empty string
 * it is passed to ssh with -l.
 *
 * Returns the value reported by system(): 0 when the command succeeded,
 * non-zero otherwise (an informational message is logged in that case).
 */
static int
test_ssh_connection(char *host, char *remote_user)
{
	char		ssh_command[MAXLEN];
	int			status;

	if (remote_user[0])
	{
		maxlen_snprintf(ssh_command, "ssh -o Batchmode=yes %s -l %s /bin/true", host, remote_user);
	}
	else
	{
		maxlen_snprintf(ssh_command, "ssh -o Batchmode=yes %s /bin/true", host);
	}

	log_debug(_("command is: %s"), ssh_command);

	status = system(ssh_command);
	if (status == 0)
		return 0;

	log_info(_("Can not connect to the remote host (%s)\n"), host);
	return status;
}
/*
 * Copy a file or directory from a remote host with rsync.
 *
 * host         remote host name
 * remote_user  remote login; when empty, rsync is given the bare host
 * remote_path  path on the remote host
 * local_path   destination on this machine
 * is_directory when true, the contents of remote_path are copied (a "*"
 *              glob is appended after a slash) and pg_xlog*, pg_control
 *              and *.pid are excluded
 *
 * The rsync flags come from options.rsync_options, or a built-in default
 * when that is empty; " --delete" is added when runtime_options.force is
 * set.
 *
 * Returns 0 on success, otherwise the non-zero status from system().
 */
static int
copy_remote_files(char *host, char *remote_user, char *remote_path,
				  char *local_path, bool is_directory)
{
	char		script[MAXLEN];
	char		rsync_flags[MAXLEN];
	char		host_string[MAXLEN];
	int			r;

	if (strnlen(options.rsync_options, MAXLEN) == 0)
	{
		maxlen_snprintf(
			rsync_flags, "%s",
			"--archive --checksum --compress --progress --rsh=ssh");
	}
	else
	{
		maxlen_snprintf(rsync_flags, "%s", options.rsync_options);
	}

	if (!remote_user[0])
	{
		maxlen_snprintf(host_string, "%s", host);
	}
	else
	{
		maxlen_snprintf(host_string, "%s@%s", remote_user, host);
	}

	/*
	 * Compose the whole command in one bounded format call.  The optional
	 * flags are spliced in here rather than appended to rsync_flags with
	 * strcat(), which could write past the end of that buffer when
	 * options.rsync_options is already long.
	 */
	maxlen_snprintf(script, "rsync %s%s%s %s:%s%s %s",
					rsync_flags,
					runtime_options.force ? " --delete" : "",
					is_directory ? " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid" : "",
					host_string, remote_path,
					is_directory ? "/*" : "",
					local_path);

	log_info(_("rsync command line: '%s'\n"), script);

	r = system(script);

	/*
	 * If we are transfering a directory (data directory, tablespace directories)
	 * then we can ignore some rsync warnings.  If we get some of those errors, we
	 * treat them as 0 only if passed the --ignore-rsync-warning command-line option.
	 *
	 * List of ignorable rsync errors:
	 *	 24		Partial transfer due to vanished source files
	 *
	 * The exit status is only meaningful when the command exited normally,
	 * hence the WIFEXITED() guard.
	 */
	if (is_directory && WIFEXITED(r) && WEXITSTATUS(r) == 24)
	{
		if (runtime_options.ignore_rsync_warn)
		{
			r = 0;
			log_info(_("rsync partial transfer warning ignored\n"));
		}
		else
		{
			log_warning( _("\nrsync completed with return code 24: "
						   "\"Partial transfer due to vanished source files\".\n"
						   "This can happen because of normal operation "
						   "on the master server, but it may indicate an "
						   "unexpected change during cloning. If you are certain "
						   "no changes were made to the master, try cloning "
						   "again using \"repmgr --force --ignore-rsync-warning\"."));
		}
	}

	if (r != 0)
	{
		log_err(_("Can't rsync from remote file or directory (%s:%s)\n"),
				host_string, remote_path);
	}

	return r;
}
/*
 * Helper for check_parameters_for_action(): reject the options that make
 * no sense for commands driven purely by the configuration file.
 *
 * Logs conn_msg (followed by the usage reminder) when any of the master
 * connection options host, masterport, username or dbname was given, and
 * dest_dir_msg (followed by the usage reminder) when a destination
 * directory was given.  Both messages are expected to be already
 * translated.
 *
 * Returns true when none of those options was supplied.
 */
static bool
reject_master_options(const char *conn_msg, const char *dest_dir_msg)
{
	bool		ok = true;

	if (runtime_options.host[0] || runtime_options.masterport[0] ||
		runtime_options.username[0] || runtime_options.dbname[0])
	{
		log_err("%s", conn_msg);
		usage();
		ok = false;
	}

	if (runtime_options.dest_dir[0])
	{
		log_err("%s", dest_dir_msg);
		usage();
		ok = false;
	}

	return ok;
}

/*
 * Tries to avoid useless or conflicting parameters.
 *
 * Returns true when the command-line options are acceptable for the
 * given action, false otherwise (after logging the reason).  For
 * STANDBY_CLONE the global need_a_node is also reset to false.
 */
static bool
check_parameters_for_action(const int action)
{
	bool		ok = true;

	switch (action)
	{
		case MASTER_REGISTER:

			/*
			 * To register a master we only need the repmgr.conf; all
			 * other parameters are at least useless and could be
			 * confusing, so reject them.
			 */
			ok = reject_master_options(
				_("You can't use connection parameters to the master when issuing a MASTER REGISTER command.\n"),
				_("You don't need a destination directory for MASTER REGISTER command\n"));
			break;

		case STANDBY_REGISTER:

			/*
			 * To register a standby we only need the repmgr.conf; we
			 * don't need connection parameters to the master because we
			 * can detect the master in repl_nodes.
			 */
			ok = reject_master_options(
				_("You can't use connection parameters to the master when issuing a STANDBY REGISTER command.\n"),
				_("You don't need a destination directory for STANDBY REGISTER command\n"));
			break;

		case STANDBY_PROMOTE:

			/*
			 * To promote a standby we only need the repmgr.conf; we don't
			 * want connection parameters to the master because we will
			 * try to detect the master in repl_nodes.  If we can't find
			 * it then the promote action will be cancelled.
			 */
			ok = reject_master_options(
				_("You can't use connection parameters to the master when issuing a STANDBY PROMOTE command.\n"),
				_("You don't need a destination directory for STANDBY PROMOTE command\n"));
			break;

		case STANDBY_FOLLOW:

			/*
			 * To make a standby follow a master we only need the
			 * repmgr.conf; we don't want connection parameters to the new
			 * master because we will try to detect the master in
			 * repl_nodes.  If we can't find it then the follow action
			 * will be cancelled.
			 */
			ok = reject_master_options(
				_("You can't use connection parameters to the master when issuing a STANDBY FOLLOW command.\n"),
				_("You don't need a destination directory for STANDBY FOLLOW command\n"));
			break;

		case STANDBY_CLONE:

			/*
			 * Issue a friendly notice that the configuration file is not
			 * necessary nor read at all when performing a STANDBY CLONE
			 * action.  This is only a notice: we keep going.
			 */
			if (runtime_options.config_file[0])
			{
				log_notice(_("Only command line parameters for the connection "
							 "to the master are used when issuing a STANDBY CLONE command. "
							 "The passed configuration file is neither required nor used for "
							 "its node configuration portions\n\n"));
			}

			/*
			 * To clone a master into a standby we need connection
			 * parameters to that master.  The other cases test host[0],
			 * so an unset host is an empty string; comparing it against
			 * NULL would never detect the missing parameter.
			 */
			if (!runtime_options.host[0])
			{
				log_notice(_("You need to use connection parameters to "
							 "the master when issuing a STANDBY CLONE command."));
				ok = false;
			}
			need_a_node = false;
			break;

		case WITNESS_CREATE:
			/* allow all parameters to be supplied */
			break;

		default:
			/* no restriction defined for other actions */
			break;
	}

	return ok;
}
/*
 * Helper for create_schema(): run one utility statement on conn and
 * report whether it completed with PGRES_COMMAND_OK.  The result object
 * is always released; the error text stays available through
 * PQerrorMessage(conn).
 *
 * PQexec() only returns NULL on out-of-memory or similar conditions, so a
 * failed statement must be detected through the result status rather than
 * by testing the returned pointer.
 */
static bool
schema_command_ok(PGconn *conn, const char *sql)
{
	PGresult   *res = PQexec(conn, sql);
	bool		ok = (PQresultStatus(res) == PGRES_COMMAND_OK);

	PQclear(res);
	return ok;
}

/*
 * Create the repmgr schema objects on conn: the schema named by
 * repmgr_schema, the repl_nodes and repl_monitor tables, the repl_status
 * view and the two C-language helper functions in schema public.
 *
 * A failure while creating the schema, a table or the view closes conn
 * and terminates the program with ERR_BAD_CONFIG.  A failure while
 * creating one of the functions is reported on stderr and makes the
 * function return false, leaving conn open for the caller.
 *
 * Returns true when every object was created.
 */
static bool
create_schema(PGconn *conn)
{
	char		sqlquery[QUERY_STR_LEN];

	sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", repmgr_schema);
	log_debug(_("master register: %s\n"), sqlquery);
	if (!schema_command_ok(conn, sqlquery))
	{
		log_err(_("Cannot create the schema %s: %s\n"),
				repmgr_schema, PQerrorMessage(conn));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* ... the tables */
	sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( "
					  " id integer primary key, "
					  " cluster text not null, "
					  " conninfo text not null, "
					  " priority integer not null, "
					  " witness boolean not null default false)", repmgr_schema);
	log_debug(_("master register: %s\n"), sqlquery);
	if (!schema_command_ok(conn, sqlquery))
	{
		log_err(_("Cannot create the table %s.repl_nodes: %s\n"),
				repmgr_schema, PQerrorMessage(conn));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( "
					  " primary_node INTEGER NOT NULL, "
					  " standby_node INTEGER NOT NULL, "
					  " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, "
					  " last_wal_primary_location TEXT NOT NULL, "
					  " last_wal_standby_location TEXT NOT NULL, "
					  " replication_lag BIGINT NOT NULL, "
					  " apply_lag BIGINT NOT NULL) ", repmgr_schema);
	log_debug(_("master register: %s\n"), sqlquery);
	if (!schema_command_ok(conn, sqlquery))
	{
		log_err(_("Cannot create the table %s.repl_monitor: %s\n"),
				repmgr_schema, PQerrorMessage(conn));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* and the view */
	sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS "
					  " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node "
					  " ORDER BY last_monitor_time desc) "
					  " FROM %s.repl_monitor) "
					  " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, "
					  " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, "
					  " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag "
					  " FROM monitor_info a "
					  " WHERE row_number = 1", repmgr_schema, repmgr_schema);
	log_debug(_("master register: %s\n"), sqlquery);
	if (!schema_command_ok(conn, sqlquery))
	{
		log_err(_("Cannot create the view %s.repl_status: %s\n"),
				repmgr_schema, PQerrorMessage(conn));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* XXX Here we MUST try to load the repmgr_function.sql not hardcode it here */
	sqlquery_snprintf(sqlquery,
					  "CREATE OR REPLACE FUNCTION public.repmgr_update_standby_location(text) RETURNS boolean "
					  "AS '$libdir/repmgr_funcs', 'repmgr_update_standby_location' "
					  "LANGUAGE C STRICT ");
	if (!schema_command_ok(conn, sqlquery))
	{
		fprintf(stderr, "Cannot create the function repmgr_update_standby_location: %s\n",
				PQerrorMessage(conn));
		return false;
	}

	sqlquery_snprintf(sqlquery,
					  "CREATE OR REPLACE FUNCTION public.repmgr_get_last_standby_location() RETURNS text "
					  "AS '$libdir/repmgr_funcs', 'repmgr_get_last_standby_location' "
					  "LANGUAGE C STRICT ");
	if (!schema_command_ok(conn, sqlquery))
	{
		fprintf(stderr, "Cannot create the function repmgr_get_last_standby_location: %s\n",
				PQerrorMessage(conn));
		return false;
	}

	return true;
}
/*
 * Replace the content of repl_nodes on the witness with the rows found in
 * repl_nodes on the master (both in the schema named by repmgr_schema).
 *
 * The witness table is truncated first, then every master row is
 * inserted with its id, conninfo, priority and witness flag; the cluster
 * column is set to options.cluster_name.
 *
 * Neither connection is closed here.  Returns true when every row was
 * copied, false (after reporting on stderr) on the first failure.
 */
static bool
copy_configuration(PGconn *masterconn, PGconn *witnessconn)
{
	/* sized like the other buffers written with sqlquery_snprintf() */
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;
	bool		ok = true;
	int			ntuples;
	int			i;

	/*
	 * PQexec() returns a non-NULL result even when the statement fails, so
	 * success is decided from the result status, and every result is
	 * released with PQclear().
	 */
	sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", repmgr_schema);
	res = PQexec(witnessconn, sqlquery);
	if (PQresultStatus(res) != PGRES_COMMAND_OK)
	{
		fprintf(stderr, "Cannot clean node details in the witness, %s\n",
				PQerrorMessage(witnessconn));
		PQclear(res);
		return false;
	}
	PQclear(res);

	/* Name the columns so the positions used below do not depend on table layout. */
	sqlquery_snprintf(sqlquery,
					  "SELECT id, cluster, conninfo, priority, witness FROM %s.repl_nodes",
					  repmgr_schema);
	res = PQexec(masterconn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		fprintf(stderr, "Can't get configuration from master: %s\n",
				PQerrorMessage(masterconn));
		PQclear(res);
		return false;
	}

	/*
	 * The values travel as statement parameters, so a conninfo containing
	 * quote characters cannot break the INSERT.
	 */
	sqlquery_snprintf(sqlquery,
					  "INSERT INTO %s.repl_nodes(id, cluster, conninfo, priority, witness) "
					  "VALUES ($1, $2, $3, $4, $5)",
					  repmgr_schema);

	ntuples = PQntuples(res);
	for (i = 0; i < ntuples; i++)
	{
		const char *params[5];
		PGresult   *ins;

		params[0] = PQgetvalue(res, i, 0);	/* id */
		params[1] = options.cluster_name;	/* cluster */
		params[2] = PQgetvalue(res, i, 2);	/* conninfo */
		params[3] = PQgetvalue(res, i, 3);	/* priority */
		params[4] = PQgetvalue(res, i, 4);	/* witness */

		ins = PQexecParams(witnessconn, sqlquery, 5, NULL, params, NULL, NULL, 0);
		if (PQresultStatus(ins) != PGRES_COMMAND_OK)
		{
			fprintf(stderr, "Cannot copy configuration to witness, %s\n",
					PQerrorMessage(witnessconn));
			PQclear(ins);
			ok = false;
			break;
		}
		PQclear(ins);
	}

	PQclear(res);
	return ok;
}