mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
42 Commits
REL3_3_STA
...
v1.2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
efd50f11ac | ||
|
|
45a39084ed | ||
|
|
94c73a016f | ||
|
|
be5cbe4ddd | ||
|
|
30d35d5b4c | ||
|
|
fa889a11ac | ||
|
|
f4087d0a32 | ||
|
|
a55d7a4bd3 | ||
|
|
5d8cf6abe0 | ||
|
|
9caa243354 | ||
|
|
6880483947 | ||
|
|
3d89fdadab | ||
|
|
6e9e4e05ae | ||
|
|
17a160e970 | ||
|
|
e0e01aa9db | ||
|
|
b09eff9f76 | ||
|
|
3c5d82b9ef | ||
|
|
257dbc4f42 | ||
|
|
2a64099163 | ||
|
|
41c05bea7b | ||
|
|
7d76d86e19 | ||
|
|
36d5b5bc24 | ||
|
|
c543402d65 | ||
|
|
d0959b953e | ||
|
|
0660bded0b | ||
|
|
209a0c64d2 | ||
|
|
fd76ec6283 | ||
|
|
7d579cf71f | ||
|
|
d790ef740b | ||
|
|
aa6633b027 | ||
|
|
c3bffce379 | ||
|
|
78aea00a6d | ||
|
|
91601204b5 | ||
|
|
c91ddc2f5e | ||
|
|
72f74dd7a7 | ||
|
|
901d07fa92 | ||
|
|
f0e609bcd4 | ||
|
|
94c9c3a5c6 | ||
|
|
3af5243bcc | ||
|
|
85bbae462a | ||
|
|
14e49d41c2 | ||
|
|
1bd8a703c8 |
2
CREDITS
2
CREDITS
@@ -10,3 +10,5 @@ Hannu Krosing <hannu@2ndQuadrant.com>
|
|||||||
Cédric Villemain <cedric@2ndquadrant.com>
|
Cédric Villemain <cedric@2ndquadrant.com>
|
||||||
Charles Duffy <charles@dyfis.net>
|
Charles Duffy <charles@dyfis.net>
|
||||||
Daniel Farina <daniel@heroku.com>
|
Daniel Farina <daniel@heroku.com>
|
||||||
|
Marco Nenciarini <marco.nenciarini@2ndquadrant.it>
|
||||||
|
Carlo Ascani <carlo.ascani@2ndquadrant.it>
|
||||||
|
|||||||
25
HISTORY
25
HISTORY
@@ -1,5 +1,21 @@
|
|||||||
1.0.0 2010-12-05
|
1.2.0 2012-07-27
|
||||||
First public release
|
Test ssh connection before trying to rsync (Cédric)
|
||||||
|
Add CLUSTER SHOW command (Carlo)
|
||||||
|
Add CLUSTER CLEANUP command (Jaime)
|
||||||
|
Add function write_primary_conninfo (Marco)
|
||||||
|
Teach repmgr how to get tablespace's location in different pg version (Jaime)
|
||||||
|
Improve version message (Carlo)
|
||||||
|
|
||||||
|
1.1.1 2012-04-18
|
||||||
|
Add --ignore-rsync-warning (Cédric)
|
||||||
|
Add strnlen for compatibility with OS X (Greg)
|
||||||
|
Improve performance of repl_status view (Jaime)
|
||||||
|
Remove last argument from log_err (Jaime, Reported by Jeroen Dekkers)
|
||||||
|
Complete documentation about possible error conditions (Jaime)
|
||||||
|
Document how to clean history (Jaime)
|
||||||
|
|
||||||
|
1.1.0 2011-03-09
|
||||||
|
Make options -U, -R and -p not mandatory (Jaime)
|
||||||
|
|
||||||
1.1.0b1 2011-02-24
|
1.1.0b1 2011-02-24
|
||||||
Fix missing "--force" option in help (Greg Smith)
|
Fix missing "--force" option in help (Greg Smith)
|
||||||
@@ -28,6 +44,5 @@
|
|||||||
Map old verbose flag into a useful setting for the new logger (Greg)
|
Map old verbose flag into a useful setting for the new logger (Greg)
|
||||||
Document repmgrd startup restrictions and log info about them (Greg)
|
Document repmgrd startup restrictions and log info about them (Greg)
|
||||||
|
|
||||||
1.1.0 2011-03-09
|
1.0.0 2010-12-05
|
||||||
Make options -U, -R and -p not mandatory (Jaime)
|
First public release
|
||||||
|
|
||||||
|
|||||||
43
README.rst
43
README.rst
@@ -814,6 +814,23 @@ and on "prime."
|
|||||||
|
|
||||||
The servers are now again acting as primary on "prime" and standby on "standby".
|
The servers are now again acting as primary on "prime" and standby on "standby".
|
||||||
|
|
||||||
|
Maintenance of monitor history
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
Once you have changed roles (with a failover or to restore original roles)
|
||||||
|
you would end up with records saying that node1 is primary and other records
|
||||||
|
saying that node2 is the primary, which could be confusing.
|
||||||
|
Also, if you don't do anything about it the monitor history will keep growing.
|
||||||
|
For both of those reasons you will sometimes want to do some maintenance on the
|
||||||
|
``repl_monitor`` table.
|
||||||
|
|
||||||
|
If you want to clean the history after a few days you can execute a
|
||||||
|
truncate/delete (whether you want to completely clean history or want to keep
|
||||||
|
a few days of history) in a cron. For example to keep just one day of history
|
||||||
|
you can put this in your crontab::
|
||||||
|
|
||||||
|
0 1 * * * psql -c "DELETE FROM repmgr_schema.repl_monitor where now() - last_monitor_time >= '1 day'::interval;" postgres
|
||||||
|
|
||||||
Configuration and command reference
|
Configuration and command reference
|
||||||
===================================
|
===================================
|
||||||
|
|
||||||
@@ -845,6 +862,7 @@ The output from this program looks like this::
|
|||||||
Usage:
|
Usage:
|
||||||
repmgr [OPTIONS] master {register}
|
repmgr [OPTIONS] master {register}
|
||||||
repmgr [OPTIONS] standby {register|clone|promote|follow}
|
repmgr [OPTIONS] standby {register|clone|promote|follow}
|
||||||
|
repmgr [OPTIONS] cluster {show|cleanup}
|
||||||
|
|
||||||
General options:
|
General options:
|
||||||
--help show this help, then exit
|
--help show this help, then exit
|
||||||
@@ -863,6 +881,8 @@ The output from this program looks like this::
|
|||||||
-R, --remote-user=USERNAME database server username for rsync
|
-R, --remote-user=USERNAME database server username for rsync
|
||||||
-w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)
|
-w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)
|
||||||
-F, --force force potentially dangerous operations to happen
|
-F, --force force potentially dangerous operations to happen
|
||||||
|
-I, --ignore-rsync-warning Ignore partial transfert warning
|
||||||
|
-k, --keep-history keeps indicated number of days of history
|
||||||
|
|
||||||
repmgr performs some tasks like clone a node, promote it or making follow another node and then exits.
|
repmgr performs some tasks like clone a node, promote it or making follow another node and then exits.
|
||||||
COMMANDS:
|
COMMANDS:
|
||||||
@@ -871,6 +891,8 @@ The output from this program looks like this::
|
|||||||
standby clone [node] - allows creation of a new standby
|
standby clone [node] - allows creation of a new standby
|
||||||
standby promote - allows manual promotion of a specific standby into a new master in the event of a failover
|
standby promote - allows manual promotion of a specific standby into a new master in the event of a failover
|
||||||
standby follow - allows the standby to re-point itself to a new master
|
standby follow - allows the standby to re-point itself to a new master
|
||||||
|
cluster show - print node informations
|
||||||
|
cluster cleanup - cleans monitor's history
|
||||||
|
|
||||||
The ``--verbose`` option can be useful in troubleshooting issues with
|
The ``--verbose`` option can be useful in troubleshooting issues with
|
||||||
the program.
|
the program.
|
||||||
@@ -941,6 +963,26 @@ its port if is different from the default one.
|
|||||||
|
|
||||||
./repmgr standby follow
|
./repmgr standby follow
|
||||||
|
|
||||||
|
* cluster show
|
||||||
|
|
||||||
|
* Shows the role (standby/master) and connection string for all nodes configured
|
||||||
|
in the cluster or "FAILED" if the node doesn't respond. This allows us to know
|
||||||
|
which nodes are alive and which one needs attention and to have a notion of the
|
||||||
|
structure of clusters we just have access to. Example::
|
||||||
|
|
||||||
|
./repmgr cluster show
|
||||||
|
|
||||||
|
* cluster cleanup
|
||||||
|
|
||||||
|
* Cleans the monitor's history from repmgr tables. This avoids the repl_monitor table
|
||||||
|
to grow excessively, which in turn affects repl_status view performance, also
|
||||||
|
keeps the disk space used by repmgr under control. This command can be used manually
|
||||||
|
or from a cron job to run it periodically.
|
||||||
|
There is also a --keep-history (-k) option to indicate how many days of history we
|
||||||
|
want to keep, so the command will clean up history older than "keep-history" days. Example::
|
||||||
|
|
||||||
|
./repmgr cluster cleanup -k 2
|
||||||
|
|
||||||
repmgrd Daemon
|
repmgrd Daemon
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
@@ -1023,6 +1065,7 @@ following
|
|||||||
* ERR_DB_QUERY 7: Error executing a database query.
|
* ERR_DB_QUERY 7: Error executing a database query.
|
||||||
* ERR_PROMOTED 8: Exiting program because the node has been promoted to master.
|
* ERR_PROMOTED 8: Exiting program because the node has been promoted to master.
|
||||||
* ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected.
|
* ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected.
|
||||||
|
* ERR_STR_OVERFLOW 10: A string was larger than expected.
|
||||||
|
|
||||||
License and Contributions
|
License and Contributions
|
||||||
=========================
|
=========================
|
||||||
|
|||||||
7
TODO
7
TODO
@@ -12,10 +12,3 @@ Known issues in repmgr
|
|||||||
|
|
||||||
* After running repmgrd as a regular foreground application, hitting
|
* After running repmgrd as a regular foreground application, hitting
|
||||||
control-C causes the program to crash.
|
control-C causes the program to crash.
|
||||||
|
|
||||||
Planned feature improvements
|
|
||||||
============================
|
|
||||||
|
|
||||||
* Before running ``pg_start_backup()``, a sanity check that there is a
|
|
||||||
a working ssh connection to the destination would help find
|
|
||||||
configuration errors before disturbing the database.
|
|
||||||
|
|||||||
@@ -202,7 +202,7 @@ get_cluster_size(PGconn *conn)
|
|||||||
* connection string is placed there.
|
* connection string is placed there.
|
||||||
*/
|
*/
|
||||||
PGconn *
|
PGconn *
|
||||||
getMasterConnection(PGconn *standby_conn, int id, char *cluster,
|
getMasterConnection(PGconn *standby_conn, char *cluster,
|
||||||
int *master_id, char *master_conninfo_out)
|
int *master_id, char *master_conninfo_out)
|
||||||
{
|
{
|
||||||
PGconn *master_conn = NULL;
|
PGconn *master_conn = NULL;
|
||||||
@@ -242,8 +242,8 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster,
|
|||||||
cluster);
|
cluster);
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
|
sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
|
||||||
" WHERE cluster = '%s' and id <> %d",
|
" WHERE cluster = '%s'",
|
||||||
schema_quoted, cluster, id);
|
schema_quoted, cluster);
|
||||||
|
|
||||||
res1 = PQexec(standby_conn, sqlquery);
|
res1 = PQexec(standby_conn, sqlquery);
|
||||||
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
||||||
|
|||||||
@@ -20,6 +20,8 @@
|
|||||||
#ifndef _REPMGR_DBUTILS_H_
|
#ifndef _REPMGR_DBUTILS_H_
|
||||||
#define _REPMGR_DBUTILS_H_
|
#define _REPMGR_DBUTILS_H_
|
||||||
|
|
||||||
|
#include "strutil.h"
|
||||||
|
|
||||||
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
|
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
|
||||||
PGconn *establishDBConnectionByParams(const char *keywords[],
|
PGconn *establishDBConnectionByParams(const char *keywords[],
|
||||||
const char *values[],
|
const char *values[],
|
||||||
@@ -29,7 +31,7 @@ char *pg_version(PGconn *conn, char* major_version);
|
|||||||
bool guc_setted(PGconn *conn, const char *parameter, const char *op,
|
bool guc_setted(PGconn *conn, const char *parameter, const char *op,
|
||||||
const char *value);
|
const char *value);
|
||||||
const char *get_cluster_size(PGconn *conn);
|
const char *get_cluster_size(PGconn *conn);
|
||||||
PGconn *getMasterConnection(PGconn *standby_conn, int id, char *cluster,
|
PGconn *getMasterConnection(PGconn *standby_conn, char *cluster,
|
||||||
int *master_id, char *master_conninfo_out);
|
int *master_id, char *master_conninfo_out);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
397
repmgr.c
397
repmgr.c
@@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
* Commands implemented are.
|
* Commands implemented are.
|
||||||
* MASTER REGISTER, STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW,
|
* MASTER REGISTER, STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW,
|
||||||
* STANDBY PROMOTE
|
* STANDBY PROMOTE, CLUSTER SHOW, CLUSTER CLEANUP
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -35,6 +35,7 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "check_dir.h"
|
#include "check_dir.h"
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
#include "version.h"
|
||||||
|
|
||||||
#define RECOVERY_FILE "recovery.conf"
|
#define RECOVERY_FILE "recovery.conf"
|
||||||
#define RECOVERY_DONE_FILE "recovery.done"
|
#define RECOVERY_DONE_FILE "recovery.done"
|
||||||
@@ -45,19 +46,25 @@
|
|||||||
#define STANDBY_CLONE 3
|
#define STANDBY_CLONE 3
|
||||||
#define STANDBY_PROMOTE 4
|
#define STANDBY_PROMOTE 4
|
||||||
#define STANDBY_FOLLOW 5
|
#define STANDBY_FOLLOW 5
|
||||||
|
#define CLUSTER_SHOW 6
|
||||||
|
#define CLUSTER_CLEANUP 7
|
||||||
|
|
||||||
static void help(const char *progname);
|
static void help(const char *progname);
|
||||||
static bool create_recovery_file(const char *data_dir, char *master_conninfo);
|
static bool create_recovery_file(const char *data_dir, char *master_conninfo);
|
||||||
|
static int test_ssh_connection(char *host, char *remote_user);
|
||||||
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
|
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||||
char *local_path, bool is_directory);
|
char *local_path, bool is_directory);
|
||||||
static bool check_parameters_for_action(const int action);
|
static bool check_parameters_for_action(const int action);
|
||||||
|
static void write_primary_conninfo(char* line);
|
||||||
|
|
||||||
static void do_master_register(void);
|
static void do_master_register(void);
|
||||||
static void do_standby_register(void);
|
static void do_standby_register(void);
|
||||||
static void do_standby_clone(void);
|
static void do_standby_clone(void);
|
||||||
static void do_standby_promote(void);
|
static void do_standby_promote(void);
|
||||||
static void do_standby_follow(void);
|
static void do_standby_follow(void);
|
||||||
static void help(const char* progname);
|
static void do_cluster_show(void);
|
||||||
|
static void do_cluster_cleanup(void);
|
||||||
|
|
||||||
static void usage(void);
|
static void usage(void);
|
||||||
|
|
||||||
/* Global variables */
|
/* Global variables */
|
||||||
@@ -71,7 +78,7 @@ bool need_a_node = true;
|
|||||||
bool require_password = false;
|
bool require_password = false;
|
||||||
|
|
||||||
/* Initialization of runtime options */
|
/* Initialization of runtime options */
|
||||||
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, "" };
|
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "", 0 };
|
||||||
t_configuration_options options = { "", -1, "", "", "" };
|
t_configuration_options options = { "", -1, "", "", "" };
|
||||||
|
|
||||||
static char *server_mode = NULL;
|
static char *server_mode = NULL;
|
||||||
@@ -90,7 +97,9 @@ main(int argc, char **argv)
|
|||||||
{"config-file", required_argument, NULL, 'f'},
|
{"config-file", required_argument, NULL, 'f'},
|
||||||
{"remote-user", required_argument, NULL, 'R'},
|
{"remote-user", required_argument, NULL, 'R'},
|
||||||
{"wal-keep-segments", required_argument, NULL, 'w'},
|
{"wal-keep-segments", required_argument, NULL, 'w'},
|
||||||
|
{"keep-history", required_argument, NULL, 'k'},
|
||||||
{"force", no_argument, NULL, 'F'},
|
{"force", no_argument, NULL, 'F'},
|
||||||
|
{"ignore-rsync-warning", no_argument, NULL, 'I'},
|
||||||
{"verbose", no_argument, NULL, 'v'},
|
{"verbose", no_argument, NULL, 'v'},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
@@ -110,13 +119,13 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
||||||
{
|
{
|
||||||
printf("%s (PostgreSQL) " PG_VERSION "\n", progname);
|
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
||||||
exit(SUCCESS);
|
exit(SUCCESS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options,
|
while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:k:F:I:v", long_options,
|
||||||
&optindex)) != -1)
|
&optindex)) != -1)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
@@ -147,9 +156,18 @@ main(int argc, char **argv)
|
|||||||
if (atoi(optarg) > 0)
|
if (atoi(optarg) > 0)
|
||||||
strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN);
|
strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
|
case 'k':
|
||||||
|
if (atoi(optarg) > 0)
|
||||||
|
runtime_options.keep_history = atoi(optarg);
|
||||||
|
else
|
||||||
|
runtime_options.keep_history = 0;
|
||||||
|
break;
|
||||||
case 'F':
|
case 'F':
|
||||||
runtime_options.force = true;
|
runtime_options.force = true;
|
||||||
break;
|
break;
|
||||||
|
case 'I':
|
||||||
|
runtime_options.ignore_rsync_warn = true;
|
||||||
|
break;
|
||||||
case 'v':
|
case 'v':
|
||||||
runtime_options.verbose = true;
|
runtime_options.verbose = true;
|
||||||
break;
|
break;
|
||||||
@@ -162,7 +180,8 @@ main(int argc, char **argv)
|
|||||||
/*
|
/*
|
||||||
* Now we need to obtain the action, this comes in one of these forms:
|
* Now we need to obtain the action, this comes in one of these forms:
|
||||||
* MASTER REGISTER |
|
* MASTER REGISTER |
|
||||||
* STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]}
|
* STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} |
|
||||||
|
* CLUSTER {SHOW | CLEANUP}
|
||||||
*
|
*
|
||||||
* the node part is optional, if we receive it then we shouldn't
|
* the node part is optional, if we receive it then we shouldn't
|
||||||
* have received a -h option
|
* have received a -h option
|
||||||
@@ -170,8 +189,8 @@ main(int argc, char **argv)
|
|||||||
if (optind < argc)
|
if (optind < argc)
|
||||||
{
|
{
|
||||||
server_mode = argv[optind++];
|
server_mode = argv[optind++];
|
||||||
if (strcasecmp(server_mode, "STANDBY") != 0 &&
|
if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 0 &&
|
||||||
strcasecmp(server_mode, "MASTER") != 0)
|
strcasecmp(server_mode, "CLUSTER") != 0 )
|
||||||
{
|
{
|
||||||
usage();
|
usage();
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -200,13 +219,21 @@ main(int argc, char **argv)
|
|||||||
action = STANDBY_PROMOTE;
|
action = STANDBY_PROMOTE;
|
||||||
else if (strcasecmp(server_cmd, "FOLLOW") == 0)
|
else if (strcasecmp(server_cmd, "FOLLOW") == 0)
|
||||||
action = STANDBY_FOLLOW;
|
action = STANDBY_FOLLOW;
|
||||||
else
|
else if (strcasecmp(server_mode, "CLUSTER") == 0)
|
||||||
{
|
{
|
||||||
usage();
|
if(strcasecmp(server_cmd, "SHOW") == 0)
|
||||||
exit(ERR_BAD_CONFIG);
|
action = CLUSTER_SHOW;
|
||||||
|
else if(strcasecmp(server_cmd, "CLEANUP") == 0)
|
||||||
|
action = CLUSTER_CLEANUP;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (action == NO_ACTION)
|
||||||
|
{
|
||||||
|
usage();
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
/* For some actions we still can receive a last argument */
|
/* For some actions we still can receive a last argument */
|
||||||
if (action == STANDBY_CLONE)
|
if (action == STANDBY_CLONE)
|
||||||
{
|
{
|
||||||
@@ -310,6 +337,12 @@ main(int argc, char **argv)
|
|||||||
case STANDBY_FOLLOW:
|
case STANDBY_FOLLOW:
|
||||||
do_standby_follow();
|
do_standby_follow();
|
||||||
break;
|
break;
|
||||||
|
case CLUSTER_SHOW:
|
||||||
|
do_cluster_show();
|
||||||
|
break;
|
||||||
|
case CLUSTER_CLEANUP:
|
||||||
|
do_cluster_cleanup();
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
usage();
|
usage();
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -319,6 +352,114 @@ main(int argc, char **argv)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_cluster_show(void)
|
||||||
|
{
|
||||||
|
PGconn *conn;
|
||||||
|
PGconn *node_conn = NULL;
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
char node_role[MAXLEN];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* We need to connect to check configuration */
|
||||||
|
log_info(_("%s connecting to database\n"), progname);
|
||||||
|
conn = establishDBConnection(options.conninfo, true);
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery, "SELECT conninfo FROM %s.repl_nodes;", repmgr_schema);
|
||||||
|
log_debug("cluster show: %s\n", sqlquery);
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Can't get nodes informations, have you regitered them?\n%s\n"), PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Role | Connection String \n");
|
||||||
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
|
{
|
||||||
|
node_conn = establishDBConnection(PQgetvalue(res, i, 0), false);
|
||||||
|
if (PQstatus(node_conn) != CONNECTION_OK)
|
||||||
|
strcpy(node_role, " FAILED");
|
||||||
|
else if (is_standby(node_conn))
|
||||||
|
strcpy(node_role, " standby");
|
||||||
|
else
|
||||||
|
strcpy(node_role, "* master");
|
||||||
|
|
||||||
|
printf("%-10s", node_role);
|
||||||
|
printf("| %s\n", PQgetvalue(res, i, 0));
|
||||||
|
|
||||||
|
PQfinish(node_conn);
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_cluster_cleanup(void)
|
||||||
|
{
|
||||||
|
int master_id;
|
||||||
|
PGconn *conn;
|
||||||
|
PGconn *master_conn;
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
|
/* I need a connection to my local db to know what node is the master */
|
||||||
|
log_info(_("%s connecting to database\n"), progname);
|
||||||
|
conn = establishDBConnection(options.conninfo, true);
|
||||||
|
|
||||||
|
/* check if there is a master in this cluster */
|
||||||
|
log_info(_("%s connecting to master database\n"), progname);
|
||||||
|
master_conn = getMasterConnection(conn, options.cluster_name,
|
||||||
|
&master_id, NULL);
|
||||||
|
if (!master_conn)
|
||||||
|
{
|
||||||
|
log_err(_("cluster cleanup: cannot connect to master\n"));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_DB_CON);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* I don't need a local connection anymore */
|
||||||
|
PQfinish(conn);
|
||||||
|
|
||||||
|
if (runtime_options.keep_history > 0)
|
||||||
|
{
|
||||||
|
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_monitor "
|
||||||
|
" WHERE age(now(), last_monitor_time) >= '%d days'::interval;",
|
||||||
|
repmgr_schema, runtime_options.keep_history);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_monitor;", repmgr_schema);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("cluster cleanup: %s\n", sqlquery);
|
||||||
|
res = PQexec(master_conn, sqlquery);
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("cluster cleanup: Couldn't clean history\n%s\n"), PQerrorMessage(master_conn));
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(master_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
/* Let's VACUUM the table to avoid autovacuum to be launched in an unexpected hour */
|
||||||
|
sqlquery_snprintf(sqlquery, "VACUUM %s.repl_monitor;", repmgr_schema);
|
||||||
|
log_debug("cluster cleanup: %s\n", sqlquery);
|
||||||
|
res = PQexec(master_conn, sqlquery);
|
||||||
|
|
||||||
|
/* XXX There is any need to check this VACUUM happens without problems? */
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(master_conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
do_master_register(void)
|
do_master_register(void)
|
||||||
{
|
{
|
||||||
@@ -346,7 +487,7 @@ do_master_register(void)
|
|||||||
log_info(_("%s connected to master, checking its state\n"), progname);
|
log_info(_("%s connected to master, checking its state\n"), progname);
|
||||||
if (is_standby(conn))
|
if (is_standby(conn))
|
||||||
{
|
{
|
||||||
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
log_err(_("Trying to register a standby node as a master\n"));
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -438,14 +579,13 @@ do_master_register(void)
|
|||||||
|
|
||||||
/* and the view */
|
/* and the view */
|
||||||
sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS "
|
sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS "
|
||||||
" WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node "
|
|
||||||
" ORDER BY last_monitor_time desc) "
|
|
||||||
" FROM %s.repl_monitor) "
|
|
||||||
" SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, "
|
" SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, "
|
||||||
" last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, "
|
" last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, "
|
||||||
" pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag "
|
" pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag "
|
||||||
" FROM monitor_info a "
|
" FROM %s.repl_monitor "
|
||||||
" WHERE row_number = 1", repmgr_schema, repmgr_schema);
|
" WHERE (standby_node, last_monitor_time) IN (SELECT standby_node, MAX(last_monitor_time) "
|
||||||
|
" FROM %s.repl_monitor GROUP BY 1)",
|
||||||
|
repmgr_schema, repmgr_schema, repmgr_schema);
|
||||||
log_debug("master register: %s\n", sqlquery);
|
log_debug("master register: %s\n", sqlquery);
|
||||||
if (!PQexec(conn, sqlquery))
|
if (!PQexec(conn, sqlquery))
|
||||||
{
|
{
|
||||||
@@ -454,6 +594,19 @@ do_master_register(void)
|
|||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* an index to improve performance of the view */
|
||||||
|
sqlquery_snprintf(sqlquery, "CREATE INDEX idx_repl_status_sort "
|
||||||
|
" ON %s.repl_monitor (last_monitor_time, standby_node) ",
|
||||||
|
repmgr_schema);
|
||||||
|
log_debug(_("master register: %s\n"), sqlquery);
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
log_err(_("Cannot indexing table %s.repl_monitor: %s\n"),
|
||||||
|
repmgr_schema, PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -461,8 +614,7 @@ do_master_register(void)
|
|||||||
int id;
|
int id;
|
||||||
|
|
||||||
/* Ensure there isn't any other master already registered */
|
/* Ensure there isn't any other master already registered */
|
||||||
master_conn = getMasterConnection(conn, options.node,
|
master_conn = getMasterConnection(conn, options.cluster_name, &id,NULL);
|
||||||
options.cluster_name, &id,NULL);
|
|
||||||
if (master_conn != NULL)
|
if (master_conn != NULL)
|
||||||
{
|
{
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
@@ -581,7 +733,7 @@ do_standby_register(void)
|
|||||||
|
|
||||||
/* check if there is a master in this cluster */
|
/* check if there is a master in this cluster */
|
||||||
log_info(_("%s connecting to master database\n"), progname);
|
log_info(_("%s connecting to master database\n"), progname);
|
||||||
master_conn = getMasterConnection(conn, options.node, options.cluster_name,
|
master_conn = getMasterConnection(conn, options.cluster_name,
|
||||||
&master_id, NULL);
|
&master_id, NULL);
|
||||||
if (!master_conn)
|
if (!master_conn)
|
||||||
{
|
{
|
||||||
@@ -788,10 +940,17 @@ do_standby_clone(void)
|
|||||||
* Check if the tablespace locations exists and that we can write to
|
* Check if the tablespace locations exists and that we can write to
|
||||||
* them.
|
* them.
|
||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
if (strcmp(master_version, "9.0") == 0 || strcmp(master_version, "9.1") == 0)
|
||||||
"SELECT spclocation "
|
sqlquery_snprintf(sqlquery,
|
||||||
" FROM pg_tablespace "
|
"SELECT spclocation "
|
||||||
"WHERE spcname NOT IN ('pg_default', 'pg_global')");
|
" FROM pg_tablespace "
|
||||||
|
"WHERE spcname NOT IN ('pg_default', 'pg_global')");
|
||||||
|
else
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"SELECT pg_tablespace_location(oid) spclocation "
|
||||||
|
" FROM pg_tablespace "
|
||||||
|
"WHERE spcname NOT IN ('pg_default', 'pg_global')");
|
||||||
|
|
||||||
log_debug("standby clone: %s\n", sqlquery);
|
log_debug("standby clone: %s\n", sqlquery);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
@@ -845,6 +1004,7 @@ do_standby_clone(void)
|
|||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
/* Trouble accessing directory */
|
/* Trouble accessing directory */
|
||||||
log_err(_("%s: could not access directory \"%s\": %s\n"),
|
log_err(_("%s: could not access directory \"%s\": %s\n"),
|
||||||
@@ -855,7 +1015,14 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log_notice("Starting backup...\n");
|
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
|
log_err(_("%s: Aborting, remote host %s is not reachable.\n"), progname, runtime_options.host);
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_DB_CON);
|
||||||
|
}
|
||||||
|
|
||||||
/* Get the data directory full path and the configuration files location */
|
/* Get the data directory full path and the configuration files location */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
@@ -871,6 +1038,16 @@ do_standby_clone(void)
|
|||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We need all 4 parameters, and they can be retrieved only by superusers */
|
||||||
|
if (PQntuples(res) != 4)
|
||||||
|
{
|
||||||
|
log_err("%s: STANDBY CLONE should be run by a SUPERUSER\n", progname);
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < PQntuples(res); i++)
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
{
|
{
|
||||||
if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0)
|
if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0)
|
||||||
@@ -886,6 +1063,22 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
log_notice("Starting backup...\n");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* in pg 9.1 default is to wait for a sync standby to ack,
|
||||||
|
* avoid that by turning off sync rep for this session
|
||||||
|
*/
|
||||||
|
sqlquery_snprintf(sqlquery, "SET synchronous_commit TO OFF");
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err("Can't set synchronous_commit: %s\n", PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* inform the master we will start a backup and get the first XLog filename
|
* inform the master we will start a backup and get the first XLog filename
|
||||||
* so we can say to the user we need those files
|
* so we can say to the user we need those files
|
||||||
@@ -963,10 +1156,17 @@ do_standby_clone(void)
|
|||||||
* find and appropiate rsync option but besides we could someday make all
|
* find and appropiate rsync option but besides we could someday make all
|
||||||
* these rsync happen concurrently
|
* these rsync happen concurrently
|
||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
if (strcmp(master_version, "9.0") == 0 || strcmp(master_version, "9.1") == 0)
|
||||||
"SELECT spclocation "
|
sqlquery_snprintf(sqlquery,
|
||||||
" FROM pg_tablespace "
|
"SELECT spclocation "
|
||||||
" WHERE spcname NOT IN ('pg_default', 'pg_global')");
|
" FROM pg_tablespace "
|
||||||
|
" WHERE spcname NOT IN ('pg_default', 'pg_global')");
|
||||||
|
else
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"SELECT pg_tablespace_location(oid) spclocation "
|
||||||
|
" FROM pg_tablespace "
|
||||||
|
" WHERE spcname NOT IN ('pg_default', 'pg_global')");
|
||||||
|
|
||||||
log_debug("standby clone: %s\n", sqlquery);
|
log_debug("standby clone: %s\n", sqlquery);
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -1022,9 +1222,6 @@ stop_backup:
|
|||||||
* Don't have this one exit if it fails, so that a more informative
|
* Don't have this one exit if it fails, so that a more informative
|
||||||
* error message will also appear about the backup not being stopped.
|
* error message will also appear about the backup not being stopped.
|
||||||
*/
|
*/
|
||||||
log_info(_("%s connecting to master database to stop backup\n"), progname);
|
|
||||||
conn=establishDBConnectionByParams(keywords,values,false);
|
|
||||||
|
|
||||||
log_notice("Finishing backup...\n");
|
log_notice("Finishing backup...\n");
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())");
|
sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())");
|
||||||
log_debug("standby clone: %s\n", sqlquery);
|
log_debug("standby clone: %s\n", sqlquery);
|
||||||
@@ -1039,8 +1236,10 @@ stop_backup:
|
|||||||
}
|
}
|
||||||
last_wal_segment = PQgetvalue(res, 0, 0);
|
last_wal_segment = PQgetvalue(res, 0, 0);
|
||||||
|
|
||||||
log_info(_("%s requires primary to keep WAL files %s until at least %s\n"),
|
/* don't show this message if rsync failed */
|
||||||
progname, first_wal_segment, last_wal_segment);
|
if (r == 0 && runtime_options.verbose)
|
||||||
|
log_info(_("%s requires primary to keep WAL files %s until at least %s\n"),
|
||||||
|
progname, first_wal_segment, last_wal_segment);
|
||||||
|
|
||||||
/* Finished with the database connection now */
|
/* Finished with the database connection now */
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
@@ -1121,7 +1320,7 @@ do_standby_promote(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* we also need to check if there isn't any master already */
|
/* we also need to check if there isn't any master already */
|
||||||
old_master_conn = getMasterConnection(conn, options.node, options.cluster_name,
|
old_master_conn = getMasterConnection(conn, options.cluster_name,
|
||||||
&old_master_id, NULL);
|
&old_master_id, NULL);
|
||||||
if (old_master_conn != NULL)
|
if (old_master_conn != NULL)
|
||||||
{
|
{
|
||||||
@@ -1225,8 +1424,7 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
/* we also need to check if there is any master in the cluster */
|
/* we also need to check if there is any master in the cluster */
|
||||||
log_info(_("%s connecting to master database\n"), progname);
|
log_info(_("%s connecting to master database\n"), progname);
|
||||||
master_conn = getMasterConnection(conn, options.node,
|
master_conn = getMasterConnection(conn, options.cluster_name, &master_id,(char *) &master_conninfo);
|
||||||
options.cluster_name, &master_id,(char *) &master_conninfo);
|
|
||||||
if (master_conn == NULL)
|
if (master_conn == NULL)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
@@ -1272,7 +1470,7 @@ do_standby_follow(void)
|
|||||||
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
|
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
|
|
||||||
log_info(_("%s Changing standby's master"),progname);
|
log_info(_("%s Changing standby's master\n"),progname);
|
||||||
|
|
||||||
/* Get the data directory full path */
|
/* Get the data directory full path */
|
||||||
sqlquery_snprintf(sqlquery, "SELECT setting "
|
sqlquery_snprintf(sqlquery, "SELECT setting "
|
||||||
@@ -1322,6 +1520,7 @@ void help(const char *progname)
|
|||||||
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
||||||
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
||||||
progname);
|
progname);
|
||||||
|
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
|
||||||
printf(_("\nGeneral options:\n"));
|
printf(_("\nGeneral options:\n"));
|
||||||
printf(_(" --help show this help, then exit\n"));
|
printf(_(" --help show this help, then exit\n"));
|
||||||
printf(_(" --version output version information, then exit\n"));
|
printf(_(" --version output version information, then exit\n"));
|
||||||
@@ -1337,6 +1536,8 @@ void help(const char *progname)
|
|||||||
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
||||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
|
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
|
||||||
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
||||||
|
printf(_(" -I, --ignore-rsync-warning Ignore partial transfert warning\n"));
|
||||||
|
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
|
||||||
|
|
||||||
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
||||||
printf(_("or making follow another node and then exits.\n"));
|
printf(_("or making follow another node and then exits.\n"));
|
||||||
@@ -1347,6 +1548,8 @@ void help(const char *progname)
|
|||||||
printf(_(" standby promote - allows manual promotion of a specific standby into a "));
|
printf(_(" standby promote - allows manual promotion of a specific standby into a "));
|
||||||
printf(_("new master in the event of a failover\n"));
|
printf(_("new master in the event of a failover\n"));
|
||||||
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
|
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
|
||||||
|
printf(_(" cluster show - print node informations\n"));
|
||||||
|
printf(_(" cluster cleanup - cleans monitor's history\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1379,41 +1582,7 @@ create_recovery_file(const char *data_dir, char *master_conninfo)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", runtime_options.host,
|
write_primary_conninfo(line);
|
||||||
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Template a password into the connection string in recovery.conf
|
|
||||||
* if a full connection string is not already provided.
|
|
||||||
*
|
|
||||||
* Sometimes this is passed by the user explicitly, and otherwise
|
|
||||||
* we try to get it into the environment.
|
|
||||||
*
|
|
||||||
* XXX: This is pretty dirty, at least push this up to the caller rather
|
|
||||||
* than hitting environment variables at this level.
|
|
||||||
*/
|
|
||||||
if (master_conninfo == NULL)
|
|
||||||
{
|
|
||||||
char *password = getenv("PGPASSWORD");
|
|
||||||
|
|
||||||
if (password != NULL)
|
|
||||||
{
|
|
||||||
maxlen_snprintf(line,
|
|
||||||
"primary_conninfo = 'host=%s port=%s password=%s'\n",
|
|
||||||
runtime_options.host,
|
|
||||||
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432",
|
|
||||||
password);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (require_password)
|
|
||||||
{
|
|
||||||
log_err(_("%s: PGPASSWORD not set, but having one is required\n"),
|
|
||||||
progname);
|
|
||||||
exit(ERR_BAD_PASSWORD);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fputs(line, recovery_file) == EOF)
|
if (fputs(line, recovery_file) == EOF)
|
||||||
{
|
{
|
||||||
@@ -1428,6 +1597,24 @@ create_recovery_file(const char *data_dir, char *master_conninfo)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
test_ssh_connection(char *host, char *remote_user)
|
||||||
|
{
|
||||||
|
char script[MAXLEN];
|
||||||
|
int r;
|
||||||
|
|
||||||
|
/* Check if we have ssh connectivity to host before trying to rsync */
|
||||||
|
if (!remote_user[0])
|
||||||
|
maxlen_snprintf(script, "ssh -o Batchmode=yes %s /bin/true", host);
|
||||||
|
else
|
||||||
|
maxlen_snprintf(script, "ssh -o Batchmode=yes %s -l %s /bin/true", host, remote_user);
|
||||||
|
|
||||||
|
log_debug(_("command is: %s"), script);
|
||||||
|
r = system(script);
|
||||||
|
if (r != 0)
|
||||||
|
log_info(_("Cannot connect to the remote host (%s)\n"), host);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
copy_remote_files(char *host, char *remote_user, char *remote_path,
|
copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||||
@@ -1473,6 +1660,29 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
|||||||
|
|
||||||
r = system(script);
|
r = system(script);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are transfering a directory (ie: data directory, tablespace directories)
|
||||||
|
* then we can ignore some rsync warning, so if we get some of those errors we
|
||||||
|
* treat them as 0 if we have --ignore-rsync-warning commandline option set
|
||||||
|
* List of ignorable rsync errors:
|
||||||
|
* 24 Partial transfer due to vanished source files
|
||||||
|
*/
|
||||||
|
if ((WEXITSTATUS(r) == 24) && is_directory)
|
||||||
|
{
|
||||||
|
if (!runtime_options.ignore_rsync_warn)
|
||||||
|
{
|
||||||
|
log_warning( _("\nrsync completed with return code 24 "
|
||||||
|
"\"Partial transfer due to vanished source files\".\n"
|
||||||
|
"This can happen because of normal operation "
|
||||||
|
"on the master server, but it may indicate an "
|
||||||
|
"issue during cloning. If you are certain no "
|
||||||
|
"changes were made to the master, try cloning "
|
||||||
|
"again using \"repmgr --force --ignore-rsync-warning\"."));
|
||||||
|
exit(ERR_BAD_RSYNC);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
r = 0;
|
||||||
|
}
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
log_err(_("Can't rsync from remote file or directory (%s:%s)\n"),
|
log_err(_("Can't rsync from remote file or directory (%s:%s)\n"),
|
||||||
host_string, remote_path);
|
host_string, remote_path);
|
||||||
@@ -1599,7 +1809,48 @@ check_parameters_for_action(const int action)
|
|||||||
}
|
}
|
||||||
need_a_node = false;
|
need_a_node = false;
|
||||||
break;
|
break;
|
||||||
|
case CLUSTER_SHOW:
|
||||||
|
/* allow all parameters to be supplied */
|
||||||
|
break;
|
||||||
|
case CLUSTER_CLEANUP:
|
||||||
|
/* allow all parameters to be supplied */
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This function uses global variables to determine connection settings. Special
|
||||||
|
* usage of the PGPASSWORD variable is handled, but strongly discouraged */
|
||||||
|
static void
|
||||||
|
write_primary_conninfo(char* line)
|
||||||
|
{
|
||||||
|
char host_buf[MAXLEN] = "";
|
||||||
|
char conn_buf[MAXLEN] = "";
|
||||||
|
char user_buf[MAXLEN] = "";
|
||||||
|
char password_buf[MAXLEN] = "";
|
||||||
|
|
||||||
|
/* Environment variable for password (UGLY, please use .pgpass!) */
|
||||||
|
const char *password = getenv("PGPASSWORD");
|
||||||
|
if (password != NULL) {
|
||||||
|
maxlen_snprintf(password_buf, " password=%s", password);
|
||||||
|
}
|
||||||
|
else if (require_password) {
|
||||||
|
log_err(_("%s: PGPASSWORD not set, but having one is required\n"),
|
||||||
|
progname);
|
||||||
|
exit(ERR_BAD_PASSWORD);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.host[0]) {
|
||||||
|
maxlen_snprintf(host_buf, " host=%s", runtime_options.host);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.username[0]) {
|
||||||
|
maxlen_snprintf(user_buf, " user=%s", runtime_options.username);
|
||||||
|
}
|
||||||
|
|
||||||
|
maxlen_snprintf(conn_buf, "port=%s%s%s%s",
|
||||||
|
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432", host_buf, user_buf, password_buf);
|
||||||
|
|
||||||
|
maxlen_snprintf(line, "primary_conninfo = '%s'", conn_buf);
|
||||||
|
}
|
||||||
|
|||||||
3
repmgr.h
3
repmgr.h
@@ -55,9 +55,12 @@ typedef struct
|
|||||||
char wal_keep_segments[MAXLEN];
|
char wal_keep_segments[MAXLEN];
|
||||||
bool verbose;
|
bool verbose;
|
||||||
bool force;
|
bool force;
|
||||||
|
bool ignore_rsync_warn;
|
||||||
|
|
||||||
char masterport[MAXLEN];
|
char masterport[MAXLEN];
|
||||||
|
|
||||||
|
/* parameter used by CLUSTER CLEANUP */
|
||||||
|
int keep_history;
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
12
repmgrd.c
12
repmgrd.c
@@ -30,6 +30,7 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
#include "version.h"
|
||||||
|
|
||||||
#include "libpq/pqsignal.h"
|
#include "libpq/pqsignal.h"
|
||||||
|
|
||||||
@@ -116,7 +117,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
||||||
{
|
{
|
||||||
printf("%s (PostgreSQL) " PG_VERSION "\n", progname);
|
printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
|
||||||
exit(SUCCESS);
|
exit(SUCCESS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -185,7 +186,7 @@ main(int argc, char **argv)
|
|||||||
/* I need the id of the primary as well as a connection to it */
|
/* I need the id of the primary as well as a connection to it */
|
||||||
log_info(_("%s Connecting to primary for cluster '%s'\n"),
|
log_info(_("%s Connecting to primary for cluster '%s'\n"),
|
||||||
progname, local_options.cluster_name);
|
progname, local_options.cluster_name);
|
||||||
primaryConn = getMasterConnection(myLocalConn, local_options.node,
|
primaryConn = getMasterConnection(myLocalConn,
|
||||||
local_options.cluster_name,
|
local_options.cluster_name,
|
||||||
&primary_options.node,NULL);
|
&primary_options.node,NULL);
|
||||||
if (primaryConn == NULL)
|
if (primaryConn == NULL)
|
||||||
@@ -269,7 +270,7 @@ MonitorExecute(void)
|
|||||||
log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));
|
log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));
|
||||||
for (connection_retries = 0; connection_retries < 6; connection_retries++)
|
for (connection_retries = 0; connection_retries < 6; connection_retries++)
|
||||||
{
|
{
|
||||||
primaryConn = getMasterConnection(myLocalConn, local_options.node,
|
primaryConn = getMasterConnection(myLocalConn,
|
||||||
local_options.cluster_name, &primary_options.node,NULL);
|
local_options.cluster_name, &primary_options.node,NULL);
|
||||||
if (PQstatus(primaryConn) == CONNECTION_OK)
|
if (PQstatus(primaryConn) == CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -456,7 +457,10 @@ checkNodeConfiguration(char *conninfo)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PQclear(res);
|
else
|
||||||
|
{
|
||||||
|
PQclear(res);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,15 @@
|
|||||||
|
|
||||||
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap);
|
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap);
|
||||||
|
|
||||||
|
/*
 * Fallback strnlen for platforms that don't provide one (e.g. OS X).
 *
 * Returns the number of bytes in `s` before the first NUL, examining at
 * most `n` bytes; when no NUL is found within that window the result is `n`.
 */
#ifndef strnlen
size_t
strnlen(const char *s, size_t n)
{
	const char *nul = memchr(s, '\0', n);

	if (nul == NULL)
		return n;

	return (size_t) (nul - s);
}
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||||
|
|||||||
@@ -35,4 +35,9 @@ extern int xsnprintf(char *str, size_t size, const char *format, ...);
|
|||||||
extern int sqlquery_snprintf(char *str, const char *format, ...);
|
extern int sqlquery_snprintf(char *str, const char *format, ...);
|
||||||
extern int maxlen_snprintf(char *str, const char *format, ...);
|
extern int maxlen_snprintf(char *str, const char *format, ...);
|
||||||
|
|
||||||
|
/* Add strnlen on platforms that don't have it, like OS X */
|
||||||
|
#ifndef strnlen
|
||||||
|
extern size_t strnlen(const char *s, size_t n);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _STRUTIL_H_ */
|
#endif /* _STRUTIL_H_ */
|
||||||
|
|||||||
Reference in New Issue
Block a user