mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d36ee899dc | ||
|
|
d790ef740b | ||
|
|
aa6633b027 | ||
|
|
c3bffce379 | ||
|
|
78aea00a6d | ||
|
|
91601204b5 | ||
|
|
c91ddc2f5e | ||
|
|
72f74dd7a7 | ||
|
|
901d07fa92 | ||
|
|
f0e609bcd4 | ||
|
|
94c9c3a5c6 | ||
|
|
3af5243bcc | ||
|
|
85bbae462a | ||
|
|
14e49d41c2 | ||
|
|
1bd8a703c8 | ||
|
|
3ca0f2db06 |
7
HISTORY
7
HISTORY
@@ -31,3 +31,10 @@
|
||||
1.1.0 2011-03-09
|
||||
Make options -U, -R and -p not mandatory (Jaime)
|
||||
|
||||
1.1.1 2012-04-18
|
||||
Add --ignore-rsync-warning (Cédric)
|
||||
Add strnlen for compatibility with OS X (Greg)
|
||||
Improve performance of repl_status view (Jaime)
|
||||
Remove last argument from log_err (Jaime, Reported by Jeroen Dekkers)
|
||||
Complete documentation about possible error conditions (Jaime)
|
||||
Document how to clean history (Jaime)
|
||||
|
||||
19
README.rst
19
README.rst
@@ -814,6 +814,23 @@ and on "prime."
|
||||
|
||||
The servers are now again acting as primary on "prime" and standby on "standby".
|
||||
|
||||
Maintenance of monitor history
|
||||
-------------------------------
|
||||
|
||||
Once you have changed roles (with a failover or to restore original roles)
|
||||
you would end up with records saying that node1 is primary and other records
|
||||
saying that node2 is the primary, which could be confusing.
|
||||
Also, if you don't do anything about it the monitor history will keep growing.
|
||||
For both of those reasons you will sometimes want to perform some maintenance on the
|
||||
``repl_monitor`` table.
|
||||
|
||||
If you want to clean the history after a few days you can execute a
|
||||
truncate/delete (whether you want to completely clean history or want to keep
|
||||
a few days of history) in a cron. For example to keep just one day of history
|
||||
you can put this in your crontab::
|
||||
|
||||
0 1 * * * psql -c "DELETE FROM repmgr_schema.repl_monitor where now() - last_monitor_time >= '1 day'::interval;" postgres
|
||||
|
||||
Configuration and command reference
|
||||
===================================
|
||||
|
||||
@@ -863,6 +880,7 @@ The output from this program looks like this::
|
||||
-R, --remote-user=USERNAME database server username for rsync
|
||||
-w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)
|
||||
-F, --force force potentially dangerous operations to happen
|
||||
-I, --ignore-rsync-warning Ignore partial transfert warning
|
||||
|
||||
repmgr performs some tasks like clone a node, promote it or making follow another node and then exits.
|
||||
COMMANDS:
|
||||
@@ -1023,6 +1041,7 @@ following
|
||||
* ERR_DB_QUERY 7: Error executing a database query.
|
||||
* ERR_PROMOTED 8: Exiting program because the node has been promoted to master.
|
||||
* ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected.
|
||||
* ERR_STR_OVERFLOW 10: A string was larger than expected.
|
||||
|
||||
License and Contributions
|
||||
=========================
|
||||
|
||||
@@ -129,10 +129,10 @@ mkdir_p(char *path, mode_t omode)
|
||||
{
|
||||
struct stat sb;
|
||||
mode_t numask,
|
||||
oumask;
|
||||
oumask;
|
||||
int first,
|
||||
last,
|
||||
retval;
|
||||
last,
|
||||
retval;
|
||||
char *p;
|
||||
|
||||
p = path;
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
#ifndef _REPMGR_DBUTILS_H_
|
||||
#define _REPMGR_DBUTILS_H_
|
||||
|
||||
#include "strutil.h"
|
||||
|
||||
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
|
||||
PGconn *establishDBConnectionByParams(const char *keywords[],
|
||||
const char *values[],
|
||||
@@ -27,7 +29,7 @@ PGconn *establishDBConnectionByParams(const char *keywords[],
|
||||
bool is_standby(PGconn *conn);
|
||||
char *pg_version(PGconn *conn, char* major_version);
|
||||
bool guc_setted(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value);
|
||||
const char *value);
|
||||
const char *get_cluster_size(PGconn *conn);
|
||||
PGconn *getMasterConnection(PGconn *standby_conn, int id, char *cluster,
|
||||
int *master_id, char *master_conninfo_out);
|
||||
|
||||
89
repmgr.c
89
repmgr.c
@@ -49,7 +49,7 @@
|
||||
static void help(const char *progname);
|
||||
static bool create_recovery_file(const char *data_dir, char *master_conninfo);
|
||||
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
char *local_path, bool is_directory);
|
||||
char *local_path, bool is_directory);
|
||||
static bool check_parameters_for_action(const int action);
|
||||
|
||||
static void do_master_register(void);
|
||||
@@ -71,7 +71,7 @@ bool need_a_node = true;
|
||||
bool require_password = false;
|
||||
|
||||
/* Initialization of runtime options */
|
||||
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, "" };
|
||||
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "" };
|
||||
t_configuration_options options = { "", -1, "", "", "" };
|
||||
|
||||
static char *server_mode = NULL;
|
||||
@@ -91,6 +91,7 @@ main(int argc, char **argv)
|
||||
{"remote-user", required_argument, NULL, 'R'},
|
||||
{"wal-keep-segments", required_argument, NULL, 'w'},
|
||||
{"force", no_argument, NULL, 'F'},
|
||||
{"ignore-rsync-warning", no_argument, NULL, 'I'},
|
||||
{"verbose", no_argument, NULL, 'v'},
|
||||
{NULL, 0, NULL, 0}
|
||||
};
|
||||
@@ -116,7 +117,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
|
||||
|
||||
while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options,
|
||||
while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:I:v", long_options,
|
||||
&optindex)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
@@ -150,6 +151,9 @@ main(int argc, char **argv)
|
||||
case 'F':
|
||||
runtime_options.force = true;
|
||||
break;
|
||||
case 'I':
|
||||
runtime_options.ignore_rsync_warn = true;
|
||||
break;
|
||||
case 'v':
|
||||
runtime_options.verbose = true;
|
||||
break;
|
||||
@@ -346,7 +350,7 @@ do_master_register(void)
|
||||
log_info(_("%s connected to master, checking its state\n"), progname);
|
||||
if (is_standby(conn))
|
||||
{
|
||||
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
||||
log_err(_("Trying to register a standby node as a master\n"));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -438,14 +442,13 @@ do_master_register(void)
|
||||
|
||||
/* and the view */
|
||||
sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS "
|
||||
" WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node "
|
||||
" ORDER BY last_monitor_time desc) "
|
||||
" FROM %s.repl_monitor) "
|
||||
" SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, "
|
||||
" last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, "
|
||||
" pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag "
|
||||
" FROM monitor_info a "
|
||||
" WHERE row_number = 1", repmgr_schema, repmgr_schema);
|
||||
" FROM %s.repl_monitor "
|
||||
" WHERE (standby_node, last_monitor_time) IN (SELECT standby_node, MAX(last_monitor_time) "
|
||||
" FROM %s.repl_monitor GROUP BY 1)",
|
||||
repmgr_schema, repmgr_schema, repmgr_schema);
|
||||
log_debug("master register: %s\n", sqlquery);
|
||||
if (!PQexec(conn, sqlquery))
|
||||
{
|
||||
@@ -454,6 +457,19 @@ do_master_register(void)
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* an index to improve performance of the view */
|
||||
sqlquery_snprintf(sqlquery, "CREATE INDEX idx_repl_status_sort "
|
||||
" ON %s.repl_monitor (last_monitor_time, standby_node) ",
|
||||
repmgr_schema);
|
||||
log_debug(_("master register: %s\n"), sqlquery);
|
||||
if (!PQexec(conn, sqlquery))
|
||||
{
|
||||
log_err(_("Cannot indexing table %s.repl_monitor: %s\n"),
|
||||
repmgr_schema, PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -886,6 +902,20 @@ do_standby_clone(void)
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
* in pg 9.1 default is to wait for a sync standby to ack,
|
||||
* avoid that by turning off sync rep for this session
|
||||
*/
|
||||
sqlquery_snprintf(sqlquery, "SET synchronous_commit TO OFF");
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err("Can't set synchronous_commit: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* inform the master we will start a backup and get the first XLog filename
|
||||
* so we can say to the user we need those files
|
||||
@@ -1022,9 +1052,6 @@ stop_backup:
|
||||
* Don't have this one exit if it fails, so that a more informative
|
||||
* error message will also appear about the backup not being stopped.
|
||||
*/
|
||||
log_info(_("%s connecting to master database to stop backup\n"), progname);
|
||||
conn=establishDBConnectionByParams(keywords,values,false);
|
||||
|
||||
log_notice("Finishing backup...\n");
|
||||
sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())");
|
||||
log_debug("standby clone: %s\n", sqlquery);
|
||||
@@ -1039,8 +1066,10 @@ stop_backup:
|
||||
}
|
||||
last_wal_segment = PQgetvalue(res, 0, 0);
|
||||
|
||||
log_info(_("%s requires primary to keep WAL files %s until at least %s\n"),
|
||||
progname, first_wal_segment, last_wal_segment);
|
||||
/* don't show this message if rsync failed */
|
||||
if (r == 0)
|
||||
log_info(_("%s requires primary to keep WAL files %s until at least %s\n"),
|
||||
progname, first_wal_segment, last_wal_segment);
|
||||
|
||||
/* Finished with the database connection now */
|
||||
PQclear(res);
|
||||
@@ -1337,6 +1366,7 @@ void help(const char *progname)
|
||||
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
|
||||
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
||||
printf(_(" -I, --ignore-rsync-warning Ignore partial transfert warning\n"));
|
||||
|
||||
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
||||
printf(_("or making follow another node and then exits.\n"));
|
||||
@@ -1379,8 +1409,8 @@ create_recovery_file(const char *data_dir, char *master_conninfo)
|
||||
return false;
|
||||
}
|
||||
|
||||
maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", runtime_options.host,
|
||||
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432");
|
||||
maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", runtime_options.host,
|
||||
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432");
|
||||
|
||||
/*
|
||||
* Template a password into the connection string in recovery.conf
|
||||
@@ -1400,8 +1430,8 @@ create_recovery_file(const char *data_dir, char *master_conninfo)
|
||||
{
|
||||
maxlen_snprintf(line,
|
||||
"primary_conninfo = 'host=%s port=%s password=%s'\n",
|
||||
runtime_options.host,
|
||||
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432",
|
||||
runtime_options.host,
|
||||
(runtime_options.masterport[0]) ? runtime_options.masterport : "5432",
|
||||
password);
|
||||
}
|
||||
else
|
||||
@@ -1473,6 +1503,29 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
||||
|
||||
r = system(script);
|
||||
|
||||
/*
|
||||
* If we are transfering a directory (ie: data directory, tablespace directories)
|
||||
* then we can ignore some rsync warning, so if we get some of those errors we
|
||||
* treat them as 0 if we have --ignore-rsync-warning commandline option set
|
||||
* List of ignorable rsync errors:
|
||||
* 24 Partial transfer due to vanished source files
|
||||
*/
|
||||
if ((WEXITSTATUS(r) == 24) && is_directory)
|
||||
{
|
||||
if (!runtime_options.ignore_rsync_warn)
|
||||
{
|
||||
log_warning( _("\nrsync completed with return code 24 "
|
||||
"\"Partial transfer due to vanished source files\".\n"
|
||||
"This can happen because of normal operation "
|
||||
"on the master server, but it may indicate an "
|
||||
"issue during cloning. If you are certain no "
|
||||
"changes were made to the master, try cloning "
|
||||
"again using \"repmgr --force --ignore-rsync-warning\"."));
|
||||
exit(ERR_BAD_RSYNC);
|
||||
}
|
||||
else
|
||||
r = 0;
|
||||
}
|
||||
if (r != 0)
|
||||
log_err(_("Can't rsync from remote file or directory (%s:%s)\n"),
|
||||
host_string, remote_path);
|
||||
|
||||
1
repmgr.h
1
repmgr.h
@@ -55,6 +55,7 @@ typedef struct
|
||||
char wal_keep_segments[MAXLEN];
|
||||
bool verbose;
|
||||
bool force;
|
||||
bool ignore_rsync_warn;
|
||||
|
||||
char masterport[MAXLEN];
|
||||
|
||||
|
||||
@@ -27,6 +27,15 @@
|
||||
|
||||
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap);
|
||||
|
||||
/* Add strnlen on platforms that don't have it, like OS X */
|
||||
#ifndef strnlen
|
||||
size_t
|
||||
strnlen(const char *s, size_t n)
|
||||
{
|
||||
const char *end = (const char *) memchr(s, '\0', n);
|
||||
return(end ? end - s : n);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
||||
|
||||
@@ -35,4 +35,9 @@ extern int xsnprintf(char *str, size_t size, const char *format, ...);
|
||||
extern int sqlquery_snprintf(char *str, const char *format, ...);
|
||||
extern int maxlen_snprintf(char *str, const char *format, ...);
|
||||
|
||||
/* Add strnlen on platforms that don't have it, like OS X */
|
||||
#ifndef strnlen
|
||||
extern size_t strnlen(const char *s, size_t n);
|
||||
#endif
|
||||
|
||||
#endif /* _STRUTIL_H_ */
|
||||
|
||||
Reference in New Issue
Block a user