mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Add a CLUSTER CLEANUP command to clean monitor's history,
also include a --keep-history (-k) option to indicate how many days of history to keep
This commit is contained in:
36
README.rst
36
README.rst
@@ -825,6 +825,22 @@ and on "prime."
|
||||
|
||||
The servers are now again acting as primary on "prime" and standby on "standby".
|
||||
|
||||
Maintainance of monitor history
|
||||
-------------------------------
|
||||
|
||||
Once you have changed roles (with a failover or to restore original roles)
|
||||
you would end up with records saying that node1 is primary and other records
|
||||
saying that node2 is the primary. Which could be confusing.
|
||||
Also, if you don't do anything about it the monitor history will keep growing.
|
||||
For both of those reasons you sometime want to make some maintainance of the
|
||||
``repl_monitor`` table.
|
||||
|
||||
If you want to clean the history after a few days you can execute the
|
||||
CLUSTER CLEANUP command in a cron. For example to keep just one day of history
|
||||
you can put this in your crontab::
|
||||
|
||||
0 1 * * * repmgr cluster cleanup -k 1 -f ~/repmgr.conf
|
||||
|
||||
Configuration and command reference
|
||||
===================================
|
||||
|
||||
@@ -953,6 +969,26 @@ its port if is different from the default one.
|
||||
|
||||
./repmgr standby follow
|
||||
|
||||
* cluster show
|
||||
|
||||
* Shows the role (standby/master) and connection string for all nodes configured
|
||||
in the cluster or "FAILED" if the node doesn't respond. This allow us to know
|
||||
which nodes are alive and which one needs attention and to have a notion of the
|
||||
structure of clusters we just have access to. Example::
|
||||
|
||||
./repmgr cluster show
|
||||
|
||||
* cluster cleanup
|
||||
|
||||
* Cleans the monitor's history from repmgr tables. This avoids the repl_monitor table
|
||||
to grow excesivelly which in turns affects repl_status view performance, also
|
||||
keeps controlled the space in disk used by repmgr. This command can be used manually
|
||||
or in a cron to make it periodically.
|
||||
There is also a --keep-history (-k) option to indicate how many days of history we
|
||||
want to keep, so the command will clean up history older than "keep-history" days. Example::
|
||||
|
||||
./repmgr cluster cleanup -k 2
|
||||
|
||||
repmgrd Daemon
|
||||
--------------
|
||||
|
||||
|
||||
81
repmgr.c
81
repmgr.c
@@ -8,6 +8,7 @@
|
||||
* Commands implemented are.
|
||||
* MASTER REGISTER
|
||||
* STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, STANDBY PROMOTE
|
||||
* CLUSTER SHOW, CLUSTER CLEANUP
|
||||
* WITNESS CREATE
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -48,6 +49,7 @@
|
||||
#define STANDBY_FOLLOW 5
|
||||
#define WITNESS_CREATE 6
|
||||
#define CLUSTER_SHOW 7
|
||||
#define CLUSTER_CLEANUP 8
|
||||
|
||||
static bool create_recovery_file(const char *data_dir);
|
||||
static int test_ssh_connection(char *host, char *remote_user);
|
||||
@@ -65,6 +67,7 @@ static void do_standby_promote(void);
|
||||
static void do_standby_follow(void);
|
||||
static void do_witness_create(void);
|
||||
static void do_cluster_show(void);
|
||||
static void do_cluster_cleanup(void);
|
||||
|
||||
static void usage(void);
|
||||
static void help(const char *progname);
|
||||
@@ -80,7 +83,7 @@ bool need_a_node = true;
|
||||
bool require_password = false;
|
||||
|
||||
/* Initialization of runtime options */
|
||||
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "" };
|
||||
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "", 0 };
|
||||
t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 };
|
||||
|
||||
static char *server_mode = NULL;
|
||||
@@ -100,6 +103,7 @@ main(int argc, char **argv)
|
||||
{"config-file", required_argument, NULL, 'f'},
|
||||
{"remote-user", required_argument, NULL, 'R'},
|
||||
{"wal-keep-segments", required_argument, NULL, 'w'},
|
||||
{"keep-history", required_argument, NULL, 'k'},
|
||||
{"force", no_argument, NULL, 'F'},
|
||||
{"ignore-rsync-warning", no_argument, NULL, 'I'},
|
||||
{"verbose", no_argument, NULL, 'v'},
|
||||
@@ -127,7 +131,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
|
||||
|
||||
while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:F:I:v", long_options,
|
||||
while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:I:v", long_options,
|
||||
&optindex)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
@@ -162,6 +166,12 @@ main(int argc, char **argv)
|
||||
if (atoi(optarg) > 0)
|
||||
strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN);
|
||||
break;
|
||||
case 'k':
|
||||
if (atoi(optarg) > 0)
|
||||
runtime_options.keep_history = atoi(optarg);
|
||||
else
|
||||
runtime_options.keep_history = 0;
|
||||
break;
|
||||
case 'F':
|
||||
runtime_options.force = true;
|
||||
break;
|
||||
@@ -182,7 +192,7 @@ main(int argc, char **argv)
|
||||
* MASTER REGISTER |
|
||||
* STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} |
|
||||
* WITNESS CREATE
|
||||
* CLUSTER SHOW
|
||||
* CLUSTER {SHOW | CLEANUP}
|
||||
*
|
||||
* the node part is optional, if we receive it then we shouldn't
|
||||
* have received a -h option
|
||||
@@ -223,6 +233,8 @@ main(int argc, char **argv)
|
||||
{
|
||||
if( strcasecmp(server_cmd, "SHOW") == 0)
|
||||
action = CLUSTER_SHOW;
|
||||
else if(strcasecmp(server_cmd, "CLEANUP") == 0)
|
||||
action = CLUSTER_CLEANUP;
|
||||
}
|
||||
else if (strcasecmp(server_mode, "WITNESS") == 0)
|
||||
if (strcasecmp(server_cmd, "CREATE") == 0)
|
||||
@@ -348,6 +360,9 @@ main(int argc, char **argv)
|
||||
case CLUSTER_SHOW:
|
||||
do_cluster_show();
|
||||
break;
|
||||
case CLUSTER_CLEANUP:
|
||||
do_cluster_cleanup();
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
exit(ERR_BAD_CONFIG);
|
||||
@@ -402,10 +417,59 @@ do_cluster_show(void)
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
do_cluster_cleanup(void)
|
||||
{
|
||||
int master_id;
|
||||
PGconn *master_conn;
|
||||
PGresult *res;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
char node_role[MAXLEN];
|
||||
int i;
|
||||
|
||||
/* check if there is a master in this cluster */
|
||||
log_info(_("%s connecting to master database\n"), progname);
|
||||
master_conn = getMasterConnection(master_conn, repmgr_schema, options.cluster_name,
|
||||
&master_id, NULL);
|
||||
if (!master_conn)
|
||||
{
|
||||
log_err(_("cluster cleanup: cannot connect to master\n"));
|
||||
exit(ERR_DB_CON);
|
||||
}
|
||||
|
||||
if (runtime_options.keep_history > 0)
|
||||
{
|
||||
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_monitor "
|
||||
" WHERE age(now(), last_monitor_time) >= '%d days'::interval;",
|
||||
repmgr_schema, keep_history);
|
||||
}
|
||||
else
|
||||
{
|
||||
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_monitor;", repmgr_schema);
|
||||
}
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_err(_("cluster cleanup: Couldn't clean history\n%s\n"), PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
/* Let's VACUUM the table to avoid autovacuum to be launched in an unexpected hour */
|
||||
sqlquery_snprintf(sqlquery, "VACUUM %s.repl_monitor;", repmgr_schema);
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
/* XXX There is any need to check this VACUUM happens without problems? */
|
||||
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
do_master_register(void)
|
||||
{
|
||||
@@ -1642,7 +1706,7 @@ help(const char *progname)
|
||||
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
||||
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
||||
progname);
|
||||
printf(_(" %s [OPTIONS] cluster show\n"), progname);
|
||||
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
|
||||
printf(_("\nGeneral options:\n"));
|
||||
printf(_(" --help show this help, then exit\n"));
|
||||
printf(_(" --version output version information, then exit\n"));
|
||||
@@ -1659,6 +1723,7 @@ help(const char *progname)
|
||||
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
|
||||
printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
|
||||
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
|
||||
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
||||
|
||||
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
||||
@@ -1671,6 +1736,7 @@ help(const char *progname)
|
||||
printf(_("new master in the event of a failover\n"));
|
||||
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
|
||||
printf(_(" cluster show - print node informations\n"));
|
||||
printf(_(" cluster cleanup - cleans monitor's history\n"));
|
||||
}
|
||||
|
||||
|
||||
@@ -1935,6 +2001,9 @@ check_parameters_for_action(const int action)
|
||||
case CLUSTER_SHOW:
|
||||
/* allow all parameters to be supplied */
|
||||
break;
|
||||
case CLUSTER_CLEANUP:
|
||||
/* allow all parameters to be supplied */
|
||||
break;
|
||||
}
|
||||
|
||||
return ok;
|
||||
|
||||
Reference in New Issue
Block a user