mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 16:16:29 +00:00
Add a CLUSTER CLEANUP command to clean monitor's history,
also include a --keep-history (-k) option to indicate how many days of history to keep
This commit is contained in:
36
README.rst
36
README.rst
@@ -825,6 +825,22 @@ and on "prime."
|
|||||||
|
|
||||||
The servers are now again acting as primary on "prime" and standby on "standby".
|
The servers are now again acting as primary on "prime" and standby on "standby".
|
||||||
|
|
||||||
|
Maintenance of monitor history
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
Once you have changed roles (with a failover or to restore original roles)
|
||||||
|
you would end up with records saying that node1 is primary and other records
|
||||||
|
saying that node2 is the primary, which could be confusing.
|
||||||
|
Also, if you don't do anything about it the monitor history will keep growing.
|
||||||
|
For both of those reasons you will sometimes want to do some maintenance on the
|
||||||
|
``repl_monitor`` table.
|
||||||
|
|
||||||
|
If you want to clean the history after a few days you can execute the
|
||||||
|
CLUSTER CLEANUP command from cron. For example, to keep just one day of history
|
||||||
|
you can put this in your crontab::
|
||||||
|
|
||||||
|
0 1 * * * repmgr cluster cleanup -k 1 -f ~/repmgr.conf
|
||||||
|
|
||||||
Configuration and command reference
|
Configuration and command reference
|
||||||
===================================
|
===================================
|
||||||
|
|
||||||
@@ -953,6 +969,26 @@ its port if is different from the default one.
|
|||||||
|
|
||||||
./repmgr standby follow
|
./repmgr standby follow
|
||||||
|
|
||||||
|
* cluster show
|
||||||
|
|
||||||
|
* Shows the role (standby/master) and connection string for all nodes configured
|
||||||
|
in the cluster or "FAILED" if the node doesn't respond. This allows us to know
|
||||||
|
which nodes are alive and which one needs attention and to have a notion of the
|
||||||
|
structure of clusters we just have access to. Example::
|
||||||
|
|
||||||
|
./repmgr cluster show
|
||||||
|
|
||||||
|
* cluster cleanup
|
||||||
|
|
||||||
|
* Cleans the monitor's history from repmgr tables. This avoids the repl_monitor table
|
||||||
|
to grow excessively, which in turn affects repl_status view performance; it also
|
||||||
|
keeps the disk space used by repmgr under control. This command can be used manually
|
||||||
|
or from cron to run it periodically.
|
||||||
|
There is also a --keep-history (-k) option to indicate how many days of history we
|
||||||
|
want to keep, so the command will clean up history older than "keep-history" days. Example::
|
||||||
|
|
||||||
|
./repmgr cluster cleanup -k 2
|
||||||
|
|
||||||
repmgrd Daemon
|
repmgrd Daemon
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
|
|||||||
81
repmgr.c
81
repmgr.c
@@ -8,6 +8,7 @@
|
|||||||
* Commands implemented are.
|
* Commands implemented are.
|
||||||
* MASTER REGISTER
|
* MASTER REGISTER
|
||||||
* STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, STANDBY PROMOTE
|
* STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, STANDBY PROMOTE
|
||||||
|
* CLUSTER SHOW, CLUSTER CLEANUP
|
||||||
* WITNESS CREATE
|
* WITNESS CREATE
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
@@ -48,6 +49,7 @@
|
|||||||
#define STANDBY_FOLLOW 5
|
#define STANDBY_FOLLOW 5
|
||||||
#define WITNESS_CREATE 6
|
#define WITNESS_CREATE 6
|
||||||
#define CLUSTER_SHOW 7
|
#define CLUSTER_SHOW 7
|
||||||
|
#define CLUSTER_CLEANUP 8
|
||||||
|
|
||||||
static bool create_recovery_file(const char *data_dir);
|
static bool create_recovery_file(const char *data_dir);
|
||||||
static int test_ssh_connection(char *host, char *remote_user);
|
static int test_ssh_connection(char *host, char *remote_user);
|
||||||
@@ -65,6 +67,7 @@ static void do_standby_promote(void);
|
|||||||
static void do_standby_follow(void);
|
static void do_standby_follow(void);
|
||||||
static void do_witness_create(void);
|
static void do_witness_create(void);
|
||||||
static void do_cluster_show(void);
|
static void do_cluster_show(void);
|
||||||
|
static void do_cluster_cleanup(void);
|
||||||
|
|
||||||
static void usage(void);
|
static void usage(void);
|
||||||
static void help(const char *progname);
|
static void help(const char *progname);
|
||||||
@@ -80,7 +83,7 @@ bool need_a_node = true;
|
|||||||
bool require_password = false;
|
bool require_password = false;
|
||||||
|
|
||||||
/* Initialization of runtime options */
|
/* Initialization of runtime options */
|
||||||
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "" };
|
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "", 0 };
|
||||||
t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 };
|
t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 };
|
||||||
|
|
||||||
static char *server_mode = NULL;
|
static char *server_mode = NULL;
|
||||||
@@ -100,6 +103,7 @@ main(int argc, char **argv)
|
|||||||
{"config-file", required_argument, NULL, 'f'},
|
{"config-file", required_argument, NULL, 'f'},
|
||||||
{"remote-user", required_argument, NULL, 'R'},
|
{"remote-user", required_argument, NULL, 'R'},
|
||||||
{"wal-keep-segments", required_argument, NULL, 'w'},
|
{"wal-keep-segments", required_argument, NULL, 'w'},
|
||||||
|
{"keep-history", required_argument, NULL, 'k'},
|
||||||
{"force", no_argument, NULL, 'F'},
|
{"force", no_argument, NULL, 'F'},
|
||||||
{"ignore-rsync-warning", no_argument, NULL, 'I'},
|
{"ignore-rsync-warning", no_argument, NULL, 'I'},
|
||||||
{"verbose", no_argument, NULL, 'v'},
|
{"verbose", no_argument, NULL, 'v'},
|
||||||
@@ -127,7 +131,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:F:I:v", long_options,
|
while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:I:v", long_options,
|
||||||
&optindex)) != -1)
|
&optindex)) != -1)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
@@ -162,6 +166,12 @@ main(int argc, char **argv)
|
|||||||
if (atoi(optarg) > 0)
|
if (atoi(optarg) > 0)
|
||||||
strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN);
|
strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN);
|
||||||
break;
|
break;
|
||||||
|
case 'k':
|
||||||
|
if (atoi(optarg) > 0)
|
||||||
|
runtime_options.keep_history = atoi(optarg);
|
||||||
|
else
|
||||||
|
runtime_options.keep_history = 0;
|
||||||
|
break;
|
||||||
case 'F':
|
case 'F':
|
||||||
runtime_options.force = true;
|
runtime_options.force = true;
|
||||||
break;
|
break;
|
||||||
@@ -182,7 +192,7 @@ main(int argc, char **argv)
|
|||||||
* MASTER REGISTER |
|
* MASTER REGISTER |
|
||||||
* STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} |
|
* STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} |
|
||||||
* WITNESS CREATE
|
* WITNESS CREATE
|
||||||
* CLUSTER SHOW
|
* CLUSTER {SHOW | CLEANUP}
|
||||||
*
|
*
|
||||||
* the node part is optional, if we receive it then we shouldn't
|
* the node part is optional, if we receive it then we shouldn't
|
||||||
* have received a -h option
|
* have received a -h option
|
||||||
@@ -223,6 +233,8 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
if( strcasecmp(server_cmd, "SHOW") == 0)
|
if( strcasecmp(server_cmd, "SHOW") == 0)
|
||||||
action = CLUSTER_SHOW;
|
action = CLUSTER_SHOW;
|
||||||
|
else if(strcasecmp(server_cmd, "CLEANUP") == 0)
|
||||||
|
action = CLUSTER_CLEANUP;
|
||||||
}
|
}
|
||||||
else if (strcasecmp(server_mode, "WITNESS") == 0)
|
else if (strcasecmp(server_mode, "WITNESS") == 0)
|
||||||
if (strcasecmp(server_cmd, "CREATE") == 0)
|
if (strcasecmp(server_cmd, "CREATE") == 0)
|
||||||
@@ -348,6 +360,9 @@ main(int argc, char **argv)
|
|||||||
case CLUSTER_SHOW:
|
case CLUSTER_SHOW:
|
||||||
do_cluster_show();
|
do_cluster_show();
|
||||||
break;
|
break;
|
||||||
|
case CLUSTER_CLEANUP:
|
||||||
|
do_cluster_cleanup();
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
usage();
|
usage();
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -402,10 +417,59 @@ do_cluster_show(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_cluster_cleanup(void)
|
||||||
|
{
|
||||||
|
int master_id;
|
||||||
|
PGconn *master_conn;
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
char node_role[MAXLEN];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* check if there is a master in this cluster */
|
||||||
|
log_info(_("%s connecting to master database\n"), progname);
|
||||||
|
master_conn = getMasterConnection(master_conn, repmgr_schema, options.cluster_name,
|
||||||
|
&master_id, NULL);
|
||||||
|
if (!master_conn)
|
||||||
|
{
|
||||||
|
log_err(_("cluster cleanup: cannot connect to master\n"));
|
||||||
|
exit(ERR_DB_CON);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.keep_history > 0)
|
||||||
|
{
|
||||||
|
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_monitor "
|
||||||
|
" WHERE age(now(), last_monitor_time) >= '%d days'::interval;",
|
||||||
|
repmgr_schema, keep_history);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_monitor;", repmgr_schema);
|
||||||
|
}
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("cluster cleanup: Couldn't clean history\n%s\n"), PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
/* Let's VACUUM the table to avoid autovacuum to be launched in an unexpected hour */
|
||||||
|
sqlquery_snprintf(sqlquery, "VACUUM %s.repl_monitor;", repmgr_schema);
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
|
/* XXX There is any need to check this VACUUM happens without problems? */
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
do_master_register(void)
|
do_master_register(void)
|
||||||
{
|
{
|
||||||
@@ -1642,7 +1706,7 @@ help(const char *progname)
|
|||||||
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
||||||
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
||||||
progname);
|
progname);
|
||||||
printf(_(" %s [OPTIONS] cluster show\n"), progname);
|
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
|
||||||
printf(_("\nGeneral options:\n"));
|
printf(_("\nGeneral options:\n"));
|
||||||
printf(_(" --help show this help, then exit\n"));
|
printf(_(" --help show this help, then exit\n"));
|
||||||
printf(_(" --version output version information, then exit\n"));
|
printf(_(" --version output version information, then exit\n"));
|
||||||
@@ -1659,6 +1723,7 @@ help(const char *progname)
|
|||||||
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
||||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
|
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
|
||||||
printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
|
printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
|
||||||
|
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
|
||||||
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
||||||
|
|
||||||
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
||||||
@@ -1671,6 +1736,7 @@ help(const char *progname)
|
|||||||
printf(_("new master in the event of a failover\n"));
|
printf(_("new master in the event of a failover\n"));
|
||||||
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
|
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
|
||||||
printf(_(" cluster show - print node informations\n"));
|
printf(_(" cluster show - print node informations\n"));
|
||||||
|
printf(_(" cluster cleanup - cleans monitor's history\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1935,6 +2001,9 @@ check_parameters_for_action(const int action)
|
|||||||
case CLUSTER_SHOW:
|
case CLUSTER_SHOW:
|
||||||
/* allow all parameters to be supplied */
|
/* allow all parameters to be supplied */
|
||||||
break;
|
break;
|
||||||
|
case CLUSTER_CLEANUP:
|
||||||
|
/* allow all parameters to be supplied */
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ok;
|
return ok;
|
||||||
|
|||||||
Reference in New Issue
Block a user