mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Implement "repmgr cluster cleanup"
This commit is contained in:
18
README.md
18
README.md
@@ -1405,10 +1405,10 @@ The view `replication_status` shows the most recent state for each node, e.g.:
|
||||
The interval in which monitoring history is written is controlled by the
|
||||
configuration parameter `monitor_interval_secs`; default is 2.
|
||||
|
||||
As this can generate a large amount of monitoring data in the `monitoring_history`
|
||||
table, it's advisable to regularly purge historical data with
|
||||
`repmgr cluster cleanup`; use the `-k/--keep-history` to specify how
|
||||
many day's worth of data should be retained. *XXX not yet implemented*
|
||||
As this can generate a large amount of monitoring data in the table
|
||||
`repmgr.monitoring_history`, it's advisable to regularly purge historical data
|
||||
using the `repmgr cluster cleanup` command; use the `-k/--keep-history` option to
|
||||
specify how many days' worth of data should be retained.
|
||||
|
||||
It's possible to use `repmgrd` to provide monitoring only for some or all
|
||||
nodes by setting `failover=manual` in the node's `repmgr.conf` file. In the
|
||||
@@ -1870,6 +1870,16 @@ The following commands are available:
|
||||
3 | node3 | standby_register | t | 2017-08-17 10:28:55 | standby registration succeeded
|
||||
2 | node2 | standby_register | t | 2017-08-17 10:28:53 | standby registration succeeded
|
||||
|
||||
* `cluster cleanup`
|
||||
|
||||
Purges monitoring history from the `repmgr.monitoring_history` table to
|
||||
prevent excessive table growth. Use the `-k/--keep-history` option to specify the
|
||||
number of days of monitoring history to retain. This command can be used
|
||||
manually or as a cronjob.
|
||||
|
||||
This command requires a valid `repmgr.conf` file for the node on which it is
|
||||
executed, either specified explicitly with `-f/--config-file` or located in
|
||||
the current working directory; no additional arguments are required.
|
||||
|
||||
|
||||
Generating event notifications with repmgr/repmgrd
|
||||
|
||||
101
dbutils.c
101
dbutils.c
@@ -1661,6 +1661,31 @@ checkpoint(PGconn *conn)
|
||||
return;
|
||||
}
|
||||
|
||||
/* assumes superuser connection */
|
||||
bool
|
||||
vacuum_table(PGconn *primary_conn, const char *table)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
bool success = true;
|
||||
PGresult *res = NULL;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query, "VACUUM %s", table);
|
||||
|
||||
res = PQexec(primary_conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
log_debug("%i", (int) PQresultStatus(res));
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
/* ===================== */
|
||||
/* Node record functions */
|
||||
@@ -3408,8 +3433,7 @@ is_server_available(const char *conninfo)
|
||||
/* ==================== */
|
||||
|
||||
void
|
||||
add_monitoring_record(
|
||||
PGconn *primary_conn,
|
||||
add_monitoring_record(PGconn *primary_conn,
|
||||
PGconn *local_conn,
|
||||
int primary_node_id,
|
||||
int local_node_id,
|
||||
@@ -3478,6 +3502,79 @@ add_monitoring_record(
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
int record_count = -1;
|
||||
PGresult *res = NULL;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT COUNT(*) "
|
||||
" FROM repmgr.monitoring_history "
|
||||
" WHERE age(now(), last_monitor_time) >= '%d days'::interval",
|
||||
keep_history);
|
||||
|
||||
res = PQexec(primary_conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to query number of monitoring records to clean up"));
|
||||
log_detail("%s", PQerrorMessage(primary_conn));
|
||||
|
||||
PQclear(res);
|
||||
PQfinish(primary_conn);
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
else
|
||||
{
|
||||
record_count = atoi(PQgetvalue(res, 0, 0));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return record_count;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
delete_monitoring_records(PGconn *primary_conn, int keep_history)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
bool success = true;
|
||||
PGresult *res = NULL;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
if (keep_history > 0)
|
||||
{
|
||||
appendPQExpBuffer(&query,
|
||||
"DELETE FROM repmgr.monitoring_history "
|
||||
" WHERE age(now(), last_monitor_time) >= '%d days'::interval ",
|
||||
keep_history);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&query,
|
||||
"TRUNCATE TABLE repmgr.monitoring_history");
|
||||
}
|
||||
|
||||
res = PQexec(primary_conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
/*
|
||||
* node voting functions
|
||||
*
|
||||
|
||||
@@ -396,7 +396,7 @@ ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
||||
|
||||
/* node management functions */
|
||||
void checkpoint(PGconn *conn);
|
||||
|
||||
bool vacuum_table(PGconn *conn, const char *table);
|
||||
|
||||
|
||||
/* node record functions */
|
||||
@@ -456,8 +456,7 @@ bool is_server_available(const char *conninfo);
|
||||
|
||||
/* monitoring functions */
|
||||
void
|
||||
add_monitoring_record(
|
||||
PGconn *primary_conn,
|
||||
add_monitoring_record(PGconn *primary_conn,
|
||||
PGconn *local_conn,
|
||||
int primary_node_id,
|
||||
int local_node_id,
|
||||
@@ -469,6 +468,10 @@ add_monitoring_record(
|
||||
long long unsigned int apply_lag_bytes
|
||||
);
|
||||
|
||||
int get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history);
|
||||
bool delete_monitoring_records(PGconn *primary_conn, int keep_history);
|
||||
|
||||
|
||||
|
||||
/* node voting functions */
|
||||
NodeVotingStatus get_voting_status(PGconn *conn);
|
||||
|
||||
@@ -1287,6 +1287,67 @@ cube_set_node_status(t_node_status_cube **cube, int n, int execute_node_id, int
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_cluster_cleanup(void)
|
||||
{
|
||||
PGconn *conn = NULL;
|
||||
PGconn *primary_conn = NULL;
|
||||
int entries_to_delete = 0;
|
||||
|
||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
|
||||
/* check if there is a master in this cluster */
|
||||
log_info(_("connecting to primary server"));
|
||||
primary_conn = establish_primary_db_connection(conn, true);
|
||||
|
||||
PQfinish(conn);
|
||||
|
||||
log_debug(_("number of days of monitoring history to retain: %i"), runtime_options.keep_history);
|
||||
|
||||
entries_to_delete = get_number_of_monitoring_records_to_delete(primary_conn, runtime_options.keep_history);
|
||||
|
||||
if (entries_to_delete == 0)
|
||||
{
|
||||
log_info(_("no monitoring records to delete"));
|
||||
PQfinish(primary_conn);
|
||||
return;
|
||||
}
|
||||
|
||||
log_debug("at least %i monitoring records for deletion",
|
||||
entries_to_delete);
|
||||
|
||||
if (delete_monitoring_records(primary_conn, runtime_options.keep_history) == false)
|
||||
{
|
||||
log_error(_("unable to delete monitoring records"));
|
||||
log_detail("%s", PQerrorMessage(primary_conn));
|
||||
PQfinish(primary_conn);
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
if (vacuum_table(primary_conn, "repmgr.monitoring_history") == false)
|
||||
{
|
||||
/* annoying if this fails, but not fatal */
|
||||
log_warning(_("unable to vacuum table repmgr.monitoring_history\n"));
|
||||
log_detail("%s", PQerrorMessage(primary_conn));
|
||||
}
|
||||
|
||||
|
||||
PQfinish(primary_conn);
|
||||
|
||||
if (runtime_options.keep_history > 0)
|
||||
{
|
||||
log_notice(_("monitoring records older than %i day(s) deleted"),
|
||||
runtime_options.keep_history);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("all monitoring records deleted"));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_cluster_help(void)
|
||||
{
|
||||
@@ -1305,7 +1366,7 @@ do_cluster_help(void)
|
||||
puts("");
|
||||
printf(_(" Configuration file or database connection required.\n"));
|
||||
puts("");
|
||||
printf(_(" --csv emit output as CSV (with a subset of fields)\n"));
|
||||
printf(_(" --csv emit output as CSV (with a subset of fields)\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("CLUSTER MATRIX\n"));
|
||||
@@ -1314,7 +1375,7 @@ do_cluster_help(void)
|
||||
puts("");
|
||||
printf(_(" Configuration file or database connection required.\n"));
|
||||
puts("");
|
||||
printf(_(" --csv emit output as CSV\n"));
|
||||
printf(_(" --csv emit output as CSV\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("CLUSTER CROSSCHECK\n"));
|
||||
@@ -1323,7 +1384,7 @@ do_cluster_help(void)
|
||||
puts("");
|
||||
printf(_(" Configuration file or database connection required.\n"));
|
||||
puts("");
|
||||
printf(_(" --csv emit output as CSV\n"));
|
||||
printf(_(" --csv emit output as CSV\n"));
|
||||
puts("");
|
||||
|
||||
|
||||
@@ -1331,12 +1392,18 @@ do_cluster_help(void)
|
||||
puts("");
|
||||
printf(_(" \"cluster event\" lists recent events logged in the \"repmgr.events\" table.\n"));
|
||||
puts("");
|
||||
printf(_(" --limit maximum number of events to display (default: %i)\n"), CLUSTER_EVENT_LIMIT);
|
||||
printf(_(" --all display all events (overrides --limit)\n"));
|
||||
printf(_(" --event filter specific event\n"));
|
||||
printf(_(" --node-id restrict entries to node with this ID\n"));
|
||||
printf(_(" --node-name restrict entries to node with this name\n"));
|
||||
printf(_(" --limit maximum number of events to display (default: %i)\n"), CLUSTER_EVENT_LIMIT);
|
||||
printf(_(" --all display all events (overrides --limit)\n"));
|
||||
printf(_(" --event filter specific event\n"));
|
||||
printf(_(" --node-id restrict entries to node with this ID\n"));
|
||||
printf(_(" --node-name restrict entries to node with this name\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("CLUSTER EVENT\n"));
|
||||
puts("");
|
||||
printf(_(" \"cluster event\" purges records from the \"repmgr.monitor\" table.\n"));
|
||||
puts("");
|
||||
printf(_(" -k, --keep-history=VALUE retain indicated number of days of history (default: 0)\n"));
|
||||
puts("");
|
||||
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@ extern void do_cluster_show(void);
|
||||
extern void do_cluster_event(void);
|
||||
extern void do_cluster_crosscheck(void);
|
||||
extern void do_cluster_matrix(void);
|
||||
extern void do_cluster_cleanup(void);
|
||||
|
||||
extern void do_cluster_help(void);
|
||||
|
||||
|
||||
@@ -116,6 +116,9 @@ typedef struct
|
||||
char event[MAXLEN];
|
||||
int limit;
|
||||
|
||||
/* "cluster cleanup" options */
|
||||
int keep_history;
|
||||
|
||||
/* following options for internal use */
|
||||
char config_archive_dir[MAXPGPATH];
|
||||
OutputMode output_mode;
|
||||
@@ -155,6 +158,8 @@ typedef struct
|
||||
"", false, false, false, \
|
||||
/* "cluster event" options */ \
|
||||
false, "", CLUSTER_EVENT_LIMIT, \
|
||||
/* "cluster cleanup" options */ \
|
||||
0, \
|
||||
/* Following options for internal use */ \
|
||||
"/tmp", OM_TEXT \
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
* CLUSTER EVENT
|
||||
* CLUSTER CROSSCHECK
|
||||
* CLUSTER MATRIX
|
||||
* CLUSTER CLEANUP
|
||||
*
|
||||
* NODE STATUS
|
||||
* NODE CHECK
|
||||
@@ -499,6 +500,16 @@ main(int argc, char **argv)
|
||||
runtime_options.all = true;
|
||||
break;
|
||||
|
||||
/*------------------------
|
||||
* "cluster cleanup" options
|
||||
*------------------------
|
||||
*/
|
||||
|
||||
/* -k/--keep-history */
|
||||
case 'k':
|
||||
runtime_options.keep_history = repmgr_atoi(optarg, "-k/--keep-history", &cli_errors, false);
|
||||
break;
|
||||
|
||||
/*----------------
|
||||
* logging options
|
||||
*----------------
|
||||
@@ -688,17 +699,18 @@ main(int argc, char **argv)
|
||||
exit_with_cli_errors(&cli_errors);
|
||||
}
|
||||
|
||||
/*
|
||||
/*----------
|
||||
* Determine the node type and action; following are valid:
|
||||
*
|
||||
* { PRIMARY | MASTER } REGISTER | STANDBY {REGISTER | UNREGISTER | CLONE
|
||||
* [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | REWIND} | BDR {
|
||||
* REGISTER | UNREGISTER } | NODE { STATUS | CHECK | REJOIN |
|
||||
* ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } | CLUSTER { CROSSCHECK |
|
||||
* MATRIX | SHOW | CLEANUP | EVENT }
|
||||
* { PRIMARY | MASTER } REGISTER |
|
||||
* STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER } |
|
||||
* BDR { REGISTER | UNREGISTER } |
|
||||
* NODE { STATUS | CHECK | REJOIN | SERVICE } |
|
||||
* CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
|
||||
*
|
||||
* [node] is an optional hostname, provided instead of the -h/--host
|
||||
* option
|
||||
* ---------
|
||||
*/
|
||||
if (optind < argc)
|
||||
{
|
||||
@@ -818,6 +830,8 @@ main(int argc, char **argv)
|
||||
action = CLUSTER_CROSSCHECK;
|
||||
else if (strcasecmp(repmgr_action, "MATRIX") == 0)
|
||||
action = CLUSTER_MATRIX;
|
||||
else if (strcasecmp(repmgr_action, "CLEANUP") == 0)
|
||||
action = CLUSTER_CLEANUP;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1200,6 +1214,9 @@ main(int argc, char **argv)
|
||||
case CLUSTER_MATRIX:
|
||||
do_cluster_matrix();
|
||||
break;
|
||||
case CLUSTER_CLEANUP:
|
||||
do_cluster_cleanup();
|
||||
break;
|
||||
|
||||
default:
|
||||
/* An action will have been determined by this point */
|
||||
|
||||
@@ -169,6 +169,9 @@ static struct option long_options[] =
|
||||
{"event", required_argument, NULL, OPT_EVENT},
|
||||
{"limit", required_argument, NULL, OPT_LIMIT},
|
||||
|
||||
/* "cluster cleanup" options */
|
||||
{"keep-history", required_argument, NULL, 'k'},
|
||||
|
||||
/* Following options for internal use */
|
||||
{"config-archive-dir", required_argument, NULL, OPT_CONFIG_ARCHIVE_DIR},
|
||||
|
||||
@@ -181,7 +184,7 @@ static struct option long_options[] =
|
||||
|
||||
|
||||
/* not yet handled */
|
||||
{"keep-history", required_argument, NULL, 'k'},
|
||||
|
||||
{"mode", required_argument, NULL, 'm'},
|
||||
{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
|
||||
{"pg_rewind", optional_argument, NULL, OPT_PG_REWIND},
|
||||
|
||||
Reference in New Issue
Block a user