mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Implement "repmgr cluster cleanup"
This commit is contained in:
18
README.md
18
README.md
@@ -1405,10 +1405,10 @@ The view `replication_status` shows the most recent state for each node, e.g.:
|
|||||||
The interval in which monitoring history is written is controlled by the
|
The interval in which monitoring history is written is controlled by the
|
||||||
configuration parameter `monitor_interval_secs`; default is 2.
|
configuration parameter `monitor_interval_secs`; default is 2.
|
||||||
|
|
||||||
As this can generate a large amount of monitoring data in the `monitoring_history`
|
As this can generate a large amount of monitoring data in the table
|
||||||
table, it's advisable to regularly purge historical data with
|
`repmgr.monitoring_history`, it's advisable to regularly purge historical data
|
||||||
`repmgr cluster cleanup`; use the `-k/--keep-history` to specify how
|
using the `repmgr cluster cleanup` command; use the `-k/--keep-history` option to
|
||||||
many day's worth of data should be retained. *XXX not yet implemented*
|
specify how many days' worth of data should be retained.
|
||||||
|
|
||||||
It's possible to use `repmgrd` to provide monitoring only for some or all
|
It's possible to use `repmgrd` to provide monitoring only for some or all
|
||||||
nodes by setting `failover=manual` in the node's `repmgr.conf` file. In the
|
nodes by setting `failover=manual` in the node's `repmgr.conf` file. In the
|
||||||
@@ -1870,6 +1870,16 @@ The following commands are available:
|
|||||||
3 | node3 | standby_register | t | 2017-08-17 10:28:55 | standby registration succeeded
|
3 | node3 | standby_register | t | 2017-08-17 10:28:55 | standby registration succeeded
|
||||||
2 | node2 | standby_register | t | 2017-08-17 10:28:53 | standby registration succeeded
|
2 | node2 | standby_register | t | 2017-08-17 10:28:53 | standby registration succeeded
|
||||||
|
|
||||||
|
* `cluster cleanup`
|
||||||
|
|
||||||
|
Purges monitoring history from the `repmgr.monitoring_history` table to
|
||||||
|
prevent excessive table growth. Use the `-k/--keep-history` option to specify the
|
||||||
|
number of days of monitoring history to retain. This command can be used
|
||||||
|
manually or as a cronjob.
|
||||||
|
|
||||||
|
This command requires a valid `repmgr.conf` file for the node on which it is
|
||||||
|
executed, either specified explicitly with `-f/--config-file` or located in
|
||||||
|
the current working directory; no additional arguments are required.
|
||||||
|
|
||||||
|
|
||||||
Generating event notifications with repmgr/repmgrd
|
Generating event notifications with repmgr/repmgrd
|
||||||
|
|||||||
101
dbutils.c
101
dbutils.c
@@ -1661,6 +1661,31 @@ checkpoint(PGconn *conn)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* assumes superuser connection */
|
||||||
|
bool
|
||||||
|
vacuum_table(PGconn *primary_conn, const char *table)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
bool success = true;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&query, "VACUUM %s", table);
|
||||||
|
|
||||||
|
res = PQexec(primary_conn, query.data);
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
|
log_debug("%i", (int) PQresultStatus(res));
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
/* ===================== */
|
/* ===================== */
|
||||||
/* Node record functions */
|
/* Node record functions */
|
||||||
@@ -3408,8 +3433,7 @@ is_server_available(const char *conninfo)
|
|||||||
/* ==================== */
|
/* ==================== */
|
||||||
|
|
||||||
void
|
void
|
||||||
add_monitoring_record(
|
add_monitoring_record(PGconn *primary_conn,
|
||||||
PGconn *primary_conn,
|
|
||||||
PGconn *local_conn,
|
PGconn *local_conn,
|
||||||
int primary_node_id,
|
int primary_node_id,
|
||||||
int local_node_id,
|
int local_node_id,
|
||||||
@@ -3478,6 +3502,79 @@ add_monitoring_record(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
int record_count = -1;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&query,
|
||||||
|
"SELECT COUNT(*) "
|
||||||
|
" FROM repmgr.monitoring_history "
|
||||||
|
" WHERE age(now(), last_monitor_time) >= '%d days'::interval",
|
||||||
|
keep_history);
|
||||||
|
|
||||||
|
res = PQexec(primary_conn, query.data);
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to query number of monitoring records to clean up"));
|
||||||
|
log_detail("%s", PQerrorMessage(primary_conn));
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
record_count = atoi(PQgetvalue(res, 0, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return record_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
delete_monitoring_records(PGconn *primary_conn, int keep_history)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
bool success = true;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
|
if (keep_history > 0)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&query,
|
||||||
|
"DELETE FROM repmgr.monitoring_history "
|
||||||
|
" WHERE age(now(), last_monitor_time) >= '%d days'::interval ",
|
||||||
|
keep_history);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&query,
|
||||||
|
"TRUNCATE TABLE repmgr.monitoring_history");
|
||||||
|
}
|
||||||
|
|
||||||
|
res = PQexec(primary_conn, query.data);
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* node voting functions
|
* node voting functions
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -396,7 +396,7 @@ ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
|||||||
|
|
||||||
/* node management functions */
|
/* node management functions */
|
||||||
void checkpoint(PGconn *conn);
|
void checkpoint(PGconn *conn);
|
||||||
|
bool vacuum_table(PGconn *conn, const char *table);
|
||||||
|
|
||||||
|
|
||||||
/* node record functions */
|
/* node record functions */
|
||||||
@@ -456,8 +456,7 @@ bool is_server_available(const char *conninfo);
|
|||||||
|
|
||||||
/* monitoring functions */
|
/* monitoring functions */
|
||||||
void
|
void
|
||||||
add_monitoring_record(
|
add_monitoring_record(PGconn *primary_conn,
|
||||||
PGconn *primary_conn,
|
|
||||||
PGconn *local_conn,
|
PGconn *local_conn,
|
||||||
int primary_node_id,
|
int primary_node_id,
|
||||||
int local_node_id,
|
int local_node_id,
|
||||||
@@ -469,6 +468,10 @@ add_monitoring_record(
|
|||||||
long long unsigned int apply_lag_bytes
|
long long unsigned int apply_lag_bytes
|
||||||
);
|
);
|
||||||
|
|
||||||
|
int get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history);
|
||||||
|
bool delete_monitoring_records(PGconn *primary_conn, int keep_history);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* node voting functions */
|
/* node voting functions */
|
||||||
NodeVotingStatus get_voting_status(PGconn *conn);
|
NodeVotingStatus get_voting_status(PGconn *conn);
|
||||||
|
|||||||
@@ -1287,6 +1287,67 @@ cube_set_node_status(t_node_status_cube **cube, int n, int execute_node_id, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
do_cluster_cleanup(void)
|
||||||
|
{
|
||||||
|
PGconn *conn = NULL;
|
||||||
|
PGconn *primary_conn = NULL;
|
||||||
|
int entries_to_delete = 0;
|
||||||
|
|
||||||
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
|
|
||||||
|
/* check if there is a master in this cluster */
|
||||||
|
log_info(_("connecting to primary server"));
|
||||||
|
primary_conn = establish_primary_db_connection(conn, true);
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
|
||||||
|
log_debug(_("number of days of monitoring history to retain: %i"), runtime_options.keep_history);
|
||||||
|
|
||||||
|
entries_to_delete = get_number_of_monitoring_records_to_delete(primary_conn, runtime_options.keep_history);
|
||||||
|
|
||||||
|
if (entries_to_delete == 0)
|
||||||
|
{
|
||||||
|
log_info(_("no monitoring records to delete"));
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("at least %i monitoring records for deletion",
|
||||||
|
entries_to_delete);
|
||||||
|
|
||||||
|
if (delete_monitoring_records(primary_conn, runtime_options.keep_history) == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to delete monitoring records"));
|
||||||
|
log_detail("%s", PQerrorMessage(primary_conn));
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vacuum_table(primary_conn, "repmgr.monitoring_history") == false)
|
||||||
|
{
|
||||||
|
/* annoying if this fails, but not fatal */
|
||||||
|
log_warning(_("unable to vacuum table repmgr.monitoring_history\n"));
|
||||||
|
log_detail("%s", PQerrorMessage(primary_conn));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
|
||||||
|
if (runtime_options.keep_history > 0)
|
||||||
|
{
|
||||||
|
log_notice(_("monitoring records older than %i day(s) deleted"),
|
||||||
|
runtime_options.keep_history);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_info(_("all monitoring records deleted"));
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
do_cluster_help(void)
|
do_cluster_help(void)
|
||||||
{
|
{
|
||||||
@@ -1305,7 +1366,7 @@ do_cluster_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" Configuration file or database connection required.\n"));
|
printf(_(" Configuration file or database connection required.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --csv emit output as CSV (with a subset of fields)\n"));
|
printf(_(" --csv emit output as CSV (with a subset of fields)\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_("CLUSTER MATRIX\n"));
|
printf(_("CLUSTER MATRIX\n"));
|
||||||
@@ -1314,7 +1375,7 @@ do_cluster_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" Configuration file or database connection required.\n"));
|
printf(_(" Configuration file or database connection required.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --csv emit output as CSV\n"));
|
printf(_(" --csv emit output as CSV\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_("CLUSTER CROSSCHECK\n"));
|
printf(_("CLUSTER CROSSCHECK\n"));
|
||||||
@@ -1323,7 +1384,7 @@ do_cluster_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" Configuration file or database connection required.\n"));
|
printf(_(" Configuration file or database connection required.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --csv emit output as CSV\n"));
|
printf(_(" --csv emit output as CSV\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
|
|
||||||
@@ -1331,12 +1392,18 @@ do_cluster_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" \"cluster event\" lists recent events logged in the \"repmgr.events\" table.\n"));
|
printf(_(" \"cluster event\" lists recent events logged in the \"repmgr.events\" table.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --limit maximum number of events to display (default: %i)\n"), CLUSTER_EVENT_LIMIT);
|
printf(_(" --limit maximum number of events to display (default: %i)\n"), CLUSTER_EVENT_LIMIT);
|
||||||
printf(_(" --all display all events (overrides --limit)\n"));
|
printf(_(" --all display all events (overrides --limit)\n"));
|
||||||
printf(_(" --event filter specific event\n"));
|
printf(_(" --event filter specific event\n"));
|
||||||
printf(_(" --node-id restrict entries to node with this ID\n"));
|
printf(_(" --node-id restrict entries to node with this ID\n"));
|
||||||
printf(_(" --node-name restrict entries to node with this name\n"));
|
printf(_(" --node-name restrict entries to node with this name\n"));
|
||||||
|
puts("");
|
||||||
|
|
||||||
|
printf(_("CLUSTER EVENT\n"));
|
||||||
|
puts("");
|
||||||
|
printf(_(" \"cluster event\" purges records from the \"repmgr.monitor\" table.\n"));
|
||||||
|
puts("");
|
||||||
|
printf(_(" -k, --keep-history=VALUE retain indicated number of days of history (default: 0)\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ extern void do_cluster_show(void);
|
|||||||
extern void do_cluster_event(void);
|
extern void do_cluster_event(void);
|
||||||
extern void do_cluster_crosscheck(void);
|
extern void do_cluster_crosscheck(void);
|
||||||
extern void do_cluster_matrix(void);
|
extern void do_cluster_matrix(void);
|
||||||
|
extern void do_cluster_cleanup(void);
|
||||||
|
|
||||||
extern void do_cluster_help(void);
|
extern void do_cluster_help(void);
|
||||||
|
|
||||||
|
|||||||
@@ -116,6 +116,9 @@ typedef struct
|
|||||||
char event[MAXLEN];
|
char event[MAXLEN];
|
||||||
int limit;
|
int limit;
|
||||||
|
|
||||||
|
/* "cluster cleanup" options */
|
||||||
|
int keep_history;
|
||||||
|
|
||||||
/* following options for internal use */
|
/* following options for internal use */
|
||||||
char config_archive_dir[MAXPGPATH];
|
char config_archive_dir[MAXPGPATH];
|
||||||
OutputMode output_mode;
|
OutputMode output_mode;
|
||||||
@@ -155,6 +158,8 @@ typedef struct
|
|||||||
"", false, false, false, \
|
"", false, false, false, \
|
||||||
/* "cluster event" options */ \
|
/* "cluster event" options */ \
|
||||||
false, "", CLUSTER_EVENT_LIMIT, \
|
false, "", CLUSTER_EVENT_LIMIT, \
|
||||||
|
/* "cluster cleanup" options */ \
|
||||||
|
0, \
|
||||||
/* Following options for internal use */ \
|
/* Following options for internal use */ \
|
||||||
"/tmp", OM_TEXT \
|
"/tmp", OM_TEXT \
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
* CLUSTER EVENT
|
* CLUSTER EVENT
|
||||||
* CLUSTER CROSSCHECK
|
* CLUSTER CROSSCHECK
|
||||||
* CLUSTER MATRIX
|
* CLUSTER MATRIX
|
||||||
|
* CLUSTER CLEANUP
|
||||||
*
|
*
|
||||||
* NODE STATUS
|
* NODE STATUS
|
||||||
* NODE CHECK
|
* NODE CHECK
|
||||||
@@ -499,6 +500,16 @@ main(int argc, char **argv)
|
|||||||
runtime_options.all = true;
|
runtime_options.all = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*------------------------
|
||||||
|
* "cluster cleanup" options
|
||||||
|
*------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* -k/--keep-history */
|
||||||
|
case 'k':
|
||||||
|
runtime_options.keep_history = repmgr_atoi(optarg, "-k/--keep-history", &cli_errors, false);
|
||||||
|
break;
|
||||||
|
|
||||||
/*----------------
|
/*----------------
|
||||||
* logging options
|
* logging options
|
||||||
*----------------
|
*----------------
|
||||||
@@ -688,17 +699,18 @@ main(int argc, char **argv)
|
|||||||
exit_with_cli_errors(&cli_errors);
|
exit_with_cli_errors(&cli_errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------
|
||||||
* Determine the node type and action; following are valid:
|
* Determine the node type and action; following are valid:
|
||||||
*
|
*
|
||||||
* { PRIMARY | MASTER } REGISTER | STANDBY {REGISTER | UNREGISTER | CLONE
|
* { PRIMARY | MASTER } REGISTER |
|
||||||
* [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | REWIND} | BDR {
|
* STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER } |
|
||||||
* REGISTER | UNREGISTER } | NODE { STATUS | CHECK | REJOIN |
|
* BDR { REGISTER | UNREGISTER } |
|
||||||
* ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } | CLUSTER { CROSSCHECK |
|
* NODE { STATUS | CHECK | REJOIN | SERVICE } |
|
||||||
* MATRIX | SHOW | CLEANUP | EVENT }
|
* CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
|
||||||
*
|
*
|
||||||
* [node] is an optional hostname, provided instead of the -h/--host
|
* [node] is an optional hostname, provided instead of the -h/--host
|
||||||
* option
|
* option
|
||||||
|
* ---------
|
||||||
*/
|
*/
|
||||||
if (optind < argc)
|
if (optind < argc)
|
||||||
{
|
{
|
||||||
@@ -818,6 +830,8 @@ main(int argc, char **argv)
|
|||||||
action = CLUSTER_CROSSCHECK;
|
action = CLUSTER_CROSSCHECK;
|
||||||
else if (strcasecmp(repmgr_action, "MATRIX") == 0)
|
else if (strcasecmp(repmgr_action, "MATRIX") == 0)
|
||||||
action = CLUSTER_MATRIX;
|
action = CLUSTER_MATRIX;
|
||||||
|
else if (strcasecmp(repmgr_action, "CLEANUP") == 0)
|
||||||
|
action = CLUSTER_CLEANUP;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -1200,6 +1214,9 @@ main(int argc, char **argv)
|
|||||||
case CLUSTER_MATRIX:
|
case CLUSTER_MATRIX:
|
||||||
do_cluster_matrix();
|
do_cluster_matrix();
|
||||||
break;
|
break;
|
||||||
|
case CLUSTER_CLEANUP:
|
||||||
|
do_cluster_cleanup();
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
/* An action will have been determined by this point */
|
/* An action will have been determined by this point */
|
||||||
|
|||||||
@@ -169,6 +169,9 @@ static struct option long_options[] =
|
|||||||
{"event", required_argument, NULL, OPT_EVENT},
|
{"event", required_argument, NULL, OPT_EVENT},
|
||||||
{"limit", required_argument, NULL, OPT_LIMIT},
|
{"limit", required_argument, NULL, OPT_LIMIT},
|
||||||
|
|
||||||
|
/* "cluster cleanup" options */
|
||||||
|
{"keep-history", required_argument, NULL, 'k'},
|
||||||
|
|
||||||
/* Following options for internal use */
|
/* Following options for internal use */
|
||||||
{"config-archive-dir", required_argument, NULL, OPT_CONFIG_ARCHIVE_DIR},
|
{"config-archive-dir", required_argument, NULL, OPT_CONFIG_ARCHIVE_DIR},
|
||||||
|
|
||||||
@@ -181,7 +184,7 @@ static struct option long_options[] =
|
|||||||
|
|
||||||
|
|
||||||
/* not yet handled */
|
/* not yet handled */
|
||||||
{"keep-history", required_argument, NULL, 'k'},
|
|
||||||
{"mode", required_argument, NULL, 'm'},
|
{"mode", required_argument, NULL, 'm'},
|
||||||
{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
|
{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
|
||||||
{"pg_rewind", optional_argument, NULL, OPT_PG_REWIND},
|
{"pg_rewind", optional_argument, NULL, OPT_PG_REWIND},
|
||||||
|
|||||||
Reference in New Issue
Block a user