Add a CLUSTER CLEANUP command to clean monitor's history,

also include a --keep-history (-k) option to indicate how many days of history to keep
2026-06-01 03:39:05 +00:00 · 2012-06-13 00:39:54 -05:00
parent 7a76f1998c
commit 64fce88e99
3 changed files with 113 additions and 6 deletions
@@ -825,6 +825,22 @@ and on "prime."

 The servers are now again acting as primary on "prime" and standby on "standby".

+Maintenance of monitor history
+------------------------------
+
+Once you have changed roles (with a failover or to restore original roles)
+you would end up with records saying that node1 is primary and other records
+saying that node2 is the primary, which could be confusing.
+Also, if you don't do anything about it, the monitor history will keep growing.
+For both of those reasons you will sometimes want to do some maintenance on the
+``repl_monitor`` table.
+
+If you want to clean the history after a few days you can execute the
+CLUSTER CLEANUP command from cron. For example, to keep just one day of history
+you can put this in your crontab::
+
+    0 1 * * *   repmgr cluster cleanup -k 1 -f ~/repmgr.conf
+
 Configuration and command reference
 ===================================

@@ -953,6 +969,26 @@ its port if is different from the default one.

        ./repmgr standby follow

+* cluster show 
+
+    * Shows the role (standby/master) and connection string for all nodes configured
+      in the cluster, or "FAILED" if the node doesn't respond. This allows us to know
+      which nodes are alive and which ones need attention, and to get a notion of the
+      structure of clusters we have only just been given access to.  Example::
+
+        ./repmgr cluster show
+
+* cluster cleanup 
+
+    * Cleans the monitor's history from repmgr tables. This prevents the repl_monitor table
+      from growing excessively, which in turn affects repl_status view performance, and also
+      keeps the disk space used by repmgr under control. This command can be run manually
+      or from cron to run it periodically.
+      There is also a --keep-history (-k) option to indicate how many days of history we
+      want to keep, so the command will clean up history older than "keep-history" days. Example::
+
+        ./repmgr cluster cleanup -k 2
+
 repmgrd Daemon
 --------------

@@ -8,6 +8,7 @@
 * Commands implemented are.
 * MASTER REGISTER
 * STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, STANDBY PROMOTE
+ * CLUSTER SHOW, CLUSTER CLEANUP
 * WITNESS CREATE
 *
 * This program is free software: you can redistribute it and/or modify
@@ -48,6 +49,7 @@
 #define STANDBY_FOLLOW 	 5
 #define WITNESS_CREATE   6
 #define CLUSTER_SHOW     7
+#define CLUSTER_CLEANUP  8

 static bool create_recovery_file(const char *data_dir);
 static int test_ssh_connection(char *host, char *remote_user);
@@ -65,6 +67,7 @@ static void do_standby_promote(void);
 static void do_standby_follow(void);
 static void do_witness_create(void);
 static void do_cluster_show(void);
+static void do_cluster_cleanup(void);

 static void usage(void);
 static void help(const char *progname);
@@ -80,7 +83,7 @@ bool need_a_node = true;
 bool require_password = false;

 /* Initialization of runtime options */
-t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "" };
+t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, "", 0 };
 t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", -1 };

 static char		*server_mode = NULL;
@@ -100,6 +103,7 @@ main(int argc, char **argv)
 		{"config-file", required_argument, NULL, 'f'},
 		{"remote-user", required_argument, NULL, 'R'},
 		{"wal-keep-segments", required_argument, NULL, 'w'},
+        {"keep-history", required_argument, NULL, 'k'},
 		{"force", no_argument, NULL, 'F'},
 		{"ignore-rsync-warning", no_argument, NULL, 'I'},
 		{"verbose", no_argument, NULL, 'v'},
@@ -127,7 +131,7 @@ main(int argc, char **argv)
 	}


-	while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:F:I:v", long_options,
+	while ((c = getopt_long(argc, argv, "d:h:p:U:D:l:f:R:w:k:F:I:v", long_options,
 	                        &optindex)) != -1)
 	{
 		switch (c)
@@ -162,6 +166,12 @@ main(int argc, char **argv)
 			if (atoi(optarg) > 0)
 				strncpy(runtime_options.wal_keep_segments, optarg, MAXLEN);
 			break;
+		case 'k':
+			if (atoi(optarg) > 0)
+				runtime_options.keep_history = atoi(optarg);
+			else
+				runtime_options.keep_history = 0; 
+			break;
 		case 'F':
 			runtime_options.force = true;
 			break;
@@ -182,7 +192,7 @@ main(int argc, char **argv)
 	 * MASTER REGISTER |
 	 * STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} |
 	 * WITNESS CREATE
-	 * CLUSTER SHOW
+	 * CLUSTER {SHOW | CLEANUP}
 	 *
 	 * the node part is optional, if we receive it then we shouldn't
 	 * have received a -h option
@@ -223,6 +233,8 @@ main(int argc, char **argv)
 		{
 			if( strcasecmp(server_cmd, "SHOW") == 0)
 				action = CLUSTER_SHOW;
+			else if(strcasecmp(server_cmd, "CLEANUP") == 0)
+				action = CLUSTER_CLEANUP;
 		}
 		else if (strcasecmp(server_mode, "WITNESS") == 0)
 			if (strcasecmp(server_cmd, "CREATE") == 0)
@@ -348,6 +360,9 @@ main(int argc, char **argv)
 	case CLUSTER_SHOW:
 		do_cluster_show();
 		break;
+	case CLUSTER_CLEANUP:
+		do_cluster_cleanup();
+		break;
 	default:
 		usage();
 		exit(ERR_BAD_CONFIG);
@@ -402,10 +417,59 @@ do_cluster_show(void)
 	}

 	PQclear(res);
-
-
 }

+/*
+ * do_cluster_cleanup()
+ *
+ * Purge monitoring history from <repmgr_schema>.repl_monitor using the
+ * connection returned by getMasterConnection().
+ *
+ * When runtime_options.keep_history (-k) is greater than zero, only rows
+ * whose last_monitor_time is at least that many days old are deleted;
+ * otherwise the whole table is truncated.  The table is then vacuumed.
+ *
+ * Exits with ERR_DB_CON when no master connection can be obtained and with
+ * ERR_BAD_CONFIG when the cleanup statement itself fails.  A failing VACUUM
+ * is only logged, because the history has already been removed by then.
+ */
+static void
+do_cluster_cleanup(void)
+{
+	int			master_id;
+	PGconn	   *master_conn = NULL;
+	PGresult   *res;
+	char		sqlquery[QUERY_STR_LEN];
+
+	/* check if there is a master in this cluster */
+	log_info(_("%s connecting to master database\n"), progname);
+
+	/*
+	 * NOTE(review): the handle is passed as the first argument exactly as
+	 * before, but it is now a defined NULL instead of an uninitialized
+	 * pointer.  If getMasterConnection() needs an already established
+	 * connection to look up the master, one must be opened here first --
+	 * confirm against its definition.
+	 */
+	master_conn = getMasterConnection(master_conn, repmgr_schema, options.cluster_name,
+										&master_id, NULL);
+	if (!master_conn)
+	{
+		log_err(_("cluster cleanup: cannot connect to master\n"));
+		exit(ERR_DB_CON);
+	}
+
+	if (runtime_options.keep_history > 0)
+	{
+		/* keep the last keep_history days, drop everything older */
+		sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_monitor "
+									" WHERE age(now(), last_monitor_time) >= '%d days'::interval;",
+									repmgr_schema, runtime_options.keep_history);
+	}
+	else
+	{
+		/* no retention requested: throw the whole history away */
+		sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_monitor;", repmgr_schema);
+	}
+
+	/* the cleanup has to run on the connection we just obtained */
+	res = PQexec(master_conn, sqlquery);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		log_err(_("cluster cleanup: Couldn't clean history\n%s\n"), PQerrorMessage(master_conn));
+		PQclear(res);
+		PQfinish(master_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+	PQclear(res);
+
+	/* VACUUM now so that autovacuum is not triggered at an unexpected hour */
+	sqlquery_snprintf(sqlquery, "VACUUM %s.repl_monitor;", repmgr_schema);
+	res = PQexec(master_conn, sqlquery);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		/* not fatal: the history is already gone, only report the problem */
+		log_err(_("cluster cleanup: Couldn't vacuum history\n%s\n"), PQerrorMessage(master_conn));
+	}
+
+	PQclear(res);
+	PQfinish(master_conn);
+}
+
+
 static void
 do_master_register(void)
 {
@@ -1642,7 +1706,7 @@ help(const char *progname)
 	printf(_(" %s [OPTIONS] master	{register}\n"), progname);
 	printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
 	       progname);
-	printf(_(" %s [OPTIONS] cluster show\n"), progname);
+	printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
 	printf(_("\nGeneral options:\n"));
 	printf(_("	--help					   show this help, then exit\n"));
 	printf(_("	--version				   output version information, then exit\n"));
@@ -1659,6 +1723,7 @@ help(const char *progname)
 	printf(_("	-R, --remote-user=USERNAME database server username for rsync\n"));
 	printf(_("	-w, --wal-keep-segments=VALUE  minimum value for the GUC wal_keep_segments (default: 5000)\n"));
 	printf(_("	-I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
+    printf(_("  -k, --keep-history=VALUE   keeps indicated number of days of history\n"));
 	printf(_("	-F, --force				   force potentially dangerous operations to happen\n"));

 	printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
@@ -1671,6 +1736,7 @@ help(const char *progname)
 	printf(_("new master in the event of a failover\n"));
 	printf(_(" standby follow		 - allows the standby to re-point itself to a new master\n"));
 	printf(_(" cluster show            - print node informations\n"));
+    printf(_(" cluster cleanup         - cleans monitor's history\n"));
 }


@@ -1935,6 +2001,9 @@ check_parameters_for_action(const int action)
 	case CLUSTER_SHOW:
 		/* allow all parameters to be supplied */
 		break;
+	case CLUSTER_CLEANUP:
+		/* allow all parameters to be supplied */
+		break;
 	}

 	return ok;
@@ -64,6 +64,8 @@ typedef struct
 	char masterport[MAXLEN];
 	char localport[MAXLEN];

+	/* parameter used by CLUSTER CLEANUP */
+	int keep_history;
 } t_runtime_options;

 #define SLEEP_MONITOR		2