From 18b40be7f1e574c402cbd8d1296866b830194e0b Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 3 Jul 2017 12:14:52 +0900 Subject: [PATCH] Add configuration option "log_status_interval" Normally (outside of log level DEBUG) repmgrd doesn't generate any kind of log output, so examining the log file may give the impression it's not working. Output an informational message at regular intervals to show it's up and running. --- config.c | 3 ++ config.h | 3 +- doc/changes-in-repmgr4.md | 1 + log.h | 2 ++ repmgr.conf.sample | 3 +- repmgrd.c | 59 +++++++++++++++++++++++++++++++++++---- 6 files changed, 64 insertions(+), 7 deletions(-) diff --git a/config.c b/config.c index ebfdc6be..e857cdea 100644 --- a/config.c +++ b/config.c @@ -219,6 +219,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * */ memset(options->log_facility, 0, sizeof(options->log_facility)); memset(options->log_file, 0, sizeof(options->log_file)); + options->log_status_interval = DEFAULT_LOG_STATUS_INTERVAL; /* standby clone settings * ----------------------- */ @@ -354,6 +355,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * strncpy(options->log_level, value, MAXLEN); else if (strcmp(name, "log_facility") == 0) strncpy(options->log_facility, value, MAXLEN); + else if (strcmp(name, "log_status_interval") == 0) + options->log_status_interval = repmgr_atoi(value, name, error_list, 0); /* standby clone settings */ else if (strcmp(name, "use_replication_slots") == 0) diff --git a/config.h b/config.h index 94c2c508..c836837e 100644 --- a/config.h +++ b/config.h @@ -62,6 +62,7 @@ typedef struct char log_level[MAXLEN]; char log_facility[MAXLEN]; char log_file[MAXLEN]; + int log_status_interval; /* standby clone settings */ bool use_replication_slots; @@ -116,7 +117,7 @@ typedef struct /* node information */ \ UNKNOWN_NODE_ID, NO_UPSTREAM_NODE, "", "", "", "", REPLICATION_TYPE_PHYSICAL, \ /* log settings */ \ - "", "", "", \ + "", "", "", DEFAULT_LOG_STATUS_INTERVAL, \ /* standby clone settings */ \ false, "", "", "", "", { NULL, NULL }, \ /* repmgrd settings */ \ diff --git a/doc/changes-in-repmgr4.md b/doc/changes-in-repmgr4.md index b3b0e6fe..b7690380 100644 --- a/doc/changes-in-repmgr4.md +++ b/doc/changes-in-repmgr4.md @@ -37,4 +37,5 @@ Logging changes - `logfile` has been renamed to `log_file` - `logfacility` has been renamed to `log_facility` - default value for `log_level` is `INFO` rather than `NOTICE`. +- new parameter `log_status_interval` diff --git a/log.h b/log.h index a8f02024..371bd6aa 100644 --- a/log.h +++ b/log.h @@ -14,6 +14,8 @@ #define OM_COMMAND_LINE 1 #define OM_DAEMON 2 +#define DEFAULT_LOG_STATUS_INTERVAL 300 + extern void stderr_log_with_level(const char *level_name, int level, const char *fmt,...) __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 02417cb3..3c8aebee 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -77,7 +77,8 @@ # syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER #log_file='' # stderr can be redirected to an arbitrary file: - +#log_status_interval=300 # interval (in seconds) for repmgrd to log a message confirming + # its current status; setting to 0 disables this message #------------------------------------------------------------------------------ # Event notification settings diff --git a/repmgrd.c b/repmgrd.c index b8dba728..c4043acf 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -10,6 +10,8 @@ #include #include +#include "portability/instr_time.h" + #include "repmgr.h" #include "config.h" #include "voting.h" @@ -449,7 +451,8 @@ start_monitoring(void) static void monitor_streaming_primary(void) { - NodeStatus node_status = NODE_STATUS_UP; + NodeStatus node_status = NODE_STATUS_UP; + instr_time log_status_interval_start; /* Log startup event */ if (startup_event_logged == false) @@ -467,11 +470,17 @@ monitor_streaming_primary(void) local_node_info.node_id); } + INSTR_TIME_SET_CURRENT(log_status_interval_start); + while (true) { + double log_status_interval_elapsed = 0; + instr_time log_status_interval_current; + // cache node list here, refresh at `node_list_refresh_interval` if (is_server_available(local_node_info.conninfo) == false) { + /* node is down, we were expecting it to be up */ if (node_status == NODE_STATUS_UP) { @@ -499,8 +508,22 @@ monitor_streaming_primary(void) } } - loop: + /* emit "still alive" log message at regular intervals, if requested */ + if (config_file_options.log_status_interval > 0) + { + INSTR_TIME_SET_CURRENT(log_status_interval_current); + INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start); + log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current); + + if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval) + { + log_info(_("monitoring primary node \"%s\" (node ID: %i)"), + local_node_info.node_name, + local_node_info.node_id); + INSTR_TIME_SET_CURRENT(log_status_interval_start); + } + } sleep(1); } } @@ -509,7 +532,8 @@ monitor_streaming_primary(void) static void monitor_streaming_standby(void) { - NodeStatus upstream_node_status = NODE_STATUS_UP; + NodeStatus upstream_node_status = NODE_STATUS_UP; + instr_time log_status_interval_start; // check result (void) get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info); @@ -531,16 +555,23 @@ monitor_streaming_standby(void) NULL); startup_event_logged = true; - log_notice(_("monitoring node \"%s\" (node ID: %i)"), + log_notice(_("repmgrd on node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"), + local_node_info.node_name, + local_node_info.node_id, upstream_node_info.node_name, upstream_node_info.node_id); } + INSTR_TIME_SET_CURRENT(log_status_interval_start); while (true) { + double log_status_interval_elapsed = 0; + instr_time log_status_interval_current; + if (is_server_available(upstream_node_info.conninfo) == false) { + /* upstream node is down, we were expecting it to be up */ if (upstream_node_status == NODE_STATUS_UP) { @@ -615,6 +646,24 @@ monitor_streaming_standby(void) } loop: + + /* emit "still alive" log message at regular intervals, if requested */ + if (config_file_options.log_status_interval > 0) + { + INSTR_TIME_SET_CURRENT(log_status_interval_current); + INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start); + log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current); + if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval) + { + log_info(_("node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"), + local_node_info.node_name, + local_node_info.node_id, + upstream_node_info.node_name, + upstream_node_info.node_id); + + INSTR_TIME_SET_CURRENT(log_status_interval_start); + } + } sleep(1); } } @@ -641,7 +690,7 @@ promote_self(void) log_debug("promote command is:\n \"%s\"", promote_command); - if (log_type == REPMGR_STDERR && *config_file_options.logfile) + if (log_type == REPMGR_STDERR && *config_file_options.log_file) { fflush(stderr); }