diff --git a/config.c b/config.c index ebfdc6be..e857cdea 100644 --- a/config.c +++ b/config.c @@ -219,6 +219,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * */ memset(options->log_facility, 0, sizeof(options->log_facility)); memset(options->log_file, 0, sizeof(options->log_file)); + options->log_status_interval = DEFAULT_LOG_STATUS_INTERVAL; /* standby clone settings * ----------------------- */ @@ -354,6 +355,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * strncpy(options->log_level, value, MAXLEN); else if (strcmp(name, "log_facility") == 0) strncpy(options->log_facility, value, MAXLEN); + else if (strcmp(name, "log_status_interval") == 0) + options->log_status_interval = repmgr_atoi(value, name, error_list, 0); /* standby clone settings */ else if (strcmp(name, "use_replication_slots") == 0) diff --git a/config.h b/config.h index 94c2c508..c836837e 100644 --- a/config.h +++ b/config.h @@ -62,6 +62,7 @@ typedef struct char log_level[MAXLEN]; char log_facility[MAXLEN]; char log_file[MAXLEN]; + int log_status_interval; /* standby clone settings */ bool use_replication_slots; @@ -116,7 +117,7 @@ typedef struct /* node information */ \ UNKNOWN_NODE_ID, NO_UPSTREAM_NODE, "", "", "", "", REPLICATION_TYPE_PHYSICAL, \ /* log settings */ \ - "", "", "", \ + "", "", "", DEFAULT_LOG_STATUS_INTERVAL, \ /* standby clone settings */ \ false, "", "", "", "", { NULL, NULL }, \ /* repmgrd settings */ \ diff --git a/doc/changes-in-repmgr4.md b/doc/changes-in-repmgr4.md index b3b0e6fe..b7690380 100644 --- a/doc/changes-in-repmgr4.md +++ b/doc/changes-in-repmgr4.md @@ -37,4 +37,5 @@ Logging changes - `logfile` has been renamed to `log_file` - `logfacility` has been renamed to `log_facility` - default value for `log_level` is `INFO` rather than `NOTICE`. +- new parameter `log_status_interval` diff --git a/log.h b/log.h index a8f02024..371bd6aa 100644 --- a/log.h +++ b/log.h @@ -14,6 +14,8 @@ #define OM_COMMAND_LINE 1 #define OM_DAEMON 2 +#define DEFAULT_LOG_STATUS_INTERVAL 300 + extern void stderr_log_with_level(const char *level_name, int level, const char *fmt,...) __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 02417cb3..3c8aebee 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -77,7 +77,8 @@ # syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER #log_file='' # stderr can be redirected to an arbitrary file: - +#log_status_interval=300 # interval (in seconds) for repmgrd to log a message confirming + # its current status; setting to 0 disables this message #------------------------------------------------------------------------------ # Event notification settings diff --git a/repmgrd.c b/repmgrd.c index b8dba728..c4043acf 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -10,6 +10,8 @@ #include #include +#include "portability/instr_time.h" + #include "repmgr.h" #include "config.h" #include "voting.h" @@ -449,7 +451,8 @@ start_monitoring(void) static void monitor_streaming_primary(void) { - NodeStatus node_status = NODE_STATUS_UP; + NodeStatus node_status = NODE_STATUS_UP; + instr_time log_status_interval_start; /* Log startup event */ if (startup_event_logged == false) @@ -467,11 +470,17 @@ monitor_streaming_primary(void) local_node_info.node_id); } + INSTR_TIME_SET_CURRENT(log_status_interval_start); + while (true) { + double log_status_interval_elapsed = 0; + instr_time log_status_interval_current; + // cache node list here, refresh at `node_list_refresh_interval` if (is_server_available(local_node_info.conninfo) == false) { + /* node is down, we were expecting it to be up */ if (node_status == NODE_STATUS_UP) { @@ -499,8 +508,22 @@ monitor_streaming_primary(void) } } - loop: + /* emit "still alive" log message at regular intervals, if requested */ + if (config_file_options.log_status_interval > 0) + { + INSTR_TIME_SET_CURRENT(log_status_interval_current); + INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start); + log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current); + + if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval) + { + log_info(_("monitoring primary node \"%s\" (node ID: %i)"), + local_node_info.node_name, + local_node_info.node_id); + INSTR_TIME_SET_CURRENT(log_status_interval_start); + } + } sleep(1); } } @@ -509,7 +532,8 @@ monitor_streaming_primary(void) static void monitor_streaming_standby(void) { - NodeStatus upstream_node_status = NODE_STATUS_UP; + NodeStatus upstream_node_status = NODE_STATUS_UP; + instr_time log_status_interval_start; // check result (void) get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info); @@ -531,16 +555,23 @@ monitor_streaming_standby(void) NULL); startup_event_logged = true; - log_notice(_("monitoring node \"%s\" (node ID: %i)"), + log_notice(_("repmgrd on node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"), + local_node_info.node_name, + local_node_info.node_id, upstream_node_info.node_name, upstream_node_info.node_id); } + INSTR_TIME_SET_CURRENT(log_status_interval_start); while (true) { + double log_status_interval_elapsed = 0; + instr_time log_status_interval_current; + if (is_server_available(upstream_node_info.conninfo) == false) { + /* upstream node is down, we were expecting it to be up */ if (upstream_node_status == NODE_STATUS_UP) { @@ -615,6 +646,24 @@ monitor_streaming_standby(void) } loop: + + /* emit "still alive" log message at regular intervals, if requested */ + if (config_file_options.log_status_interval > 0) + { + INSTR_TIME_SET_CURRENT(log_status_interval_current); + INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start); + log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current); + if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval) + { + log_info(_("node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"), + local_node_info.node_name, + local_node_info.node_id, + upstream_node_info.node_name, + upstream_node_info.node_id); + + INSTR_TIME_SET_CURRENT(log_status_interval_start); + } + } sleep(1); } } @@ -641,7 +690,7 @@ promote_self(void) log_debug("promote command is:\n \"%s\"", promote_command); - if (log_type == REPMGR_STDERR && *config_file_options.logfile) + if (log_type == REPMGR_STDERR && *config_file_options.log_file) { fflush(stderr); }