Add configuration option "log_status_interval"

Normally (outside of log level DEBUG) repmgrd doesn't generate any
kind of log output, so examining the log file may give the impression
it's not working. Output an informational message at regular intervals
to show it's up and running.
This commit is contained in:
Ian Barwick
2017-07-03 12:14:52 +09:00
parent 34c746fcde
commit 18b40be7f1
6 changed files with 64 additions and 7 deletions

View File

@@ -219,6 +219,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
*/ */
memset(options->log_facility, 0, sizeof(options->log_facility)); memset(options->log_facility, 0, sizeof(options->log_facility));
memset(options->log_file, 0, sizeof(options->log_file)); memset(options->log_file, 0, sizeof(options->log_file));
options->log_status_interval = DEFAULT_LOG_STATUS_INTERVAL;
/* standby clone settings /* standby clone settings
* ----------------------- */ * ----------------------- */
@@ -354,6 +355,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
strncpy(options->log_level, value, MAXLEN); strncpy(options->log_level, value, MAXLEN);
else if (strcmp(name, "log_facility") == 0) else if (strcmp(name, "log_facility") == 0)
strncpy(options->log_facility, value, MAXLEN); strncpy(options->log_facility, value, MAXLEN);
else if (strcmp(name, "log_status_interval") == 0)
options->log_status_interval = repmgr_atoi(value, name, error_list, 0);
/* standby clone settings */ /* standby clone settings */
else if (strcmp(name, "use_replication_slots") == 0) else if (strcmp(name, "use_replication_slots") == 0)

View File

@@ -62,6 +62,7 @@ typedef struct
char log_level[MAXLEN]; char log_level[MAXLEN];
char log_facility[MAXLEN]; char log_facility[MAXLEN];
char log_file[MAXLEN]; char log_file[MAXLEN];
int log_status_interval;
/* standby clone settings */ /* standby clone settings */
bool use_replication_slots; bool use_replication_slots;
@@ -116,7 +117,7 @@ typedef struct
/* node information */ \ /* node information */ \
UNKNOWN_NODE_ID, NO_UPSTREAM_NODE, "", "", "", "", REPLICATION_TYPE_PHYSICAL, \ UNKNOWN_NODE_ID, NO_UPSTREAM_NODE, "", "", "", "", REPLICATION_TYPE_PHYSICAL, \
/* log settings */ \ /* log settings */ \
"", "", "", \ "", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
/* standby clone settings */ \ /* standby clone settings */ \
false, "", "", "", "", { NULL, NULL }, \ false, "", "", "", "", { NULL, NULL }, \
/* repmgrd settings */ \ /* repmgrd settings */ \

View File

@@ -37,4 +37,5 @@ Logging changes
- `logfile` has been renamed to `log_file` - `logfile` has been renamed to `log_file`
- `logfacility` has been renamed to `log_facility` - `logfacility` has been renamed to `log_facility`
- default value for `log_level` is `INFO` rather than `NOTICE`. - default value for `log_level` is `INFO` rather than `NOTICE`.
- new parameter `log_status_interval`

2
log.h
View File

@@ -14,6 +14,8 @@
#define OM_COMMAND_LINE 1 #define OM_COMMAND_LINE 1
#define OM_DAEMON 2 #define OM_DAEMON 2
#define DEFAULT_LOG_STATUS_INTERVAL 300
extern void extern void
stderr_log_with_level(const char *level_name, int level, const char *fmt,...) stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));

View File

@@ -77,7 +77,8 @@
# syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER # syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER
#log_file='' # stderr can be redirected to an arbitrary file: #log_file='' # stderr can be redirected to an arbitrary file:
#log_status_interval=300 # interval (in seconds) for repmgrd to log a message confirming
# its current status; setting to 0 disables this message
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# Event notification settings # Event notification settings

View File

@@ -10,6 +10,8 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <time.h> #include <time.h>
#include "portability/instr_time.h"
#include "repmgr.h" #include "repmgr.h"
#include "config.h" #include "config.h"
#include "voting.h" #include "voting.h"
@@ -449,7 +451,8 @@ start_monitoring(void)
static void static void
monitor_streaming_primary(void) monitor_streaming_primary(void)
{ {
NodeStatus node_status = NODE_STATUS_UP; NodeStatus node_status = NODE_STATUS_UP;
instr_time log_status_interval_start;
/* Log startup event */ /* Log startup event */
if (startup_event_logged == false) if (startup_event_logged == false)
@@ -467,11 +470,17 @@ monitor_streaming_primary(void)
local_node_info.node_id); local_node_info.node_id);
} }
INSTR_TIME_SET_CURRENT(log_status_interval_start);
while (true) while (true)
{ {
double log_status_interval_elapsed = 0;
instr_time log_status_interval_current;
// cache node list here, refresh at `node_list_refresh_interval` // cache node list here, refresh at `node_list_refresh_interval`
if (is_server_available(local_node_info.conninfo) == false) if (is_server_available(local_node_info.conninfo) == false)
{ {
/* node is down, we were expecting it to be up */ /* node is down, we were expecting it to be up */
if (node_status == NODE_STATUS_UP) if (node_status == NODE_STATUS_UP)
{ {
@@ -499,8 +508,22 @@ monitor_streaming_primary(void)
} }
} }
loop: loop:
/* emit "still alive" log message at regular intervals, if requested */
if (config_file_options.log_status_interval > 0)
{
INSTR_TIME_SET_CURRENT(log_status_interval_current);
INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start);
log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current);
if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval)
{
log_info(_("monitoring primary node \"%s\" (node ID: %i)"),
local_node_info.node_name,
local_node_info.node_id);
INSTR_TIME_SET_CURRENT(log_status_interval_start);
}
}
sleep(1); sleep(1);
} }
} }
@@ -509,7 +532,8 @@ monitor_streaming_primary(void)
static void static void
monitor_streaming_standby(void) monitor_streaming_standby(void)
{ {
NodeStatus upstream_node_status = NODE_STATUS_UP; NodeStatus upstream_node_status = NODE_STATUS_UP;
instr_time log_status_interval_start;
// check result // check result
(void) get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info); (void) get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info);
@@ -531,16 +555,23 @@ monitor_streaming_standby(void)
NULL); NULL);
startup_event_logged = true; startup_event_logged = true;
log_notice(_("monitoring node \"%s\" (node ID: %i)"), log_notice(_("repmgrd on node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"),
local_node_info.node_name,
local_node_info.node_id,
upstream_node_info.node_name, upstream_node_info.node_name,
upstream_node_info.node_id); upstream_node_info.node_id);
} }
INSTR_TIME_SET_CURRENT(log_status_interval_start);
while (true) while (true)
{ {
double log_status_interval_elapsed = 0;
instr_time log_status_interval_current;
if (is_server_available(upstream_node_info.conninfo) == false) if (is_server_available(upstream_node_info.conninfo) == false)
{ {
/* upstream node is down, we were expecting it to be up */ /* upstream node is down, we were expecting it to be up */
if (upstream_node_status == NODE_STATUS_UP) if (upstream_node_status == NODE_STATUS_UP)
{ {
@@ -615,6 +646,24 @@ monitor_streaming_standby(void)
} }
loop: loop:
/* emit "still alive" log message at regular intervals, if requested */
if (config_file_options.log_status_interval > 0)
{
INSTR_TIME_SET_CURRENT(log_status_interval_current);
INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start);
log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current);
if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval)
{
log_info(_("node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"),
local_node_info.node_name,
local_node_info.node_id,
upstream_node_info.node_name,
upstream_node_info.node_id);
INSTR_TIME_SET_CURRENT(log_status_interval_start);
}
}
sleep(1); sleep(1);
} }
} }
@@ -641,7 +690,7 @@ promote_self(void)
log_debug("promote command is:\n \"%s\"", log_debug("promote command is:\n \"%s\"",
promote_command); promote_command);
if (log_type == REPMGR_STDERR && *config_file_options.logfile) if (log_type == REPMGR_STDERR && *config_file_options.log_file)
{ {
fflush(stderr); fflush(stderr);
} }