diff --git a/configfile.c b/configfile.c index 56c548df..c07c853e 100644 --- a/configfile.c +++ b/configfile.c @@ -242,13 +242,14 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * memset(options->promote_command, 0, sizeof(options->promote_command)); memset(options->follow_command, 0, sizeof(options->follow_command)); options->monitor_interval_secs = DEFAULT_STATS_REPORTING_INTERVAL; - options->primary_response_timeout = 60; /* default to 6 reconnection attempts at intervals of 10 seconds */ options->reconnect_attempts = DEFAULT_RECONNECTION_ATTEMPTS; options->reconnect_interval = DEFAULT_RECONNECTION_INTERVAL; - options->retry_promote_interval_secs = 300; options->monitoring_history = false; /* new in 4.0, replaces --monitoring-history */ options->degraded_monitoring_timeout = -1; + options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT; + options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT; + options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT; /* BDR settings * ------------ */ @@ -415,12 +416,16 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->reconnect_interval = repmgr_atoi(value, name, error_list, 0); else if (strcmp(name, "monitor_interval_secs") == 0) options->monitor_interval_secs = repmgr_atoi(value, name, error_list, 1); - else if (strcmp(name, "retry_promote_interval_secs") == 0) - options->retry_promote_interval_secs = repmgr_atoi(value, name, error_list, 1); else if (strcmp(name, "monitoring_history") == 0) options->monitoring_history = parse_bool(value, name, error_list); else if (strcmp(name, "degraded_monitoring_timeout") == 0) options->degraded_monitoring_timeout = repmgr_atoi(value, name, error_list, 1); + else if (strcmp(name, "async_query_timeout") == 0) + options->async_query_timeout = repmgr_atoi(value, name, error_list, 0); + else if (strcmp(name, "primary_notification_timeout") == 0) + options->primary_notification_timeout 
= repmgr_atoi(value, name, error_list, 0); + else if (strcmp(name, "primary_follow_timeout") == 0) + options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0); /* BDR settings */ else if (strcmp(name, "bdr_local_monitoring_only") == 0) @@ -488,19 +493,31 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * else if (strcmp(name, "loglevel") == 0) { item_list_append(warning_list, - _("parameter \"loglevel\" has been enamed to \"log_level\"")); + _("parameter \"loglevel\" has been renamed to \"log_level\"")); known_parameter = false; } else if (strcmp(name, "logfacility") == 0) { item_list_append(warning_list, - _("parameter \"logfacility\" has been enamed to \"log_facility\"")); + _("parameter \"logfacility\" has been renamed to \"log_facility\"")); known_parameter = false; } else if (strcmp(name, "logfile") == 0) { item_list_append(warning_list, - _("parameter \"logfile\" has been enamed to \"log_file\"")); + _("parameter \"logfile\" has been renamed to \"log_file\"")); + known_parameter = false; + } + else if (strcmp(name, "master_response_timeout") == 0) + { + item_list_append(warning_list, + _("parameter \"master_response_timeout\" has been removed; use \"async_query_timeout\" instead")); + known_parameter = false; + } + else if (strcmp(name, "retry_promote_interval_secs") == 0) + { + item_list_append(warning_list, + _("parameter \"retry_promote_interval_secs\" has been removed; use \"primary_notification_timeout\" instead")); known_parameter = false; } else diff --git a/configfile.h b/configfile.h index 073d8188..142c77ef 100644 --- a/configfile.h +++ b/configfile.h @@ -78,12 +78,13 @@ typedef struct char promote_command[MAXLEN]; char follow_command[MAXLEN]; int monitor_interval_secs; - int primary_response_timeout; int reconnect_attempts; int reconnect_interval; - int retry_promote_interval_secs; bool monitoring_history; int degraded_monitoring_timeout; + int async_query_timeout; + int primary_notification_timeout; + int 
primary_follow_timeout; /* BDR settings */ bool bdr_local_monitoring_only; @@ -125,10 +126,12 @@ typedef struct /* repmgrd settings */ \ FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \ DEFAULT_STATS_REPORTING_INTERVAL, \ - 60, \ DEFAULT_RECONNECTION_ATTEMPTS, \ DEFAULT_RECONNECTION_INTERVAL, \ - 300, false, -1, \ + false, -1, \ + DEFAULT_ASYNC_QUERY_TIMEOUT, \ + DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \ + DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \ /* BDR settings */ \ false, false, \ /* service settings */ \ diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index e993c602..68a01c31 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -1293,7 +1293,7 @@ do_standby_follow(void) * seconds before giving up */ - for (timer = 0; timer < config_file_options.primary_response_timeout; timer++) + for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++) { primary_conn = get_primary_connection_quiet(local_conn, &primary_id, diff --git a/repmgr-client.c b/repmgr-client.c index a5b5f21d..a5f499be 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -1320,6 +1320,7 @@ do_help(void) #endif printf(_(" %s [OPTIONS] bdr register\n"), progname()); printf(_(" %s [OPTIONS] bdr unregister\n"), progname()); + printf(_(" %s [OPTIONS] node status\n"), progname()); printf(_(" %s [OPTIONS] cluster show\n"), progname()); printf(_(" %s [OPTIONS] cluster event\n"), progname()); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 95175d77..cdc4da99 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -72,14 +72,14 @@ # This is mainly intended for those cases when `repmgr` is executed directly # by `repmgrd`. 
-#log_level=INFO # Log level: possible values are DEBUG, INFO, NOTICE, - # WARNING, ERROR, ALERT, CRIT or EMERG +#log_level=INFO # Log level: possible values are DEBUG, INFO, NOTICE, + # WARNING, ERROR, ALERT, CRIT or EMERG -#log_facility=STDERR # Logging facility: possible values are STDERR, or for - # syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER +#log_facility=STDERR # Logging facility: possible values are STDERR, or for + # syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER -#log_file='' # stderr can be redirected to an arbitrary file: -#log_status_interval=300 # interval (in seconds) for repmgrd to log a status message +#log_file='' # stderr can be redirected to an arbitrary file: +#log_status_interval=300 # interval (in seconds) for repmgrd to log a status message #------------------------------------------------------------------------------ @@ -105,24 +105,24 @@ # # event_notifications=master_register,standby_register -#event_notification_command='' # An external program or script which - # can be executed by the user under which - # repmgr/repmgrd are run. +#event_notification_command='' # An external program or script which + # can be executed by the user under which + # repmgr/repmgrd are run. -#event_notifications='' # A commas-separated list of notification - # types +#event_notifications='' # A commas-separated list of notification + # types #------------------------------------------------------------------------------ # Environment/command settings #------------------------------------------------------------------------------ -#pg_bindir='' # Path to PostgreSQL binary directory (location - # of pg_ctl, pg_basebackup etc.). Only needed - # if these files are not in the system $PATH. - # - # Debian/Ubuntu users: you will probably need to - # set this to the directory where `pg_ctl` is located, - # e.g. 
/usr/lib/postgresql/9.6/bin/ +#pg_bindir='' # Path to PostgreSQL binary directory (location + # of pg_ctl, pg_basebackup etc.). Only needed + # if these files are not in the system $PATH. + # + # Debian/Ubuntu users: you will probably need to + # set this to the directory where `pg_ctl` is located, + # e.g. /usr/lib/postgresql/9.6/bin/ #------------------------------------------------------------------------------ @@ -138,10 +138,10 @@ # rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\"" # ssh_options=-o "StrictHostKeyChecking no" -pg_ctl_options='' # Options to append to "pg_ctl" -pg_basebackup_options='' # Options to append to "pg_basebackup" -rsync_options='' # Options to append to "rsync" -ssh_options='' # Options to append to "ssh" +pg_ctl_options='' # Options to append to "pg_ctl" +pg_basebackup_options='' # Options to append to "pg_basebackup" +rsync_options='' # Options to append to "rsync" +ssh_options='' # Options to append to "ssh" @@ -157,11 +157,21 @@ ssh_options='' # Options to append to "ssh" # tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace # restore_command = 'cp /path/to/archived/wals/%f %p' -#tablespace_mapping='' # Tablespaces can be remapped from one - # file system location to another +#tablespace_mapping='' # Tablespaces can be remapped from one + # file system location to another -#restore_command='' # This will be placed in the recovery.conf - # file generated by repmgr +#restore_command='' # This will be placed in the recovery.conf + # file generated by repmgr + +#------------------------------------------------------------------------------ +# Standby follow settings +#------------------------------------------------------------------------------ + +# These settings apply when instructing a standby to follow the new primary +# ("repmgr standby follow"). 
+ +#primary_follow_timeout=60 # The length of time (in seconds) to wait + # for the new primary to become available #------------------------------------------------------------------------------ @@ -175,7 +185,7 @@ ssh_options='' # Options to append to "ssh" # in a non-standard location) #------------------------------------------------------------------------------ -# Failover settings (repmgrd) +# Failover and monitoring settings (repmgrd) #------------------------------------------------------------------------------ # # These settings are only applied when repmgrd is running. Values shown @@ -183,10 +193,15 @@ ssh_options='' # Options to append to "ssh" #monitoring_history=no +#primary_notification_timeout=60 # Interval (in seconds) which repmgrd on a standby + # will wait for a notification from the new primary, + # before falling back to degraded monitoring #degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd # will terminate if the server being monitored - # is no longer available. -1 (default) - # disables completely. + # is no longer available. -1 (default) disables the + # timeout completely. +#async_query_timeout=60 # Interval (in seconds) which repmgrd will wait before + # cancelling an asynchronous query. 
#------------------------------------------------------------------------------ # service control commands @@ -216,3 +231,9 @@ ssh_options='' # Options to append to "ssh" #service_reload_command = '' #service_promote_command = '' # Note: this overrides any value contained # in the setting "promote_command" + +#------------------------------------------------------------------------------ +# BDR monitoring options +#------------------------------------------------------------------------------ + +#bdr_active_node_recovery=false # \ No newline at end of file diff --git a/repmgr.h b/repmgr.h index 4fca7ba5..a99671d0 100644 --- a/repmgr.h +++ b/repmgr.h @@ -43,6 +43,9 @@ #define DEFAULT_RECONNECTION_ATTEMPTS 6 #define DEFAULT_RECONNECTION_INTERVAL 10 #define DEFAULT_STATS_REPORTING_INTERVAL 2 +#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 +#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 +#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 #define FAILOVER_NODES_MAX_CHECK 50 diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 0cbf5f54..df42726b 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -749,7 +749,7 @@ monitor_streaming_standby(void) check_connection(&local_node_info, local_conn); - sleep(1); + sleep(config_file_options.monitor_interval_secs); } #endif } @@ -1336,11 +1336,9 @@ poll_best_candidate(NodeInfoList *standby_nodes) static bool wait_primary_notification(int *new_primary_id) { - // XXX make this configurable - int wait_primary_timeout = 60; int i; - for (i = 0; i < wait_primary_timeout; i++) + for (i = 0; i < config_file_options.primary_notification_timeout; i++) { if (get_new_primary(local_conn, new_primary_id) == true) { @@ -1353,7 +1351,7 @@ wait_primary_notification(int *new_primary_id) log_warning(_("no notification received from new primary after %i seconds"), - wait_primary_timeout); + config_file_options.primary_notification_timeout); monitoring_state = MS_DEGRADED; INSTR_TIME_SET_CURRENT(degraded_monitoring_start); @@ -1866,7 +1864,7 @@ 
close_connections_physical() { /* cancel any pending queries to the primary */ if (PQisBusy(primary_conn) == 1) - cancel_query(primary_conn, config_file_options.primary_response_timeout); + cancel_query(primary_conn, config_file_options.async_query_timeout); PQfinish(primary_conn); primary_conn = NULL; }