diff --git a/configdata.c b/configdata.c index 4b482945..45e25a19 100644 --- a/configdata.c +++ b/configdata.c @@ -606,7 +606,17 @@ struct ConfigFileSetting config_file_settings[] = "primary_visibility_consensus", CONFIG_BOOL, { .boolptr = &config_file_options.primary_visibility_consensus }, - { .booldefault = DEFAULT_PRIMARY_VISIBILITY_CONSENSUS }, + { .booldefault = DEFAULT_PRIMARY_VISIBILITY_CONSENSUS }, + {}, + {}, + {} + }, + /* always_promote */ + { + "always_promote", + CONFIG_BOOL, + { .boolptr = &config_file_options.always_promote }, + { .booldefault = DEFAULT_ALWAYS_PROMOTE }, {}, {}, {} diff --git a/configfile.c b/configfile.c index e6e06625..62439a57 100644 --- a/configfile.c +++ b/configfile.c @@ -867,6 +867,7 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL * - monitoring_history * - primary_notification_timeout * - primary_visibility_consensus + * - always_promote * - promote_command * - reconnect_attempts * - reconnect_interval @@ -1239,6 +1240,15 @@ reload_config(t_server_type server_type) format_bool(config_file_options.primary_visibility_consensus)); } + /* always_promote */ + if (config_file_options.always_promote != orig_config_file_options.always_promote) + { + item_list_append_format(&config_changes, + _("\"always_promote\" changed from \"%s\" to \"%s\""), + format_bool(orig_config_file_options.always_promote), + format_bool(config_file_options.always_promote)); + } + /* failover_validation_command */ if (strncmp(config_file_options.failover_validation_command, orig_config_file_options.failover_validation_command, sizeof(config_file_options.failover_validation_command)) != 0) { diff --git a/configfile.h b/configfile.h index b0791bb7..12440897 100644 --- a/configfile.h +++ b/configfile.h @@ -201,6 +201,7 @@ typedef struct int sibling_nodes_disconnect_timeout; ConnectionCheckType connection_check_type; bool primary_visibility_consensus; + bool always_promote; char failover_validation_command[MAXPGPATH]; int 
election_rerun_interval; int child_nodes_check_interval; diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index 940bb7cd..91440806 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -127,6 +127,14 @@ + + + + Configuration option always_promote (default: false) + to control whether a node should be promoted if the &repmgr; metadata is not up-to-date + on that node. + + diff --git a/doc/repmgrd-configuration.xml b/doc/repmgrd-configuration.xml index 647a1f94..fae0847a 100644 --- a/doc/repmgrd-configuration.xml +++ b/doc/repmgrd-configuration.xml @@ -419,6 +419,33 @@ + + + + + + always_promote + + + + Default: false. + + + If true, promote the local node even if its + &repmgr; metadata is not up-to-date. + + + Normally &repmgr; expects its metadata (stored in the repmgr.nodes + table) to be up-to-date so &repmgrd; can take the correct action during a failover. + However it's possible that updates made on the primary may not + have propagated to the standby (promotion candidate). In this case &repmgrd; will + default to not promoting the standby. This behaviour can be overridden by setting + always_promote to true. + + + + + @@ -765,6 +792,12 @@ repmgrd_service_stop_command='sudo systemctl repmgr12 stop' + + + always_promote + + + promote_command diff --git a/repmgr.conf.sample b/repmgr.conf.sample index cea903d1..b6c69302 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -342,6 +342,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" # WAL receivers #primary_visibility_consensus=false # If "true", only continue with failover if no standbys have seen # the primary node recently. *Must* be the same on all nodes. +#always_promote=false # Always promote a node, even if repmgr metadata is outdated #failover_validation_command='' # Script to execute for an external mechanism to validate the failover # decision made by repmgrd.
One or both of the following parameter placeholders # should be provided, which will be replaced by repmgrd with the appropriate diff --git a/repmgr.h b/repmgr.h index e54fc941..6e875cbe 100644 --- a/repmgr.h +++ b/repmgr.h @@ -126,7 +126,7 @@ #define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */ #define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */ #define DEFAULT_LOCATION "default" -#define DEFAULT_PRIORITY 100 +#define DEFAULT_PRIORITY 100 #define DEFAULT_MONITORING_INTERVAL 2 /* seconds */ #define DEFAULT_RECONNECTION_ATTEMPTS 6 /* seconds */ #define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */ @@ -139,6 +139,7 @@ #define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */ #define DEFAULT_CONNECTION_CHECK_TYPE CHECK_PING #define DEFAULT_PRIMARY_VISIBILITY_CONSENSUS false +#define DEFAULT_ALWAYS_PROMOTE false #define DEFAULT_ELECTION_RERUN_INTERVAL 15 /* seconds */ #define DEFAULT_CHILD_NODES_CHECK_INTERVAL 5 /* seconds */ #define DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT -1 diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 321ad2c5..72dff5db 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -764,9 +764,25 @@ check_primary_status(int degraded_monitoring_elapsed) } else { - appendPQExpBuffer(&event_details, - _("node has become a standby, monitoring connection to upstream node %i"), - local_node_info.upstream_node_id); + if (local_node_info.upstream_node_id == UNKNOWN_NODE_ID) + { + /* + * If upstream_node_id is not set, it's possible that following a switchover + * of some kind (possibly forced in some way), the updated node record has + * not yet propagated to the local node. In this case however we can safely + * assume we're monitoring the primary. 
+ */ + + appendPQExpBuffer(&event_details, + _("node has become a standby, monitoring connection to primary node %i"), + primary_node_id); + } + else + { + appendPQExpBuffer(&event_details, + _("node has become a standby, monitoring connection to upstream node %i"), + local_node_info.upstream_node_id); + } } create_event_notification(new_primary_conn, @@ -3199,6 +3215,7 @@ update_monitoring_history(void) if (primary_last_wal_location >= replication_info.last_wal_receive_lsn) { replication_lag_bytes = (long long unsigned int) (primary_last_wal_location - replication_info.last_wal_receive_lsn); + log_debug("replication lag in bytes is: %llu", replication_lag_bytes); } else { @@ -3482,6 +3499,14 @@ do_upstream_standby_failover(void) } +/* + * This promotes the local node using the "promote_command" configuration + * parameter, which must be either "repmgr standby promote" or a script which + * at some point executes "repmgr standby promote". + * + * TODO: make "promote_command" and execute the same code used by + * "repmgr standby promote". + */ static FailoverState promote_self(void) { @@ -3504,13 +3529,43 @@ promote_self(void) sleep(config_file_options.promote_delay); } - record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &failed_primary); - - if (record_status != RECORD_FOUND) + if (local_node_info.upstream_node_id == UNKNOWN_NODE_ID) { - log_error(_("unable to retrieve metadata record for failed upstream (ID: %i)"), - local_node_info.upstream_node_id); - return FAILOVER_STATE_PROMOTION_FAILED; + /* + * This is a corner-case situation where the repmgr metadata on the + * promotion candidate is outdated and the local node's upstream_node_id + * is not set. This is often an indication of potentially serious issues, + * such as the local node being very far behind the primary, or not being + * attached at all. + * + * In this case it may be desirable to restore the original primary. 
+ * This behaviour can be controlled by the "always_promote" configuration option. + */ + if (config_file_options.always_promote == false) + { + log_error(_("this node (ID: %i) does not have its upstream_node_id set, not promoting"), + local_node_info.node_id); + log_detail(_("the local node's metadata has not been updated since it became a standby")); + log_hint(_("set \"always_promote\" to \"true\" to force promotion in this situation")); + return FAILOVER_STATE_PROMOTION_FAILED; + } + else + { + log_warning(_("this node (ID: %i) does not have its upstream_node_id set, promoting anyway"), + local_node_info.node_id); + log_detail(_("\"always_promote\" is set to \"true\" ")); + } + } + else + { + record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &failed_primary); + + if (record_status != RECORD_FOUND) + { + log_error(_("unable to retrieve metadata record for failed upstream (ID: %i)"), + local_node_info.upstream_node_id); + return FAILOVER_STATE_PROMOTION_FAILED; + } } /* the presence of this command has been established already */