diff --git a/configdata.c b/configdata.c
index 4b482945..45e25a19 100644
--- a/configdata.c
+++ b/configdata.c
@@ -606,7 +606,17 @@ struct ConfigFileSetting config_file_settings[] =
"primary_visibility_consensus",
CONFIG_BOOL,
{ .boolptr = &config_file_options.primary_visibility_consensus },
- { .booldefault = DEFAULT_PRIMARY_VISIBILITY_CONSENSUS },
+ { .booldefault = DEFAULT_PRIMARY_VISIBILITY_CONSENSUS },
+ {},
+ {},
+ {}
+ },
+ /* always_promote */
+ {
+ "always_promote",
+ CONFIG_BOOL,
+ { .boolptr = &config_file_options.always_promote },
+ { .booldefault = DEFAULT_ALWAYS_PROMOTE },
{},
{},
{}
diff --git a/configfile.c b/configfile.c
index e6e06625..62439a57 100644
--- a/configfile.c
+++ b/configfile.c
@@ -867,6 +867,7 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
* - monitoring_history
* - primary_notification_timeout
* - primary_visibility_consensus
+ * - always_promote
* - promote_command
* - reconnect_attempts
* - reconnect_interval
@@ -1239,6 +1240,15 @@ reload_config(t_server_type server_type)
format_bool(config_file_options.primary_visibility_consensus));
}
+ /* always_promote */
+ if (config_file_options.always_promote != orig_config_file_options.always_promote)
+ {
+ item_list_append_format(&config_changes,
+ _("\"always_promote\" changed from \"%s\" to \"%s\""),
+ format_bool(orig_config_file_options.always_promote),
+ format_bool(config_file_options.always_promote));
+ }
+
/* failover_validation_command */
if (strncmp(config_file_options.failover_validation_command, orig_config_file_options.failover_validation_command, sizeof(config_file_options.failover_validation_command)) != 0)
{
diff --git a/configfile.h b/configfile.h
index b0791bb7..12440897 100644
--- a/configfile.h
+++ b/configfile.h
@@ -201,6 +201,7 @@ typedef struct
int sibling_nodes_disconnect_timeout;
ConnectionCheckType connection_check_type;
bool primary_visibility_consensus;
+ bool always_promote;
char failover_validation_command[MAXPGPATH];
int election_rerun_interval;
int child_nodes_check_interval;
diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml
index 940bb7cd..91440806 100644
--- a/doc/appendix-release-notes.xml
+++ b/doc/appendix-release-notes.xml
@@ -127,6 +127,14 @@
+
+
+
+ Configuration option always_promote (default: false)
+ to control whether a node should be promoted if the &repmgr; metadata is not up-to-date
+ on that node.
+
+
diff --git a/doc/repmgrd-configuration.xml b/doc/repmgrd-configuration.xml
index 647a1f94..fae0847a 100644
--- a/doc/repmgrd-configuration.xml
+++ b/doc/repmgrd-configuration.xml
@@ -419,6 +419,33 @@
+
+
+
+
+
+ always_promote
+
+
+
+ Default: false.
+
+
+ If true, promote the local node even if its
+ &repmgr; metadata is not up-to-date.
+
+
+ Normally &repmgr; expects its metadata (stored in the repmgr.nodes
+ table) to be up-to-date so &repmgrd; can take the correct action during a failover.
+ However it's possible that updates made on the primary may not
+ have propagated to the standby (promotion candidate). In this case &repmgrd; will
+ default to not promoting the standby. This behaviour can be overridden by setting
+ always_promote to true.
+
+
+
+
+
@@ -765,6 +792,12 @@ repmgrd_service_stop_command='sudo systemctl repmgr12 stop'
+
+
+ always_promote
+
+
+
promote_command
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index cea903d1..b6c69302 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -342,6 +342,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
# WAL receivers
#primary_visibility_consensus=false # If "true", only continue with failover if no standbys have seen
# the primary node recently. *Must* be the same on all nodes.
+#always_promote=false # Always promote a node, even if repmgr metadata is outdated
#failover_validation_command='' # Script to execute for an external mechanism to validate the failover
# decision made by repmgrd. One or both of the following parameter placeholders
# should be provided, which will be replaced by repmgrd with the appropriate
diff --git a/repmgr.h b/repmgr.h
index e54fc941..6e875cbe 100644
--- a/repmgr.h
+++ b/repmgr.h
@@ -126,7 +126,7 @@
#define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */
#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */
#define DEFAULT_LOCATION "default"
-#define DEFAULT_PRIORITY 100
+#define DEFAULT_PRIORITY 100
#define DEFAULT_MONITORING_INTERVAL 2 /* seconds */
#define DEFAULT_RECONNECTION_ATTEMPTS 6 /* seconds */
#define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */
@@ -139,6 +139,7 @@
#define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */
#define DEFAULT_CONNECTION_CHECK_TYPE CHECK_PING
#define DEFAULT_PRIMARY_VISIBILITY_CONSENSUS false
+#define DEFAULT_ALWAYS_PROMOTE false
#define DEFAULT_ELECTION_RERUN_INTERVAL 15 /* seconds */
#define DEFAULT_CHILD_NODES_CHECK_INTERVAL 5 /* seconds */
#define DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT -1
diff --git a/repmgrd-physical.c b/repmgrd-physical.c
index 321ad2c5..72dff5db 100644
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
@@ -764,9 +764,25 @@ check_primary_status(int degraded_monitoring_elapsed)
}
else
{
- appendPQExpBuffer(&event_details,
- _("node has become a standby, monitoring connection to upstream node %i"),
- local_node_info.upstream_node_id);
+ if (local_node_info.upstream_node_id == UNKNOWN_NODE_ID)
+ {
+ /*
+ * If upstream_node_id is not set, it's possible that following a switchover
+ * of some kind (possibly forced in some way), the updated node record has
+ * not yet propagated to the local node. In this case however we can safely
+ * assume we're monitoring the primary.
+ */
+
+ appendPQExpBuffer(&event_details,
+ _("node has become a standby, monitoring connection to primary node %i"),
+ primary_node_id);
+ }
+ else
+ {
+ appendPQExpBuffer(&event_details,
+ _("node has become a standby, monitoring connection to upstream node %i"),
+ local_node_info.upstream_node_id);
+ }
}
create_event_notification(new_primary_conn,
@@ -3199,6 +3215,7 @@ update_monitoring_history(void)
if (primary_last_wal_location >= replication_info.last_wal_receive_lsn)
{
replication_lag_bytes = (long long unsigned int) (primary_last_wal_location - replication_info.last_wal_receive_lsn);
+ log_debug("replication lag in bytes is: %llu", replication_lag_bytes);
}
else
{
@@ -3482,6 +3499,14 @@ do_upstream_standby_failover(void)
}
+/*
+ * This promotes the local node using the "promote_command" configuration
+ * parameter, which must be either "repmgr standby promote" or a script which
+ * at some point executes "repmgr standby promote".
+ *
+ * TODO: make "promote_command" optional and execute the same code used by
+ * "repmgr standby promote".
+ */
static FailoverState
promote_self(void)
{
@@ -3504,13 +3529,43 @@ promote_self(void)
sleep(config_file_options.promote_delay);
}
- record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &failed_primary);
-
- if (record_status != RECORD_FOUND)
+ if (local_node_info.upstream_node_id == UNKNOWN_NODE_ID)
{
- log_error(_("unable to retrieve metadata record for failed upstream (ID: %i)"),
- local_node_info.upstream_node_id);
- return FAILOVER_STATE_PROMOTION_FAILED;
+ /*
+ * This is a corner-case situation where the repmgr metadata on the
+ * promotion candidate is outdated and the local node's upstream_node_id
+ * is not set. This is often an indication of potentially serious issues,
+ * such as the local node being very far behind the primary, or not being
+ * attached at all.
+ *
+ * In this case it may be desirable to restore the original primary.
+ * This behaviour can be controlled by the "always_promote" configuration option.
+ */
+ if (config_file_options.always_promote == false)
+ {
+ log_error(_("this node (ID: %i) does not have its upstream_node_id set, not promoting"),
+ local_node_info.node_id);
+ log_detail(_("the local node's metadata has not been updated since it became a standby"));
+ log_hint(_("set \"always_promote\" to \"true\" to force promotion in this situation"));
+ return FAILOVER_STATE_PROMOTION_FAILED;
+ }
+ else
+ {
+ log_warning(_("this node (ID: %i) does not have its upstream_node_id set, promoting anyway"),
+ local_node_info.node_id);
+ log_detail(_("\"always_promote\" is set to \"true\" "));
+ }
+ }
+ else
+ {
+ record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &failed_primary);
+
+ if (record_status != RECORD_FOUND)
+ {
+ log_error(_("unable to retrieve metadata record for failed upstream (ID: %i)"),
+ local_node_info.upstream_node_id);
+ return FAILOVER_STATE_PROMOTION_FAILED;
+ }
}
/* the presence of this command has been established already */