diff --git a/dbutils.c b/dbutils.c
index 45db88e9..fc9c24c3 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -4818,6 +4818,7 @@ init_replication_info(ReplInfo *replication_info)
 	replication_info->replication_lag_time = 0;
 	replication_info->receiving_streamed_wal = true;
 	replication_info->wal_replay_paused = false;
+	replication_info->primary_last_seen = -1;	/* -1: no value available (yet) */
 }
 
 
@@ -4844,7 +4845,8 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
 						 " END "
 						 " END AS replication_lag_time, "
 						 " last_wal_receive_lsn >= last_wal_replay_lsn AS receiving_streamed_wal, "
-						 " wal_replay_paused "
+						 " wal_replay_paused, "
+						 " primary_last_seen "
 						 " FROM ( "
 						 " SELECT CURRENT_TIMESTAMP AS ts, "
 						 " pg_catalog.pg_last_xact_replay_timestamp() AS last_xact_replay_timestamp, ");
@@ -4858,7 +4860,7 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
 							 " CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
 							 " THEN FALSE "
 							 " ELSE pg_catalog.pg_is_wal_replay_paused() "
-							 " END AS wal_replay_paused ");
+							 " END AS wal_replay_paused, ");
 	}
 	else
 	{
@@ -4880,10 +4882,14 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
 							 " CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
 							 " THEN FALSE "
 							 " ELSE pg_catalog.pg_is_xlog_replay_paused() "
-							 " END AS wal_replay_paused ");
+							 " END AS wal_replay_paused, ");
 	}
 
 	appendPQExpBufferStr(&query,
+						 " CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
+						 " THEN -1 "
+						 " ELSE repmgr.get_primary_last_seen() "
+						 " END AS primary_last_seen "
 						 " ) q ");
 
 	log_verbose(LOG_DEBUG, "get_replication_info():\n%s", query.data);
@@ -4905,6 +4911,7 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
 		replication_info->replication_lag_time = atoi(PQgetvalue(res, 0, 4));
 		replication_info->receiving_streamed_wal = atobool(PQgetvalue(res, 0, 5));
 		replication_info->wal_replay_paused = atobool(PQgetvalue(res, 0, 6));
+		replication_info->primary_last_seen = atoi(PQgetvalue(res, 0, 7));	/* column 7: -1 when node is not in recovery */
 	}
 
 	termPQExpBuffer(&query);
diff --git a/dbutils.h b/dbutils.h
index 16b1ad9c..6f8b0b4b 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -308,6 +308,7 @@ typedef struct
 	int			replication_lag_time;
 	bool		receiving_streamed_wal;
 	bool		wal_replay_paused;
+	int			primary_last_seen;	/* second(s) since primary was last seen; -1 if not available */
 } ReplInfo;
 
 typedef struct
diff --git a/repmgrd-physical.c b/repmgrd-physical.c
index 78c8860f..3fd52084 100644
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
@@ -3103,6 +3103,9 @@ do_election(void)
 	 */
 	bool		primary_location_seen = false;
+
+	int			nodes_with_primary_still_visible = 0;
+
 
 	electoral_term = get_current_term(local_conn);
 
 	if (electoral_term == -1)
@@ -3306,7 +3309,7 @@ do_election(void)
 		{
 			/*
 			 * Theoretically the repmgrd on the node should have resumed WAL play
-			 * at this point
+			 * at this point.
 			 */
 			if (sibling_replication_info.last_wal_receive_lsn > sibling_replication_info.last_wal_replay_lsn)
 			{
@@ -3316,6 +3319,25 @@ do_election(void)
 			}
 		}
 
+		/*
+		 * Check if node has seen primary "recently" - if so, we may have "partial primary visibility".
+		 * For now we'll assume the primary is visible if it's been seen less than
+		 * monitor_interval_secs * 2 seconds ago. We may need to adjust this, and/or make the value
+		 * configurable. A negative value (-1) means no "last seen" value is available; never count it as visible.
+		 */
+
+
+		if (sibling_replication_info.primary_last_seen >= 0 && sibling_replication_info.primary_last_seen < (config_file_options.monitor_interval_secs * 2))
+		{
+			nodes_with_primary_still_visible++;
+			log_notice(_("node %i last saw primary node %i second(s) ago, considering primary still visible"),
+					   cell->node_info->node_id, sibling_replication_info.primary_last_seen);
+		}
+		else
+		{
+			log_info(_("node %i last saw primary node %i second(s) ago"),
+					 cell->node_info->node_id, sibling_replication_info.primary_last_seen);
+		}
 		/* get node's last receive LSN - if "higher" than current winner, current node is candidate */
 		cell->node_info->last_wal_receive_lsn = sibling_replication_info.last_wal_receive_lsn;
 
@@ -3397,9 +3419,23 @@ do_election(void)
 		return ELECTION_CANCELLED;
 	}
 
-	log_debug("visible nodes: %i; total nodes: %i",
+	if (nodes_with_primary_still_visible > 0)
+	{
+		log_notice(_("%i nodes can see the primary"), nodes_with_primary_still_visible);
+		/* XXX list nodes as detail */
+
+		monitoring_state = MS_DEGRADED;
+		INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
+
+		reset_node_voting_status();
+
+		return ELECTION_CANCELLED;
+	}
+
+	log_info(_("visible nodes: %i; total nodes: %i; no nodes have seen the primary within the last %i seconds"),
 			  visible_nodes,
-			  total_nodes);
+			  total_nodes,
+			  (config_file_options.monitor_interval_secs * 2));
 
 	if (visible_nodes <= (total_nodes / 2.0))
 	{