Count witness and zero-priority nodes in visibility check

This commit is contained in:
Ian Barwick
2019-03-14 11:17:51 +09:00
parent 56d9f5b856
commit 19bf4d7434
5 changed files with 41 additions and 31 deletions

View File

@@ -4898,7 +4898,7 @@ init_replication_info(ReplInfo *replication_info)
bool
get_replication_info(PGconn *conn, ReplInfo *replication_info)
get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info)
{
PQExpBufferData query;
PGresult *res = NULL;
@@ -4960,11 +4960,21 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
" END AS wal_replay_paused, ");
}
if (node_type == WITNESS)
{
appendPQExpBufferStr(&query,
" repmgr.get_upstream_last_seen() AS upstream_last_seen");
}
else
{
appendPQExpBufferStr(&query,
" CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
" THEN -1 "
" ELSE repmgr.get_upstream_last_seen() "
" END AS upstream_last_seen ");
}
appendPQExpBufferStr(&query,
" CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
" THEN -1 "
" ELSE repmgr.get_upstream_last_seen() "
" END AS upstream_last_seen "
" ) q ");
log_verbose(LOG_DEBUG, "get_replication_info():\n%s", query.data);

View File

@@ -554,7 +554,7 @@ XLogRecPtr get_primary_current_lsn(PGconn *conn);
XLogRecPtr get_node_current_lsn(PGconn *conn);
XLogRecPtr get_last_wal_receive_location(PGconn *conn);
void init_replication_info(ReplInfo *replication_info);
bool get_replication_info(PGconn *conn, ReplInfo *replication_info);
bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info);
int get_replication_lag_seconds(PGconn *conn);
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
bool is_downstream_node_attached(PGconn *conn, char *node_name);

View File

@@ -413,7 +413,7 @@ do_node_status(void)
node_info.upstream_node_name,
node_info.upstream_node_id);
get_replication_info(conn, &replication_info);
get_replication_info(conn, node_info.type, &replication_info);
key_value_list_set_format(&node_status,
"Replication lag",

View File

@@ -2009,7 +2009,7 @@ do_standby_promote(void)
init_replication_info(&replication_info);
if (get_replication_info(conn, &replication_info) == false)
if (get_replication_info(conn, STANDBY, &replication_info) == false)
{
log_error(_("unable to retrieve replication information from local node"));
PQfinish(conn);
@@ -3262,7 +3262,7 @@ do_standby_switchover(void)
ReplInfo replication_info;
init_replication_info(&replication_info);
if (get_replication_info(local_conn, &replication_info) == false)
if (get_replication_info(local_conn, STANDBY, &replication_info) == false)
{
log_error(_("unable to retrieve replication information from local node"));
PQfinish(local_conn);
@@ -4326,7 +4326,7 @@ do_standby_switchover(void)
for (i = 0; i < config_file_options.wal_receive_check_timeout; i++)
{
get_replication_info(local_conn, &replication_info);
get_replication_info(local_conn, STANDBY, &replication_info);
if (replication_info.last_wal_receive_lsn >= remote_last_checkpoint_lsn)
break;

View File

@@ -2381,7 +2381,7 @@ update_monitoring_history(void)
init_replication_info(&replication_info);
if (get_replication_info(local_conn, &replication_info) == false)
if (get_replication_info(local_conn, STANDBY, &replication_info) == false)
{
log_warning(_("unable to retrieve replication status information, unable to update monitoring history"));
return;
@@ -3376,7 +3376,7 @@ do_election(NodeInfoList *sibling_nodes)
}
/* get our lsn */
if (get_replication_info(local_conn, &local_replication_info) == false)
if (get_replication_info(local_conn, STANDBY, &local_replication_info) == false)
{
log_error(_("unable to retrieve replication information for local node"));
return ELECTION_LOST;
@@ -3444,24 +3444,6 @@ do_election(NodeInfoList *sibling_nodes)
}
}
/* don't interrogate a witness server */
if (cell->node_info->type == WITNESS)
{
log_debug("node %i is witness, not querying state", cell->node_info->node_id);
continue;
}
/* don't check 0-priority nodes */
if (cell->node_info->priority <= 0)
{
log_info(_("node %i has priority of %i, skipping"),
cell->node_info->node_id,
cell->node_info->priority);
continue;
}
/*
* check if repmgrd running - skip if not
*
@@ -3478,7 +3460,7 @@ do_election(NodeInfoList *sibling_nodes)
continue;
}
if (get_replication_info(cell->node_info->conn, &sibling_replication_info) == false)
if (get_replication_info(cell->node_info->conn, cell->node_info->type, &sibling_replication_info) == false)
{
log_warning(_("unable to retrieve replication information for node \"%s\" (ID: %i), skipping"),
cell->node_info->node_name,
@@ -3527,6 +3509,24 @@ do_election(NodeInfoList *sibling_nodes)
sibling_replication_info.upstream_last_seen);
}
/* witness has already been queried above (for upstream_last_seen); it is never a promotion candidate, so skip the LSN comparison */
if (cell->node_info->type == WITNESS)
{
log_debug("node %i is witness, not querying state", cell->node_info->node_id);
continue;
}
/* 0-priority nodes have already been queried above, but are not eligible as promotion candidates */
if (cell->node_info->priority <= 0)
{
log_info(_("node %i has priority of %i, skipping"),
cell->node_info->node_id,
cell->node_info->priority);
continue;
}
/* get node's last receive LSN - if "higher" than current winner, current node is candidate */
cell->node_info->last_wal_receive_lsn = sibling_replication_info.last_wal_receive_lsn;