Minor refactoring of do_master_failover()

- rename some variables for clarity
- ensure all structures are initialised correctly
- update code comments
This commit is contained in:
Ian Barwick
2016-08-16 11:23:59 +09:00
parent d244fb29d7
commit 9831cabd4d

View File

@@ -315,8 +315,9 @@ main(int argc, char **argv)
/* /*
* MAIN LOOP This loops cycles at startup and once per failover and * MAIN LOOP This loops cycles at startup and once per failover and
* Requisites: - my_local_conn needs to be already setted with an active * Requisites:
* connection - no master connection * - my_local_conn must have an active connection to the monitored node
* - master_conn must not be open
*/ */
do do
{ {
@@ -1253,7 +1254,7 @@ do_master_failover(void)
PGresult *res; PGresult *res;
char sqlquery[QUERY_STR_LEN]; char sqlquery[QUERY_STR_LEN];
int total_nodes = 0; int total_active_nodes = 0;
int visible_nodes = 0; int visible_nodes = 0;
int ready_nodes = 0; int ready_nodes = 0;
@@ -1284,7 +1285,7 @@ do_master_failover(void)
"SELECT id, conninfo, type, upstream_node_id " "SELECT id, conninfo, type, upstream_node_id "
" FROM %s.repl_nodes " " FROM %s.repl_nodes "
" WHERE cluster = '%s' " " WHERE cluster = '%s' "
" AND active IS TRUE " " AND active IS TRUE "
" AND priority > 0 " " AND priority > 0 "
" ORDER BY priority DESC, id " " ORDER BY priority DESC, id "
" LIMIT %i ", " LIMIT %i ",
@@ -1300,32 +1301,25 @@ do_master_failover(void)
terminate(ERR_DB_QUERY); terminate(ERR_DB_QUERY);
} }
/* total_active_nodes = PQntuples(res);
* total nodes that are registered log_debug(_("%d active nodes registered\n"), total_active_nodes);
*/
total_nodes = PQntuples(res);
log_debug(_("%d active nodes registered\n"), total_nodes);
/* /*
* Build an array with the nodes and indicate which ones are visible and * Build an array with the nodes and indicate which ones are visible and
* ready * ready
*/ */
for (i = 0; i < total_nodes; i++) for (i = 0; i < total_active_nodes; i++)
{ {
char node_type[MAXLEN];
nodes[i] = (t_node_info) T_NODE_INFO_INITIALIZER;
nodes[i].node_id = atoi(PQgetvalue(res, i, 0)); nodes[i].node_id = atoi(PQgetvalue(res, i, 0));
strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXCONNINFO); strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXCONNINFO);
strncpy(node_type, PQgetvalue(res, i, 2), MAXLEN);
nodes[i].type = parse_node_type(PQgetvalue(res, i, 2)); nodes[i].type = parse_node_type(node_type);
/* Copy details of the failed node */
/* XXX only node_id is actually used later */
if (nodes[i].type == MASTER)
{
failed_master.node_id = nodes[i].node_id;
failed_master.xlog_location = nodes[i].xlog_location;
failed_master.is_ready = nodes[i].is_ready;
}
nodes[i].upstream_node_id = atoi(PQgetvalue(res, i, 3)); nodes[i].upstream_node_id = atoi(PQgetvalue(res, i, 3));
@@ -1336,12 +1330,21 @@ do_master_failover(void)
nodes[i].is_visible = false; nodes[i].is_visible = false;
nodes[i].is_ready = false; nodes[i].is_ready = false;
nodes[i].xlog_location = InvalidXLogRecPtr; /* Copy details of the failed master node */
/* XXX only node_id is actually used later */
if (nodes[i].type == MASTER)
{
failed_master.node_id = nodes[i].node_id;
failed_master.xlog_location = nodes[i].xlog_location;
failed_master.is_ready = nodes[i].is_ready;
}
log_debug(_("node=%d conninfo=\"%s\" type=%s\n"), log_debug(_("node=%i conninfo=\"%s\" type=%s\n"),
nodes[i].node_id, nodes[i].conninfo_str, nodes[i].node_id,
PQgetvalue(res, i, 2)); nodes[i].conninfo_str,
node_type);
/* XXX do we need to try and connect to the master here? */
node_conn = establish_db_connection(nodes[i].conninfo_str, false); node_conn = establish_db_connection(nodes[i].conninfo_str, false);
/* if we can't see the node just skip it */ /* if we can't see the node just skip it */
@@ -1361,13 +1364,13 @@ do_master_failover(void)
PQclear(res); PQclear(res);
log_debug(_("total nodes counted: registered=%d, visible=%d\n"), log_debug(_("total nodes counted: registered=%d, visible=%d\n"),
total_nodes, visible_nodes); total_active_nodes, visible_nodes);
/* /*
* Am I on the group that should keep alive? If I see less than half of * Am I on the group that should keep alive? If I see less than half of
* total_nodes then I should do nothing * total_active_nodes then I should do nothing
*/ */
if (visible_nodes < (total_nodes / 2.0)) if (visible_nodes < (total_active_nodes / 2.0))
{ {
log_err(_("Unable to reach most of the nodes.\n" log_err(_("Unable to reach most of the nodes.\n"
"Let the other standby servers decide which one will be the master.\n" "Let the other standby servers decide which one will be the master.\n"
@@ -1376,7 +1379,7 @@ do_master_failover(void)
} }
/* Query all available nodes to determine readiness and LSN */ /* Query all available nodes to determine readiness and LSN */
for (i = 0; i < total_nodes; i++) for (i = 0; i < total_active_nodes; i++)
{ {
log_debug("checking node %i...\n", nodes[i].node_id); log_debug("checking node %i...\n", nodes[i].node_id);
@@ -1454,7 +1457,7 @@ do_master_failover(void)
PQclear(res); PQclear(res);
/* Wait for each node to come up and report a valid LSN */ /* Wait for each node to come up and report a valid LSN */
for (i = 0; i < total_nodes; i++) for (i = 0; i < total_active_nodes; i++)
{ {
/* /*
* ensure witness server is marked as ready, and skip * ensure witness server is marked as ready, and skip
@@ -1614,7 +1617,7 @@ do_master_failover(void)
/* /*
* determine which one is the best candidate to promote to master * determine which one is the best candidate to promote to master
*/ */
for (i = 0; i < total_nodes; i++) for (i = 0; i < total_active_nodes; i++)
{ {
/* witness server can never be a candidate */ /* witness server can never be a candidate */
if (nodes[i].type == WITNESS) if (nodes[i].type == WITNESS)
@@ -1839,8 +1842,10 @@ do_master_failover(void)
termPQExpBuffer(&event_details); termPQExpBuffer(&event_details);
} }
/* to force it to re-calculate mode and master node */ /*
// ^ ZZZ check that behaviour ^ * setting "failover_done" to true will cause the node's monitoring loop
* to restart in the appropriate mode for the node's (possibly new) role
*/
failover_done = true; failover_done = true;
} }