Add some hopefully correct comments for future reference

Also some debugging output which will need to be removed later.
This commit is contained in:
Ian Barwick
2015-01-13 11:12:20 +09:00
parent a1a61e5ec3
commit b80d25ad33

View File

@@ -20,6 +20,9 @@
*/ */
/* ZZZ - remove superfluous debugging output */
#include <signal.h> #include <signal.h>
#include <sys/types.h> #include <sys/types.h>
@@ -424,7 +427,10 @@ main(int argc, char **argv)
if (my_local_mode == WITNESS_MODE) if (my_local_mode == WITNESS_MODE)
witness_monitor(); witness_monitor();
else if (my_local_mode == STANDBY_MODE) else if (my_local_mode == STANDBY_MODE)
{
standby_monitor(); standby_monitor();
log_debug(_("returned from standby_monitor()\n"));
}
sleep(local_options.monitor_interval_secs); sleep(local_options.monitor_interval_secs);
if (got_SIGHUP) if (got_SIGHUP)
@@ -441,6 +447,10 @@ main(int argc, char **argv)
} }
got_SIGHUP = false; got_SIGHUP = false;
} }
if(failover_done)
{
log_debug(_("standby check loop will terminate\n"));
}
} while (!failover_done); } while (!failover_done);
break; break;
default: default:
@@ -448,6 +458,8 @@ main(int argc, char **argv)
local_options.node); local_options.node);
} }
log_debug(_("end of main loop\n"));
failover_done = false; failover_done = false;
} while (true); } while (true);
@@ -678,6 +690,7 @@ standby_monitor(void)
* and a new primary_conn * and a new primary_conn
*/ */
do_failover(); do_failover();
log_debug("standby_monitor() - returning from do_failover()\n");
return; return;
} }
} }
@@ -685,11 +698,22 @@ standby_monitor(void)
/* Check if we still are a standby, we could have been promoted */ /* Check if we still are a standby, we could have been promoted */
do do
{ {
log_debug("standby_monitor() - checking if still standby\n");
ret = is_standby(my_local_conn); ret = is_standby(my_local_conn);
switch (ret) switch (ret)
{ {
case 0: case 0:
/*
* This situation can occur if `pg_ctl promote` was manually executed
* on the node. If the original master is still running after this
* node has been promoted, we're in a "two brain" situation which
* will require manual resolution as there's no way of determining
* which master is the correct one.
*
* XXX check if the original master is still active and display a
* warning
*/
log_err(_("It seems like we have been promoted, so exit from monitoring...\n")); log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
terminate(1); terminate(1);
break; break;
@@ -910,6 +934,7 @@ do_failover(void)
/* Query all the nodes to determine which ones are ready */ /* Query all the nodes to determine which ones are ready */
for (i = 0; i < total_nodes; i++) for (i = 0; i < total_nodes; i++)
{ {
log_debug("checking node %i...\n", nodes[i].node_id);
/* if the node is not visible, skip it */ /* if the node is not visible, skip it */
if (!nodes[i].is_visible) if (!nodes[i].is_visible)
continue; continue;
@@ -975,13 +1000,14 @@ do_failover(void)
update_shared_memory(last_wal_standby_applied); update_shared_memory(last_wal_standby_applied);
terminate(ERR_DB_QUERY); terminate(ERR_DB_QUERY);
} }
/* write last location in shared memory */ /* write last location in shared memory */
update_shared_memory(PQgetvalue(res, 0, 0)); update_shared_memory(PQgetvalue(res, 0, 0));
PQclear(res); PQclear(res);
/* Wait for each node to come up and report a valid LSN */
for (i = 0; i < total_nodes; i++) for (i = 0; i < total_nodes; i++)
{ {
log_debug(_("is_ready check for node %i\n"), nodes[i].node_id);
while (!nodes[i].is_ready) while (!nodes[i].is_ready)
{ {
/* /*
@@ -1003,6 +1029,7 @@ do_failover(void)
break; break;
/* if the node is ready there is nothing to check, skip it too */ /* if the node is ready there is nothing to check, skip it too */
/* ZZZ is this check pointless? */
if (nodes[i].is_ready) if (nodes[i].is_ready)
break; break;
@@ -1038,17 +1065,27 @@ do_failover(void)
xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok); xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
/* If position is 0/0, check for format error, otherwise continue loop */ /* If position reported as "invalid", check for format error or
* empty string; otherwise position is 0/0 and we need to continue
* looping until a valid LSN is reported
*/
if(xlog_recptr == InvalidXLogRecPtr) if(xlog_recptr == InvalidXLogRecPtr)
{ {
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */ log_debug("Invalid LSN returned - '%s'", PQgetvalue(res, 0, 0));
if(lsn_format_ok == false) if(lsn_format_ok == false)
{ {
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
if(*PQgetvalue(res, 0, 0) == '\0') if(*PQgetvalue(res, 0, 0) == '\0')
{ {
log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n"); log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n");
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/*
* Very unlikely to happen; in the absence of any better
* strategy keep checking
*/
log_warning(_("Unable to parse LSN \"%s\"\n"), log_warning(_("Unable to parse LSN \"%s\"\n"),
PQgetvalue(res, 0, 0)); PQgetvalue(res, 0, 0));
} }
@@ -1056,6 +1093,7 @@ do_failover(void)
PQclear(res); PQclear(res);
PQfinish(node_conn); PQfinish(node_conn);
/* If position is 0/0, keep checking */
continue; continue;
} }
@@ -1186,6 +1224,7 @@ do_failover(void)
terminate(ERR_FAILOVER_FAIL); terminate(ERR_FAILOVER_FAIL);
} }
log_debug("failover done\n");
/* to force it to re-calculate mode and master node */ /* to force it to re-calculate mode and master node */
failover_done = true; failover_done = true;