repmgrd: improve logging output for standby monitoring

This commit is contained in:
Ian Barwick
2019-01-21 11:54:37 +09:00
parent 99161c38d2
commit 44cbb44500

View File

@@ -665,11 +665,14 @@ monitor_streaming_standby(void)
terminate(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
(void) get_node_record(upstream_conn, local_node_info.upstream_node_id, &upstream_node_info); log_debug("upstream node ID determined as %i", local_node_info.upstream_node_id);
(void) get_node_record(upstream_conn, local_node_info.upstream_node_id, &upstream_node_info);
} }
else else
{ {
log_debug("upstream node ID in local node record is %i", local_node_info.upstream_node_id);
record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info); record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info);
/* /*
@@ -731,10 +734,11 @@ monitor_streaming_standby(void)
if (upstream_node_info.type == STANDBY) if (upstream_node_info.type == STANDBY)
{ {
log_debug("upstream node is standby, connecting to primary");
/* /*
* Currently cascaded standbys need to be able to connect to the * Currently cascaded standbys need to be able to connect to the
* primary. We could possibly add a limited connection mode for cases * primary. We could possibly add a limited connection mode for cases
* where this isn't possible. * where this isn't possible, but that will complicate things further.
*/ */
primary_conn = establish_primary_db_connection(upstream_conn, false); primary_conn = establish_primary_db_connection(upstream_conn, false);
@@ -750,6 +754,7 @@ monitor_streaming_standby(void)
} }
else else
{ {
log_debug("upstream node is primary");
primary_conn = upstream_conn; primary_conn = upstream_conn;
} }
@@ -770,7 +775,17 @@ monitor_streaming_standby(void)
} }
} }
primary_node_id = get_primary_node_id(primary_conn); if (PQstatus(primary_conn) == CONNECTION_OK)
{
primary_node_id = get_primary_node_id(primary_conn);
log_debug("primary_node_id is %i", primary_node_id);
}
else
{
primary_node_id = get_primary_node_id(local_conn);
log_debug("primary_node_id according to local records is %i", primary_node_id);
}
/* Log startup event */ /* Log startup event */
if (startup_event_logged == false) if (startup_event_logged == false)
@@ -857,17 +872,24 @@ monitor_streaming_standby(void)
if (PQstatus(local_conn) != CONNECTION_OK) if (PQstatus(local_conn) != CONNECTION_OK)
{ {
check_connection(&local_node_info, &local_conn); check_connection(&local_node_info, &local_conn);
log_debug("YYY here");
} }
try_reconnect(&upstream_conn, &upstream_node_info); try_reconnect(&upstream_conn, &upstream_node_info);
/* Node has recovered - log and continue */ /* Upstream node has recovered - log and continue */
if (upstream_node_info.node_status == NODE_STATUS_UP) if (upstream_node_info.node_status == NODE_STATUS_UP)
{ {
int upstream_node_unreachable_elapsed = calculate_elapsed(upstream_node_unreachable_start); int upstream_node_unreachable_elapsed = calculate_elapsed(upstream_node_unreachable_start);
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
_("reconnected to upstream node after %i seconds"),
upstream_node_unreachable_elapsed);
log_notice("%s", event_details.data);
if (upstream_node_info.type == PRIMARY) if (upstream_node_info.type == PRIMARY)
{ {
primary_conn = upstream_conn; primary_conn = upstream_conn;
@@ -876,11 +898,12 @@ monitor_streaming_standby(void)
{ {
ExecStatusType ping_result; ExecStatusType ping_result;
termPQExpBuffer(&event_details);
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"), log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
upstream_node_info.node_name, upstream_node_info.node_id); upstream_node_info.node_name, upstream_node_info.node_id);
PQfinish(upstream_conn); PQfinish(upstream_conn);
upstream_conn = NULL; upstream_conn = NULL;
termPQExpBuffer(&event_details);
local_node_info.upstream_node_id = UNKNOWN_NODE_ID; local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
/* check local connection */ /* check local connection */
@@ -910,13 +933,6 @@ monitor_streaming_standby(void)
} }
} }
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
_("reconnected to upstream node after %i seconds"),
upstream_node_unreachable_elapsed);
log_notice("%s", event_details.data);
create_event_notification(primary_conn, create_event_notification(primary_conn,
&config_file_options, &config_file_options,
config_file_options.node_id, config_file_options.node_id,
@@ -1000,8 +1016,7 @@ monitor_streaming_standby(void)
terminate(ERR_MONITORING_TIMEOUT); terminate(ERR_MONITORING_TIMEOUT);
} }
log_debug("monitoring upstream node %i in degraded state for %i seconds",
log_debug("monitoring node %i in degraded state for %i seconds",
upstream_node_info.node_id, upstream_node_info.node_id,
degraded_monitoring_elapsed); degraded_monitoring_elapsed);
@@ -1011,6 +1026,10 @@ monitor_streaming_standby(void)
if (PQstatus(upstream_conn) == CONNECTION_OK) if (PQstatus(upstream_conn) == CONNECTION_OK)
{ {
log_debug(" upstream node %i has recovered",
upstream_node_info.node_id);
/* XXX check here if upstream is still primary */ /* XXX check here if upstream is still primary */
/* /*
* -> will be a problem if another node was promoted in * -> will be a problem if another node was promoted in