Improve database connection failure logging

Log the output of PQerrorStatus() in a couple of places where it was missing.

Additionally, always log the output of PQerrorStatus() starting with a blank
line, otherwise the first line looks like it was emitted by repmgr, and
it's harder to scan the error message.

Before:

    [2019-03-20 11:24:15] [DETAIL] could not connect to server: Connection refused
            Is the server running on host "localhost" (::1) and accepting
            TCP/IP connections on port 5501?
    could not connect to server: Connection refused
            Is the server running on host "localhost" (127.0.0.1) and accepting
            TCP/IP connections on port 5501?

After:

    [2019-03-20 11:27:21] [DETAIL]
    could not connect to server: Connection refused
            Is the server running on host "localhost" (::1) and accepting
            TCP/IP connections on port 5501?
    could not connect to server: Connection refused
            Is the server running on host "localhost" (127.0.0.1) and accepting
            TCP/IP connections on port 5501?
This commit is contained in:
Ian Barwick
2019-03-20 11:27:12 +09:00
parent a38e229e61
commit 5cbaff8d0a
6 changed files with 47 additions and 25 deletions

View File

@@ -76,7 +76,7 @@ log_db_error(PGconn *conn, const char *query_text, const char *fmt,...)
if (conn != NULL && PQstatus(conn) == CONNECTION_OK)
{
log_detail("%s", PQerrorMessage(conn));
log_detail("\n%s", PQerrorMessage(conn));
}
if (query_text != NULL)
@@ -190,13 +190,13 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
{
if (log_notice)
{
log_notice(_("connection to database failed:\n %s"),
PQerrorMessage(conn));
log_notice(_("connection to database failed"));
log_detail("\n%s", PQerrorMessage(conn));
}
else
{
log_error(_("connection to database failed:\n %s"),
PQerrorMessage(conn));
log_error(_("connection to database failed"));
log_detail("\n%s", PQerrorMessage(conn));
}
log_detail(_("attempted to connect using:\n %s"),
connection_string);
@@ -287,8 +287,9 @@ establish_db_connection_by_params(t_conninfo_param_list *param_list,
/* Check to see that the backend connection was successfully made */
if ((PQstatus(conn) != CONNECTION_OK))
{
log_error(_("connection to database failed:\n %s"),
PQerrorMessage(conn));
log_error(_("connection to database failed"));
log_detail("\n%s", PQerrorMessage(conn));
if (exit_on_error)
{
PQfinish(conn);
@@ -4202,7 +4203,7 @@ cancel_query(PGconn *conn, int timeout)
if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0)
{
log_warning(_("unable to cancel current query"));
log_detail("%s", errbuf);
log_detail("\n%s", errbuf);
PQfreeCancel(pgcancel);
return false;
}
@@ -4314,6 +4315,7 @@ is_server_available_params(t_conninfo_param_list *param_list)
{
char *conninfo_str = param_list_to_string(param_list);
log_verbose(LOG_DEBUG, "is_server_available_params(): ping status for %s is %i", conninfo_str, (int)status);
pfree(conninfo_str);
}
@@ -4351,7 +4353,7 @@ connection_ping_reconnect(PGconn *conn)
if (PQstatus(conn) != CONNECTION_OK)
{
log_warning(_("connection error, attempting to reset"));
log_detail("%s", PQerrorMessage(conn));
log_detail("\n%s", PQerrorMessage(conn));
PQreset(conn);
ping_result = connection_ping(conn);
}

View File

@@ -1305,8 +1305,7 @@ do_standby_register(void)
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
config_file_options.node_name,
config_file_options.node_id);
log_detail("%s",
PQerrorMessage(conn));
log_detail("\n%s", PQerrorMessage(conn));
log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
exit(ERR_BAD_CONFIG);
@@ -1887,7 +1886,7 @@ do_standby_unregister(void)
if (PQstatus(primary_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to primary server"));
log_detail("%s", PQerrorMessage(conn));
log_detail("\n%s", PQerrorMessage(conn));
exit(ERR_BAD_CONFIG);
}
@@ -3986,14 +3985,14 @@ do_standby_switchover(void)
for (cell = all_nodes.head; cell; cell = cell->next)
{
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
repmgrd_info[i]->node_id = cell->node_info->node_id;
repmgrd_info[i]->pid = UNKNOWN_PID;
repmgrd_info[i]->paused = false;
repmgrd_info[i]->running = false;
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{
/*
@@ -4304,6 +4303,9 @@ do_standby_switchover(void)
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_warning(_("connection to local node lost, reconnecting..."));
log_detail("\n%s", PQerrorMessage(local_conn));
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, false);
if (PQstatus(local_conn) != CONNECTION_OK)

View File

@@ -56,8 +56,7 @@ do_witness_register(void)
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
config_file_options.node_name,
config_file_options.node_id);
log_detail("%s",
PQerrorMessage(witness_conn));
log_detail("\n%s", PQerrorMessage(witness_conn));
log_hint(_("the witness node must be running before it can be registered"));
exit(ERR_BAD_CONFIG);
}
@@ -411,7 +410,7 @@ do_witness_unregister(void)
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
config_file_options.node_name,
config_file_options.node_id);
log_detail("%s", PQerrorMessage(local_conn));
log_detail("\n%s", PQerrorMessage(local_conn));
exit(ERR_BAD_CONFIG);
}
@@ -437,7 +436,7 @@ do_witness_unregister(void)
if (PQstatus(primary_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to primary"));
log_detail("%s", PQerrorMessage(primary_conn));
log_detail("\n%s", PQerrorMessage(primary_conn));
if (local_node_available == true)
{

View File

@@ -2457,6 +2457,7 @@ get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privil
if (PQstatus(*conn) != CONNECTION_OK)
{
log_error(_("no database connection available"));
log_detail("\n%s", PQerrorMessage(*conn));
exit(ERR_INTERNAL);
}

View File

@@ -537,8 +537,6 @@ check_primary_status(int degraded_monitoring_elapsed)
if (PQstatus(new_primary_conn) != CONNECTION_OK)
{
close_connection(&new_primary_conn);
if (primary_node_id == UNKNOWN_NODE_ID)
{
log_warning(_("unable to determine a new primary node"));
@@ -546,8 +544,11 @@ check_primary_status(int degraded_monitoring_elapsed)
else
{
log_warning(_("unable to connect to new primary node %i"), primary_node_id);
log_detail("\n%s", PQerrorMessage(new_primary_conn));
}
close_connection(&new_primary_conn);
/* "true" to indicate repmgrd should continue monitoring in degraded state */
return true;
}
@@ -1604,6 +1605,8 @@ monitor_streaming_witness(void)
}
else
{
log_warning(_("unable to connect to primary"));
log_detail("\n%s", PQerrorMessage(primary_conn));
/*
* Here we're unable to connect to a primary despite having scanned all
* known nodes, so we'll grab the record of the node we think is primary
@@ -2760,12 +2763,16 @@ promote_self(void)
/* connection should stay up, but check just in case */
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_warning(_("local database connection not available"));
log_detail("\n%s", PQerrorMessage(local_conn));
local_conn = establish_db_connection(local_node_info.conninfo, true);
/* assume node failed */
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_error(_("unable to reconnect to local node"));
log_detail("\n%s", PQerrorMessage(local_conn));
/* XXX handle this */
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
}
@@ -2869,17 +2876,23 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
for (cell = standby_nodes->head; cell; cell = cell->next)
{
log_verbose(LOG_DEBUG, "intending to notify node %i... ", cell->node_info->node_id);
log_verbose(LOG_DEBUG, "intending to notify node %i...", cell->node_info->node_id);
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{
log_debug("reconnecting to node %i... ", cell->node_info->node_id);
log_info(_("reconnecting to node \"%s\" (node ID: %i)..."),
cell->node_info->node_name,
cell->node_info->node_id);
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
}
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{
log_debug("unable to reconnect to %i ... ", cell->node_info->node_id);
log_warning(_("unable to reconnect to \"%s\" (node ID: %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
log_detail("\n%s", PQerrorMessage(cell->node_info->conn));
continue;
}
@@ -3788,6 +3801,7 @@ reset_node_voting_status(void)
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_error(_("reset_node_voting_status(): local_conn not set"));
log_detail("\n%s", PQerrorMessage(local_conn));
return;
}
reset_voting_status(local_conn);
@@ -3799,7 +3813,10 @@ check_connection(t_node_info *node_info, PGconn **conn)
{
if (is_server_available(node_info->conninfo) == false)
{
log_warning(_("connection to node %i lost"), node_info->node_id);
log_warning(_("connection to node \"%s\" (ID: %i) lost"),
node_info->node_name,
node_info->node_id);
log_detail("\n%s", PQerrorMessage(*conn));
PQfinish(*conn);
*conn = NULL;
}

View File

@@ -433,7 +433,7 @@ main(int argc, char **argv)
if (extension_status == REPMGR_UNKNOWN)
{
log_error(_("unable to determine status of \"repmgr\" extension"));
log_detail("%s", PQerrorMessage(local_conn));
log_detail("\n%s", PQerrorMessage(local_conn));
close_connection(&local_conn);
exit(ERR_DB_QUERY);
}
@@ -846,6 +846,7 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
if (PQstatus(test_conn) != CONNECTION_OK)
{
log_warning(_("unable to connect to \"%s\""), conninfo);
log_detail("\n%s", PQerrorMessage(test_conn));
success = false;
}
PQfinish(test_conn);