mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Improve database connection failure logging
Log the output of PQerrorMessage() in a couple of places where it was missing.
Additionally, always log the output of PQerrorMessage() starting with a blank
line, otherwise the first line looks like it was emitted by repmgr, and
it's harder to scan the error message.
Before:
[2019-03-20 11:24:15] [DETAIL] could not connect to server: Connection refused
Is the server running on host "localhost" (::1) and accepting
TCP/IP connections on port 5501?
could not connect to server: Connection refused
Is the server running on host "localhost" (127.0.0.1) and accepting
TCP/IP connections on port 5501?
After:
[2019-03-20 11:27:21] [DETAIL]
could not connect to server: Connection refused
Is the server running on host "localhost" (::1) and accepting
TCP/IP connections on port 5501?
could not connect to server: Connection refused
Is the server running on host "localhost" (127.0.0.1) and accepting
TCP/IP connections on port 5501?
This commit is contained in:
20
dbutils.c
20
dbutils.c
@@ -76,7 +76,7 @@ log_db_error(PGconn *conn, const char *query_text, const char *fmt,...)
|
|||||||
|
|
||||||
if (conn != NULL && PQstatus(conn) == CONNECTION_OK)
|
if (conn != NULL && PQstatus(conn) == CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_detail("%s", PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (query_text != NULL)
|
if (query_text != NULL)
|
||||||
@@ -190,13 +190,13 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
|||||||
{
|
{
|
||||||
if (log_notice)
|
if (log_notice)
|
||||||
{
|
{
|
||||||
log_notice(_("connection to database failed:\n %s"),
|
log_notice(_("connection to database failed"));
|
||||||
PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_error(_("connection to database failed:\n %s"),
|
log_error(_("connection to database failed"));
|
||||||
PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
}
|
}
|
||||||
log_detail(_("attempted to connect using:\n %s"),
|
log_detail(_("attempted to connect using:\n %s"),
|
||||||
connection_string);
|
connection_string);
|
||||||
@@ -287,8 +287,9 @@ establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
|||||||
/* Check to see that the backend connection was successfully made */
|
/* Check to see that the backend connection was successfully made */
|
||||||
if ((PQstatus(conn) != CONNECTION_OK))
|
if ((PQstatus(conn) != CONNECTION_OK))
|
||||||
{
|
{
|
||||||
log_error(_("connection to database failed:\n %s"),
|
log_error(_("connection to database failed"));
|
||||||
PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
|
|
||||||
if (exit_on_error)
|
if (exit_on_error)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
@@ -4203,7 +4204,7 @@ cancel_query(PGconn *conn, int timeout)
|
|||||||
if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0)
|
if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("unable to cancel current query"));
|
log_warning(_("unable to cancel current query"));
|
||||||
log_detail("%s", errbuf);
|
log_detail("\n%s", errbuf);
|
||||||
PQfreeCancel(pgcancel);
|
PQfreeCancel(pgcancel);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -4315,6 +4316,7 @@ is_server_available_params(t_conninfo_param_list *param_list)
|
|||||||
{
|
{
|
||||||
char *conninfo_str = param_list_to_string(param_list);
|
char *conninfo_str = param_list_to_string(param_list);
|
||||||
log_verbose(LOG_DEBUG, "is_server_available_params(): ping status for %s is %i", conninfo_str, (int)status);
|
log_verbose(LOG_DEBUG, "is_server_available_params(): ping status for %s is %i", conninfo_str, (int)status);
|
||||||
|
|
||||||
pfree(conninfo_str);
|
pfree(conninfo_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4352,7 +4354,7 @@ connection_ping_reconnect(PGconn *conn)
|
|||||||
if (PQstatus(conn) != CONNECTION_OK)
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("connection error, attempting to reset"));
|
log_warning(_("connection error, attempting to reset"));
|
||||||
log_detail("%s", PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
PQreset(conn);
|
PQreset(conn);
|
||||||
ping_result = connection_ping(conn);
|
ping_result = connection_ping(conn);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1305,8 +1305,7 @@ do_standby_register(void)
|
|||||||
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s",
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
PQerrorMessage(conn));
|
|
||||||
log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
|
log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -1887,7 +1886,7 @@ do_standby_unregister(void)
|
|||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to connect to primary server"));
|
log_error(_("unable to connect to primary server"));
|
||||||
log_detail("%s", PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3986,14 +3985,14 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
for (cell = all_nodes.head; cell; cell = cell->next)
|
for (cell = all_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
|
||||||
|
|
||||||
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
||||||
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
||||||
repmgrd_info[i]->pid = UNKNOWN_PID;
|
repmgrd_info[i]->pid = UNKNOWN_PID;
|
||||||
repmgrd_info[i]->paused = false;
|
repmgrd_info[i]->paused = false;
|
||||||
repmgrd_info[i]->running = false;
|
repmgrd_info[i]->running = false;
|
||||||
|
|
||||||
|
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -4304,6 +4303,9 @@ do_standby_switchover(void)
|
|||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("connection to local node lost, reconnecting..."));
|
log_warning(_("connection to local node lost, reconnecting..."));
|
||||||
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
|
PQfinish(local_conn);
|
||||||
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
||||||
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
|
|||||||
@@ -56,8 +56,7 @@ do_witness_register(void)
|
|||||||
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s",
|
log_detail("\n%s", PQerrorMessage(witness_conn));
|
||||||
PQerrorMessage(witness_conn));
|
|
||||||
log_hint(_("the witness node must be running before it can be registered"));
|
log_hint(_("the witness node must be running before it can be registered"));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -411,7 +410,7 @@ do_witness_unregister(void)
|
|||||||
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -437,7 +436,7 @@ do_witness_unregister(void)
|
|||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to connect to primary"));
|
log_error(_("unable to connect to primary"));
|
||||||
log_detail("%s", PQerrorMessage(primary_conn));
|
log_detail("\n%s", PQerrorMessage(primary_conn));
|
||||||
|
|
||||||
if (local_node_available == true)
|
if (local_node_available == true)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -2457,6 +2457,7 @@ get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privil
|
|||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("no database connection available"));
|
log_error(_("no database connection available"));
|
||||||
|
log_detail("\n%s", PQerrorMessage(*conn));
|
||||||
exit(ERR_INTERNAL);
|
exit(ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -537,8 +537,6 @@ check_primary_status(int degraded_monitoring_elapsed)
|
|||||||
|
|
||||||
if (PQstatus(new_primary_conn) != CONNECTION_OK)
|
if (PQstatus(new_primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
close_connection(&new_primary_conn);
|
|
||||||
|
|
||||||
if (primary_node_id == UNKNOWN_NODE_ID)
|
if (primary_node_id == UNKNOWN_NODE_ID)
|
||||||
{
|
{
|
||||||
log_warning(_("unable to determine a new primary node"));
|
log_warning(_("unable to determine a new primary node"));
|
||||||
@@ -546,8 +544,11 @@ check_primary_status(int degraded_monitoring_elapsed)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_warning(_("unable to connect to new primary node %i"), primary_node_id);
|
log_warning(_("unable to connect to new primary node %i"), primary_node_id);
|
||||||
|
log_detail("\n%s", PQerrorMessage(new_primary_conn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
close_connection(&new_primary_conn);
|
||||||
|
|
||||||
/* "true" to indicate repmgrd should continue monitoring in degraded state */
|
/* "true" to indicate repmgrd should continue monitoring in degraded state */
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1604,6 +1605,8 @@ monitor_streaming_witness(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
log_warning(_("unable to connect to primary"));
|
||||||
|
log_detail("\n%s", PQerrorMessage(primary_conn));
|
||||||
/*
|
/*
|
||||||
* Here we're unable to connect to a primary despite having scanned all
|
* Here we're unable to connect to a primary despite having scanned all
|
||||||
* known nodes, so we'll grab the record of the node we think is primary
|
* known nodes, so we'll grab the record of the node we think is primary
|
||||||
@@ -2760,12 +2763,16 @@ promote_self(void)
|
|||||||
/* connection should stay up, but check just in case */
|
/* connection should stay up, but check just in case */
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
log_warning(_("local database connection not available"));
|
||||||
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
|
|
||||||
local_conn = establish_db_connection(local_node_info.conninfo, true);
|
local_conn = establish_db_connection(local_node_info.conninfo, true);
|
||||||
|
|
||||||
/* assume node failed */
|
/* assume node failed */
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to reconnect to local node"));
|
log_error(_("unable to reconnect to local node"));
|
||||||
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
/* XXX handle this */
|
/* XXX handle this */
|
||||||
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
|
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
|
||||||
}
|
}
|
||||||
@@ -2869,17 +2876,23 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
|
|||||||
|
|
||||||
for (cell = standby_nodes->head; cell; cell = cell->next)
|
for (cell = standby_nodes->head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "intending to notify node %i... ", cell->node_info->node_id);
|
log_verbose(LOG_DEBUG, "intending to notify node %i...", cell->node_info->node_id);
|
||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_debug("reconnecting to node %i... ", cell->node_info->node_id);
|
log_info(_("reconnecting to node \"%s\" (node ID: %i)..."),
|
||||||
|
cell->node_info->node_name,
|
||||||
|
cell->node_info->node_id);
|
||||||
|
|
||||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_debug("unable to reconnect to %i ... ", cell->node_info->node_id);
|
log_warning(_("unable to reconnect to \"%s\" (node ID: %i)"),
|
||||||
|
cell->node_info->node_name,
|
||||||
|
cell->node_info->node_id);
|
||||||
|
log_detail("\n%s", PQerrorMessage(cell->node_info->conn));
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -3788,6 +3801,7 @@ reset_node_voting_status(void)
|
|||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("reset_node_voting_status(): local_conn not set"));
|
log_error(_("reset_node_voting_status(): local_conn not set"));
|
||||||
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
reset_voting_status(local_conn);
|
reset_voting_status(local_conn);
|
||||||
@@ -3799,7 +3813,10 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
|||||||
{
|
{
|
||||||
if (is_server_available(node_info->conninfo) == false)
|
if (is_server_available(node_info->conninfo) == false)
|
||||||
{
|
{
|
||||||
log_warning(_("connection to node %i lost"), node_info->node_id);
|
log_warning(_("connection to node \"%s\" (ID: %i) lost"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id);
|
||||||
|
log_detail("\n%s", PQerrorMessage(*conn));
|
||||||
PQfinish(*conn);
|
PQfinish(*conn);
|
||||||
*conn = NULL;
|
*conn = NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -433,7 +433,7 @@ main(int argc, char **argv)
|
|||||||
if (extension_status == REPMGR_UNKNOWN)
|
if (extension_status == REPMGR_UNKNOWN)
|
||||||
{
|
{
|
||||||
log_error(_("unable to determine status of \"repmgr\" extension"));
|
log_error(_("unable to determine status of \"repmgr\" extension"));
|
||||||
log_detail("%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
close_connection(&local_conn);
|
close_connection(&local_conn);
|
||||||
exit(ERR_DB_QUERY);
|
exit(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
@@ -846,6 +846,7 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
if (PQstatus(test_conn) != CONNECTION_OK)
|
if (PQstatus(test_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("unable to connect to \"%s\""), conninfo);
|
log_warning(_("unable to connect to \"%s\""), conninfo);
|
||||||
|
log_detail("\n%s", PQerrorMessage(test_conn));
|
||||||
success = false;
|
success = false;
|
||||||
}
|
}
|
||||||
PQfinish(test_conn);
|
PQfinish(test_conn);
|
||||||
|
|||||||
Reference in New Issue
Block a user