mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Improve database connection failure logging
Log the output of PQerrorStatus() in a couple of places where it was missing.
Additionally, always log the output of PQerrorStatus() starting with a blank
line, otherwise the first line looks like it was emitted by repmgr, and
it's harder to scan the error message.
Before:
[2019-03-20 11:24:15] [DETAIL] could not connect to server: Connection refused
Is the server running on host "localhost" (::1) and accepting
TCP/IP connections on port 5501?
could not connect to server: Connection refused
Is the server running on host "localhost" (127.0.0.1) and accepting
TCP/IP connections on port 5501?
After:
[2019-03-20 11:27:21] [DETAIL]
could not connect to server: Connection refused
Is the server running on host "localhost" (::1) and accepting
TCP/IP connections on port 5501?
could not connect to server: Connection refused
Is the server running on host "localhost" (127.0.0.1) and accepting
TCP/IP connections on port 5501?
This commit is contained in:
20
dbutils.c
20
dbutils.c
@@ -76,7 +76,7 @@ log_db_error(PGconn *conn, const char *query_text, const char *fmt,...)
|
||||
|
||||
if (conn != NULL && PQstatus(conn) == CONNECTION_OK)
|
||||
{
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
log_detail("\n%s", PQerrorMessage(conn));
|
||||
}
|
||||
|
||||
if (query_text != NULL)
|
||||
@@ -190,13 +190,13 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
||||
{
|
||||
if (log_notice)
|
||||
{
|
||||
log_notice(_("connection to database failed:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
log_notice(_("connection to database failed"));
|
||||
log_detail("\n%s", PQerrorMessage(conn));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("connection to database failed:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
log_error(_("connection to database failed"));
|
||||
log_detail("\n%s", PQerrorMessage(conn));
|
||||
}
|
||||
log_detail(_("attempted to connect using:\n %s"),
|
||||
connection_string);
|
||||
@@ -287,8 +287,9 @@ establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||
/* Check to see that the backend connection was successfully made */
|
||||
if ((PQstatus(conn) != CONNECTION_OK))
|
||||
{
|
||||
log_error(_("connection to database failed:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
log_error(_("connection to database failed"));
|
||||
log_detail("\n%s", PQerrorMessage(conn));
|
||||
|
||||
if (exit_on_error)
|
||||
{
|
||||
PQfinish(conn);
|
||||
@@ -4202,7 +4203,7 @@ cancel_query(PGconn *conn, int timeout)
|
||||
if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0)
|
||||
{
|
||||
log_warning(_("unable to cancel current query"));
|
||||
log_detail("%s", errbuf);
|
||||
log_detail("\n%s", errbuf);
|
||||
PQfreeCancel(pgcancel);
|
||||
return false;
|
||||
}
|
||||
@@ -4314,6 +4315,7 @@ is_server_available_params(t_conninfo_param_list *param_list)
|
||||
{
|
||||
char *conninfo_str = param_list_to_string(param_list);
|
||||
log_verbose(LOG_DEBUG, "is_server_available_params(): ping status for %s is %i", conninfo_str, (int)status);
|
||||
|
||||
pfree(conninfo_str);
|
||||
}
|
||||
|
||||
@@ -4351,7 +4353,7 @@ connection_ping_reconnect(PGconn *conn)
|
||||
if (PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
log_warning(_("connection error, attempting to reset"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
log_detail("\n%s", PQerrorMessage(conn));
|
||||
PQreset(conn);
|
||||
ping_result = connection_ping(conn);
|
||||
}
|
||||
|
||||
@@ -1305,8 +1305,7 @@ do_standby_register(void)
|
||||
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
log_detail("%s",
|
||||
PQerrorMessage(conn));
|
||||
log_detail("\n%s", PQerrorMessage(conn));
|
||||
log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
@@ -1887,7 +1886,7 @@ do_standby_unregister(void)
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to primary server"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
log_detail("\n%s", PQerrorMessage(conn));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -3986,14 +3985,14 @@ do_standby_switchover(void)
|
||||
|
||||
for (cell = all_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||
|
||||
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
||||
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
||||
repmgrd_info[i]->pid = UNKNOWN_PID;
|
||||
repmgrd_info[i]->paused = false;
|
||||
repmgrd_info[i]->running = false;
|
||||
|
||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
{
|
||||
/*
|
||||
@@ -4304,6 +4303,9 @@ do_standby_switchover(void)
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_warning(_("connection to local node lost, reconnecting..."));
|
||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||
PQfinish(local_conn);
|
||||
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
||||
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
|
||||
@@ -56,8 +56,7 @@ do_witness_register(void)
|
||||
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
log_detail("%s",
|
||||
PQerrorMessage(witness_conn));
|
||||
log_detail("\n%s", PQerrorMessage(witness_conn));
|
||||
log_hint(_("the witness node must be running before it can be registered"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -411,7 +410,7 @@ do_witness_unregister(void)
|
||||
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
log_detail("%s", PQerrorMessage(local_conn));
|
||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -437,7 +436,7 @@ do_witness_unregister(void)
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to primary"));
|
||||
log_detail("%s", PQerrorMessage(primary_conn));
|
||||
log_detail("\n%s", PQerrorMessage(primary_conn));
|
||||
|
||||
if (local_node_available == true)
|
||||
{
|
||||
|
||||
@@ -2457,6 +2457,7 @@ get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privil
|
||||
if (PQstatus(*conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("no database connection available"));
|
||||
log_detail("\n%s", PQerrorMessage(*conn));
|
||||
exit(ERR_INTERNAL);
|
||||
}
|
||||
|
||||
|
||||
@@ -537,8 +537,6 @@ check_primary_status(int degraded_monitoring_elapsed)
|
||||
|
||||
if (PQstatus(new_primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
close_connection(&new_primary_conn);
|
||||
|
||||
if (primary_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
log_warning(_("unable to determine a new primary node"));
|
||||
@@ -546,8 +544,11 @@ check_primary_status(int degraded_monitoring_elapsed)
|
||||
else
|
||||
{
|
||||
log_warning(_("unable to connect to new primary node %i"), primary_node_id);
|
||||
log_detail("\n%s", PQerrorMessage(new_primary_conn));
|
||||
}
|
||||
|
||||
close_connection(&new_primary_conn);
|
||||
|
||||
/* "true" to indicate repmgrd should continue monitoring in degraded state */
|
||||
return true;
|
||||
}
|
||||
@@ -1604,6 +1605,8 @@ monitor_streaming_witness(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warning(_("unable to connect to primary"));
|
||||
log_detail("\n%s", PQerrorMessage(primary_conn));
|
||||
/*
|
||||
* Here we're unable to connect to a primary despite having scanned all
|
||||
* known nodes, so we'll grab the record of the node we think is primary
|
||||
@@ -2760,12 +2763,16 @@ promote_self(void)
|
||||
/* connection should stay up, but check just in case */
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_warning(_("local database connection not available"));
|
||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||
|
||||
local_conn = establish_db_connection(local_node_info.conninfo, true);
|
||||
|
||||
/* assume node failed */
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to reconnect to local node"));
|
||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||
/* XXX handle this */
|
||||
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
|
||||
}
|
||||
@@ -2869,17 +2876,23 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
|
||||
|
||||
for (cell = standby_nodes->head; cell; cell = cell->next)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "intending to notify node %i... ", cell->node_info->node_id);
|
||||
log_verbose(LOG_DEBUG, "intending to notify node %i...", cell->node_info->node_id);
|
||||
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
{
|
||||
log_debug("reconnecting to node %i... ", cell->node_info->node_id);
|
||||
log_info(_("reconnecting to node \"%s\" (node ID: %i)..."),
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->node_id);
|
||||
|
||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||
}
|
||||
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
{
|
||||
log_debug("unable to reconnect to %i ... ", cell->node_info->node_id);
|
||||
log_warning(_("unable to reconnect to \"%s\" (node ID: %i)"),
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->node_id);
|
||||
log_detail("\n%s", PQerrorMessage(cell->node_info->conn));
|
||||
|
||||
continue;
|
||||
}
|
||||
@@ -3788,6 +3801,7 @@ reset_node_voting_status(void)
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("reset_node_voting_status(): local_conn not set"));
|
||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||
return;
|
||||
}
|
||||
reset_voting_status(local_conn);
|
||||
@@ -3799,7 +3813,10 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
||||
{
|
||||
if (is_server_available(node_info->conninfo) == false)
|
||||
{
|
||||
log_warning(_("connection to node %i lost"), node_info->node_id);
|
||||
log_warning(_("connection to node \"%s\" (ID: %i) lost"),
|
||||
node_info->node_name,
|
||||
node_info->node_id);
|
||||
log_detail("\n%s", PQerrorMessage(*conn));
|
||||
PQfinish(*conn);
|
||||
*conn = NULL;
|
||||
}
|
||||
|
||||
@@ -433,7 +433,7 @@ main(int argc, char **argv)
|
||||
if (extension_status == REPMGR_UNKNOWN)
|
||||
{
|
||||
log_error(_("unable to determine status of \"repmgr\" extension"));
|
||||
log_detail("%s", PQerrorMessage(local_conn));
|
||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||
close_connection(&local_conn);
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
@@ -846,6 +846,7 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
||||
if (PQstatus(test_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_warning(_("unable to connect to \"%s\""), conninfo);
|
||||
log_detail("\n%s", PQerrorMessage(test_conn));
|
||||
success = false;
|
||||
}
|
||||
PQfinish(test_conn);
|
||||
|
||||
Reference in New Issue
Block a user