mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 17:06:29 +00:00
Improve handling of event logging in repmgrd
Provide the master connection if available; if not, allow create_event_record() to skip writing to the database while still executing the notification program, if one is defined.
This commit is contained in:
38
dbutils.c
38
dbutils.c
@@ -1105,12 +1105,18 @@ delete_node_record(PGconn *conn, int node, char *action)
|
|||||||
/*
|
/*
|
||||||
* create_event_record()
|
* create_event_record()
|
||||||
*
|
*
|
||||||
* Insert a record into the events table.
|
* If `conn` is not NULL, insert a record into the events table.
|
||||||
*
|
*
|
||||||
* If configuration parameter `event_notification_command` is set, also
|
* If configuration parameter `event_notification_command` is set, also
|
||||||
* attempt to execute that command.
|
* attempt to execute that command.
|
||||||
*
|
*
|
||||||
* Returns true if all operations succeeded, false if one or more failed.
|
* Returns true if all operations succeeded, false if one or more failed.
|
||||||
|
*
|
||||||
|
* Note this function may be called with `conn` set to NULL in cases where
|
||||||
|
* the master node is not available and it's therefore not possible to write
|
||||||
|
* an event record. In this case, if `event_notification_command` is set, a user-
|
||||||
|
* defined notification will be generated; if not, this function will have
|
||||||
|
* no effect.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@@ -1120,7 +1126,10 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
PGresult *res;
|
PGresult *res;
|
||||||
char event_timestamp[MAXLEN] = "";
|
char event_timestamp[MAXLEN] = "";
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
struct tm ts;
|
||||||
|
|
||||||
|
if(conn != NULL)
|
||||||
|
{
|
||||||
int n_node_id = htonl(node_id);
|
int n_node_id = htonl(node_id);
|
||||||
char *t_successful = successful ? "TRUE" : "FALSE";
|
char *t_successful = successful ? "TRUE" : "FALSE";
|
||||||
|
|
||||||
@@ -1135,6 +1144,7 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
0,
|
0,
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
int binary[4] = {1, 0, 0, 0};
|
int binary[4] = {1, 0, 0, 0};
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
@@ -1159,30 +1169,36 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
|
|||||||
|
|
||||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
time_t now;
|
|
||||||
struct tm ts;
|
|
||||||
|
|
||||||
log_warning(_("Unable to create event record: %s\n"),
|
log_warning(_("Unable to create event record: %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
|
|
||||||
success = false;
|
success = false;
|
||||||
|
|
||||||
/*
|
|
||||||
* If the query fails for whatever reason, generate a
|
|
||||||
* current timestamp ourselves. This isn't quite the same
|
|
||||||
* format as PostgreSQL, but is close enough for diagnostic use.
|
|
||||||
*/
|
|
||||||
time(&now);
|
|
||||||
ts = *localtime(&now);
|
|
||||||
strftime(event_timestamp, MAXLEN, "%Y-%m-%d %H:%M:%S%z", &ts);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/* Store timestamp to send to the notification command */
|
||||||
strncpy(event_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
strncpy(event_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
||||||
log_debug(_("Event timestamp is: %s\n"), event_timestamp);
|
log_debug(_("Event timestamp is: %s\n"), event_timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If no database connection provided, or the query failed, generate a
|
||||||
|
* current timestamp ourselves. This isn't quite the same
|
||||||
|
* format as PostgreSQL, but is close enough for diagnostic use.
|
||||||
|
*/
|
||||||
|
if(!strlen(event_timestamp))
|
||||||
|
{
|
||||||
|
time_t now;
|
||||||
|
|
||||||
|
time(&now);
|
||||||
|
ts = *localtime(&now);
|
||||||
|
strftime(event_timestamp, MAXLEN, "%Y-%m-%d %H:%M:%S%z", &ts);
|
||||||
|
}
|
||||||
|
|
||||||
/* an event notification command was provided - parse and execute it */
|
/* an event notification command was provided - parse and execute it */
|
||||||
if(strlen(options->event_notification_command))
|
if(strlen(options->event_notification_command))
|
||||||
|
|||||||
38
repmgrd.c
38
repmgrd.c
@@ -410,12 +410,15 @@ main(int argc, char **argv)
|
|||||||
_("unable to connect to master node '%s'"),
|
_("unable to connect to master node '%s'"),
|
||||||
local_options.cluster_name);
|
local_options.cluster_name);
|
||||||
|
|
||||||
create_event_record(my_local_conn,
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
|
create_event_record(NULL,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
false,
|
false,
|
||||||
errmsg.data);
|
errmsg.data);
|
||||||
|
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -586,8 +589,10 @@ witness_monitor(void)
|
|||||||
|
|
||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("unable to determine a valid master node, terminating..."));
|
_("unable to determine a valid master node, terminating..."));
|
||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
create_event_record(my_local_conn,
|
|
||||||
|
create_event_record(NULL,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
@@ -659,7 +664,7 @@ witness_monitor(void)
|
|||||||
/*
|
/*
|
||||||
* Insert monitor info, this is basically the time and xlog replayed,
|
* Insert monitor info, this is basically the time and xlog replayed,
|
||||||
* applied on standby and current xlog location in primary.
|
* applied on standby and current xlog location in primary.
|
||||||
* Also do the math to see how far are we in bytes for being uptodate
|
* Also do the math to see how far are we in bytes for being up-to-date
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
standby_monitor(void)
|
standby_monitor(void)
|
||||||
@@ -705,12 +710,13 @@ standby_monitor(void)
|
|||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(my_local_conn,
|
create_event_record(primary_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
false,
|
false,
|
||||||
errmsg.data);
|
errmsg.data);
|
||||||
|
|
||||||
terminate(ERR_DB_CON);
|
terminate(ERR_DB_CON);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -777,9 +783,10 @@ standby_monitor(void)
|
|||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("Unable to reconnect to master after %i attempts, terminating..."),
|
_("Unable to reconnect to master after %i attempts, terminating..."),
|
||||||
local_options.reconnect_attempts);
|
local_options.reconnect_attempts);
|
||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(my_local_conn,
|
create_event_record(NULL,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
@@ -819,11 +826,11 @@ standby_monitor(void)
|
|||||||
initPQExpBuffer(&errmsg);
|
initPQExpBuffer(&errmsg);
|
||||||
|
|
||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("unable to reconnect to new upstream node, terminating...")
|
_("unable to reconnect to new upstream node, terminating..."));
|
||||||
);
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(my_local_conn,
|
create_event_record(primary_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
@@ -1434,7 +1441,9 @@ do_primary_failover(void)
|
|||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
failed_primary.node_id);
|
failed_primary.node_id);
|
||||||
|
|
||||||
create_event_record(my_local_conn,
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
|
create_event_record(NULL,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
"repmgrd_failover_promote",
|
"repmgrd_failover_promote",
|
||||||
@@ -1444,7 +1453,7 @@ do_primary_failover(void)
|
|||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update internal record for this node*/
|
/* update internal record for this node */
|
||||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
@@ -1452,6 +1461,7 @@ do_primary_failover(void)
|
|||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
failed_primary.node_id);
|
failed_primary.node_id);
|
||||||
|
|
||||||
|
/* my_local_conn is now the master */
|
||||||
create_event_record(my_local_conn,
|
create_event_record(my_local_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
@@ -1500,6 +1510,8 @@ do_primary_failover(void)
|
|||||||
node_info.slot_name,
|
node_info.slot_name,
|
||||||
PQerrorMessage(new_primary_conn));
|
PQerrorMessage(new_primary_conn));
|
||||||
|
|
||||||
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
create_event_record(new_primary_conn,
|
create_event_record(new_primary_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
@@ -1507,8 +1519,6 @@ do_primary_failover(void)
|
|||||||
false,
|
false,
|
||||||
event_details.data);
|
event_details.data);
|
||||||
|
|
||||||
log_err("%s\n", event_details.data);
|
|
||||||
|
|
||||||
PQfinish(new_primary_conn);
|
PQfinish(new_primary_conn);
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
@@ -1532,6 +1542,8 @@ do_primary_failover(void)
|
|||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
best_candidate.node_id);
|
best_candidate.node_id);
|
||||||
|
|
||||||
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
create_event_record(new_primary_conn,
|
create_event_record(new_primary_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
@@ -1685,7 +1697,7 @@ check_connection(PGconn *conn, const char *type)
|
|||||||
int connection_retries;
|
int connection_retries;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the master is still available if after
|
* Check if the node is still available if after
|
||||||
* local_options.reconnect_attempts * local_options.reconnect_intvl
|
* local_options.reconnect_attempts * local_options.reconnect_intvl
|
||||||
* seconds of retries we cannot reconnect return false
|
* seconds of retries we cannot reconnect return false
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user