mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Improve logging output during failover process
This commit is contained in:
29
dbutils.c
29
dbutils.c
@@ -1525,6 +1525,34 @@ identify_system(PGconn *repl_conn, t_system_identification *identification)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
repmgrd_set_local_node_id(PGconn *conn, int local_node_id)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
|
appendPQExpBuffer(
|
||||||
|
&query,
|
||||||
|
" SELECT repmgr.set_local_node_id(%i)",
|
||||||
|
local_node_id);
|
||||||
|
|
||||||
|
res = PQexec(conn, query.data);
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
PQclear(res);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* ================ */
|
/* ================ */
|
||||||
/* result functions */
|
/* result functions */
|
||||||
/* ================ */
|
/* ================ */
|
||||||
@@ -3599,6 +3627,7 @@ announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_no
|
|||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
notify_follow_primary(PGconn *conn, int primary_node_id)
|
notify_follow_primary(PGconn *conn, int primary_node_id)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -365,6 +365,7 @@ int get_primary_node_id(PGconn *conn);
|
|||||||
bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
||||||
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
||||||
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
||||||
|
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
||||||
|
|
||||||
/* extension functions */
|
/* extension functions */
|
||||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
||||||
|
|||||||
@@ -56,6 +56,11 @@ LEFT JOIN repmgr.nodes un
|
|||||||
|
|
||||||
/* monitoring functions */
|
/* monitoring functions */
|
||||||
|
|
||||||
|
/*
 * set_local_node_id(INT): SQL-callable wrapper around the C symbol
 * 'set_local_node_id' in the '$libdir/repmgr' shared library. Declared
 * STRICT and returning VOID.
 */
CREATE FUNCTION set_local_node_id(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS '$libdir/repmgr', 'set_local_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
CREATE FUNCTION standby_set_last_updated()
|
CREATE FUNCTION standby_set_last_updated()
|
||||||
RETURNS TIMESTAMP WITH TIME ZONE
|
RETURNS TIMESTAMP WITH TIME ZONE
|
||||||
AS '$libdir/repmgr', 'standby_set_last_updated'
|
AS '$libdir/repmgr', 'standby_set_last_updated'
|
||||||
@@ -66,7 +71,6 @@ CREATE FUNCTION standby_get_last_updated()
|
|||||||
AS '$libdir/repmgr', 'standby_get_last_updated'
|
AS '$libdir/repmgr', 'standby_get_last_updated'
|
||||||
LANGUAGE C STRICT;
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
/* failover functions */
|
/* failover functions */
|
||||||
|
|
||||||
CREATE FUNCTION request_vote(INT,INT)
|
CREATE FUNCTION request_vote(INT,INT)
|
||||||
|
|||||||
@@ -1141,8 +1141,6 @@ _do_standby_promote_internal(const char *data_dir)
|
|||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
||||||
|
|
||||||
log_notice(_("promoting standby"));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Promote standby to primary.
|
* Promote standby to primary.
|
||||||
*
|
*
|
||||||
@@ -1153,8 +1151,8 @@ _do_standby_promote_internal(const char *data_dir)
|
|||||||
|
|
||||||
get_server_action(ACTION_PROMOTE, script, (char *)data_dir);
|
get_server_action(ACTION_PROMOTE, script, (char *)data_dir);
|
||||||
|
|
||||||
log_notice(_("promoting server using '%s'"),
|
log_notice(_("promoting standby"));
|
||||||
script);
|
log_detail(_("promoting server using '%s'"), script);
|
||||||
|
|
||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
|
|||||||
39
repmgr.c
39
repmgr.c
@@ -46,6 +46,7 @@ typedef struct repmgrdSharedState
|
|||||||
{
|
{
|
||||||
LWLockId lock; /* protects search/modification */
|
LWLockId lock; /* protects search/modification */
|
||||||
TimestampTz last_updated;
|
TimestampTz last_updated;
|
||||||
|
int local_node_id;
|
||||||
/* streaming failover */
|
/* streaming failover */
|
||||||
NodeState node_state;
|
NodeState node_state;
|
||||||
NodeVotingStatus voting_status;
|
NodeVotingStatus voting_status;
|
||||||
@@ -66,6 +67,9 @@ void _PG_fini(void);
|
|||||||
|
|
||||||
static void repmgr_shmem_startup(void);
|
static void repmgr_shmem_startup(void);
|
||||||
|
|
||||||
|
Datum set_local_node_id(PG_FUNCTION_ARGS);
|
||||||
|
PG_FUNCTION_INFO_V1(set_local_node_id);
|
||||||
|
|
||||||
Datum standby_set_last_updated(PG_FUNCTION_ARGS);
|
Datum standby_set_last_updated(PG_FUNCTION_ARGS);
|
||||||
PG_FUNCTION_INFO_V1(standby_set_last_updated);
|
PG_FUNCTION_INFO_V1(standby_set_last_updated);
|
||||||
|
|
||||||
@@ -107,9 +111,9 @@ PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);
|
|||||||
void
|
void
|
||||||
_PG_init(void)
|
_PG_init(void)
|
||||||
{
|
{
|
||||||
elog(INFO, "repmgr init");
|
elog(DEBUG1, "repmgr init");
|
||||||
|
|
||||||
// error here?
|
/* error here? */
|
||||||
if (!process_shared_preload_libraries_in_progress)
|
if (!process_shared_preload_libraries_in_progress)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -128,6 +132,8 @@ _PG_init(void)
|
|||||||
shmem_startup_hook = repmgr_shmem_startup;
|
shmem_startup_hook = repmgr_shmem_startup;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Module unload callback
|
* Module unload callback
|
||||||
*/
|
*/
|
||||||
@@ -171,6 +177,7 @@ repmgr_shmem_startup(void)
|
|||||||
shared_state->lock = LWLockAssign();
|
shared_state->lock = LWLockAssign();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
shared_state->local_node_id = UNKNOWN_NODE_ID;
|
||||||
shared_state->current_electoral_term = 0;
|
shared_state->current_electoral_term = 0;
|
||||||
shared_state->voting_status = VS_NO_VOTE;
|
shared_state->voting_status = VS_NO_VOTE;
|
||||||
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
|
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
|
||||||
@@ -186,6 +193,21 @@ repmgr_shmem_startup(void)
|
|||||||
/* monitoring functions */
|
/* monitoring functions */
|
||||||
/* ==================== */
|
/* ==================== */
|
||||||
|
|
||||||
|
Datum
|
||||||
|
set_local_node_id(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
int local_node_id = PG_GETARG_INT32(0);
|
||||||
|
|
||||||
|
if (!shared_state)
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
|
||||||
|
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||||
|
shared_state->local_node_id = local_node_id;
|
||||||
|
LWLockRelease(shared_state->lock);
|
||||||
|
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
}
|
||||||
|
|
||||||
/* update and return last updated with current timestamp */
|
/* update and return last updated with current timestamp */
|
||||||
Datum
|
Datum
|
||||||
standby_set_last_updated(PG_FUNCTION_ARGS)
|
standby_set_last_updated(PG_FUNCTION_ARGS)
|
||||||
@@ -254,7 +276,8 @@ request_vote(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_NULL();
|
PG_RETURN_NULL();
|
||||||
}
|
}
|
||||||
|
|
||||||
elog(INFO, "requesting node id is %i for electoral term %i (our term: %i)",
|
elog(INFO, "node %i has received request from node %i for electoral term %i (our term: %i)",
|
||||||
|
shared_state->local_node_id,
|
||||||
requesting_node_id, current_electoral_term,
|
requesting_node_id, current_electoral_term,
|
||||||
shared_state->current_electoral_term);
|
shared_state->current_electoral_term);
|
||||||
|
|
||||||
@@ -270,7 +293,7 @@ request_vote(PG_FUNCTION_ARGS)
|
|||||||
"SELECT pg_catalog.pg_last_xlog_receive_location()");
|
"SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
elog(INFO, "query: %s", query.data);
|
elog(DEBUG1, "query: %s", query.data);
|
||||||
ret = SPI_execute(query.data, true, 0);
|
ret = SPI_execute(query.data, true, 0);
|
||||||
|
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
@@ -285,7 +308,7 @@ request_vote(PG_FUNCTION_ARGS)
|
|||||||
1, &isnull));
|
1, &isnull));
|
||||||
|
|
||||||
|
|
||||||
elog(INFO, "Our LSN is %X/%X",
|
elog(DEBUG1, "our LSN is %X/%X",
|
||||||
(uint32) (our_lsn >> 32),
|
(uint32) (our_lsn >> 32),
|
||||||
(uint32) our_lsn);
|
(uint32) our_lsn);
|
||||||
|
|
||||||
@@ -388,10 +411,12 @@ notify_follow_primary(PG_FUNCTION_ARGS)
|
|||||||
if (!shared_state)
|
if (!shared_state)
|
||||||
PG_RETURN_NULL();
|
PG_RETURN_NULL();
|
||||||
|
|
||||||
elog(INFO, "received notification to follow node %i", primary_node_id);
|
|
||||||
|
|
||||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||||
|
|
||||||
|
elog(INFO, "node %i received notification to follow node %i",
|
||||||
|
shared_state->local_node_id,
|
||||||
|
primary_node_id);
|
||||||
|
|
||||||
/* Explicitly set the primary node id */
|
/* Explicitly set the primary node id */
|
||||||
shared_state->candidate_node_id = primary_node_id;
|
shared_state->candidate_node_id = primary_node_id;
|
||||||
shared_state->follow_new_primary = true;
|
shared_state->follow_new_primary = true;
|
||||||
|
|||||||
@@ -799,7 +799,7 @@ do_primary_failover(void)
|
|||||||
}
|
}
|
||||||
else if (election_result == ELECTION_WON)
|
else if (election_result == ELECTION_WON)
|
||||||
{
|
{
|
||||||
log_notice("I am the winner, will now promote self and inform other nodes");
|
log_notice("this node is the winner, will now promote self and inform other nodes");
|
||||||
|
|
||||||
failover_state = promote_self();
|
failover_state = promote_self();
|
||||||
}
|
}
|
||||||
@@ -1648,7 +1648,7 @@ follow_new_primary(int new_primary_id)
|
|||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
upstream_node_info.node_id);
|
upstream_node_info.node_id);
|
||||||
|
|
||||||
log_notice("%s\n", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
|
|
||||||
create_event_notification(
|
create_event_notification(
|
||||||
upstream_conn,
|
upstream_conn,
|
||||||
|
|||||||
12
repmgrd.c
12
repmgrd.c
@@ -307,6 +307,8 @@ main(int argc, char **argv)
|
|||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||||
|
|
||||||
if (config_file_options.replication_type == REPLICATION_TYPE_BDR)
|
if (config_file_options.replication_type == REPLICATION_TYPE_BDR)
|
||||||
{
|
{
|
||||||
log_debug("node id is %i", local_node_info.node_id);
|
log_debug("node id is %i", local_node_info.node_id);
|
||||||
@@ -639,9 +641,13 @@ try_reconnect(t_node_info *node_info)
|
|||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_notice(_("unable to reconnect to node"));
|
log_notice(_("unable to reconnect to node"));
|
||||||
}
|
}
|
||||||
log_info(_("sleeping %i seconds until next reconnection attempt"),
|
|
||||||
config_file_options.reconnect_interval);
|
if (i + 1 < max_attempts)
|
||||||
sleep(config_file_options.reconnect_interval);
|
{
|
||||||
|
log_info(_("sleeping %i seconds until next reconnection attempt"),
|
||||||
|
config_file_options.reconnect_interval);
|
||||||
|
sleep(config_file_options.reconnect_interval);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user