Improve logging output during failover process

This commit is contained in:
Ian Barwick
2017-08-24 22:44:03 +09:00
parent 7a00ad930a
commit fcd111ac4c
7 changed files with 80 additions and 17 deletions

View File

@@ -1525,6 +1525,34 @@ identify_system(PGconn *repl_conn, t_system_identification *identification)
}
bool
repmgrd_set_local_node_id(PGconn *conn, int local_node_id)
{
PQExpBufferData query;
PGresult *res = NULL;
initPQExpBuffer(&query);
appendPQExpBuffer(
&query,
" SELECT repmgr.set_local_node_id(%i)",
local_node_id);
res = PQexec(conn, query.data);
termPQExpBuffer(&query);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
PQclear(res);
return false;
}
PQclear(res);
return true;
}
/* ================ */
/* result functions */
/* ================ */
@@ -3599,6 +3627,7 @@ announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_no
return retval;
}
void
notify_follow_primary(PGconn *conn, int primary_node_id)
{

View File

@@ -365,6 +365,7 @@ int get_primary_node_id(PGconn *conn);
bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
int get_ready_archive_files(PGconn *conn, const char *data_directory);
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
/* extension functions */
ExtensionStatus get_repmgr_extension_status(PGconn *conn);

View File

@@ -56,6 +56,11 @@ LEFT JOIN repmgr.nodes un
/* monitoring functions */
CREATE FUNCTION set_local_node_id(INT)
RETURNS VOID
AS '$libdir/repmgr', 'set_local_node_id'
LANGUAGE C STRICT;
CREATE FUNCTION standby_set_last_updated()
RETURNS TIMESTAMP WITH TIME ZONE
AS '$libdir/repmgr', 'standby_set_last_updated'
@@ -66,7 +71,6 @@ CREATE FUNCTION standby_get_last_updated()
AS '$libdir/repmgr', 'standby_get_last_updated'
LANGUAGE C STRICT;
/* failover functions */
CREATE FUNCTION request_vote(INT,INT)

View File

@@ -1141,8 +1141,6 @@ _do_standby_promote_internal(const char *data_dir)
PGconn *conn = NULL;
RecoveryType recovery_type = RECTYPE_UNKNOWN;
log_notice(_("promoting standby"));
/*
* Promote standby to primary.
*
@@ -1153,8 +1151,8 @@ _do_standby_promote_internal(const char *data_dir)
get_server_action(ACTION_PROMOTE, script, (char *)data_dir);
log_notice(_("promoting server using '%s'"),
script);
log_notice(_("promoting standby"));
log_detail(_("promoting server using '%s'"), script);
r = system(script);
if (r != 0)

View File

@@ -46,6 +46,7 @@ typedef struct repmgrdSharedState
{
LWLockId lock; /* protects search/modification */
TimestampTz last_updated;
int local_node_id;
/* streaming failover */
NodeState node_state;
NodeVotingStatus voting_status;
@@ -66,6 +67,9 @@ void _PG_fini(void);
static void repmgr_shmem_startup(void);
Datum set_local_node_id(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(set_local_node_id);
Datum standby_set_last_updated(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(standby_set_last_updated);
@@ -107,9 +111,9 @@ PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);
void
_PG_init(void)
{
elog(INFO, "repmgr init");
elog(DEBUG1, "repmgr init");
// error here?
/* error here? */
if (!process_shared_preload_libraries_in_progress)
return;
@@ -128,6 +132,8 @@ _PG_init(void)
shmem_startup_hook = repmgr_shmem_startup;
}
/*
* Module unload callback
*/
@@ -171,6 +177,7 @@ repmgr_shmem_startup(void)
shared_state->lock = LWLockAssign();
#endif
shared_state->local_node_id = UNKNOWN_NODE_ID;
shared_state->current_electoral_term = 0;
shared_state->voting_status = VS_NO_VOTE;
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
@@ -186,6 +193,21 @@ repmgr_shmem_startup(void)
/* monitoring functions */
/* ==================== */
Datum
set_local_node_id(PG_FUNCTION_ARGS)
{
int local_node_id = PG_GETARG_INT32(0);
if (!shared_state)
PG_RETURN_NULL();
LWLockAcquire(shared_state->lock, LW_SHARED);
shared_state->local_node_id = local_node_id;
LWLockRelease(shared_state->lock);
PG_RETURN_VOID();
}
/* update and return last updated with current timestamp */
Datum
standby_set_last_updated(PG_FUNCTION_ARGS)
@@ -254,7 +276,8 @@ request_vote(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
}
elog(INFO, "requesting node id is %i for electoral term %i (our term: %i)",
elog(INFO, "node %i has received request from node %i for electoral term %i (our term: %i)",
shared_state->local_node_id,
requesting_node_id, current_electoral_term,
shared_state->current_electoral_term);
@@ -270,7 +293,7 @@ request_vote(PG_FUNCTION_ARGS)
"SELECT pg_catalog.pg_last_xlog_receive_location()");
#endif
elog(INFO, "query: %s", query.data);
elog(DEBUG1, "query: %s", query.data);
ret = SPI_execute(query.data, true, 0);
if (ret < 0)
@@ -285,7 +308,7 @@ request_vote(PG_FUNCTION_ARGS)
1, &isnull));
elog(INFO, "Our LSN is %X/%X",
elog(DEBUG1, "our LSN is %X/%X",
(uint32) (our_lsn >> 32),
(uint32) our_lsn);
@@ -388,10 +411,12 @@ notify_follow_primary(PG_FUNCTION_ARGS)
if (!shared_state)
PG_RETURN_NULL();
elog(INFO, "received notification to follow node %i", primary_node_id);
LWLockAcquire(shared_state->lock, LW_SHARED);
elog(INFO, "node %i received notification to follow node %i",
shared_state->local_node_id,
primary_node_id);
/* Explicitly set the primary node id */
shared_state->candidate_node_id = primary_node_id;
shared_state->follow_new_primary = true;

View File

@@ -799,7 +799,7 @@ do_primary_failover(void)
}
else if (election_result == ELECTION_WON)
{
log_notice("I am the winner, will now promote self and inform other nodes");
log_notice("this node is the winner, will now promote self and inform other nodes");
failover_state = promote_self();
}
@@ -1648,7 +1648,7 @@ follow_new_primary(int new_primary_id)
local_node_info.node_id,
upstream_node_info.node_id);
log_notice("%s\n", event_details.data);
log_notice("%s", event_details.data);
create_event_notification(
upstream_conn,

View File

@@ -307,6 +307,8 @@ main(int argc, char **argv)
terminate(ERR_BAD_CONFIG);
}
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
if (config_file_options.replication_type == REPLICATION_TYPE_BDR)
{
log_debug("node id is %i", local_node_info.node_id);
@@ -639,10 +641,14 @@ try_reconnect(t_node_info *node_info)
PQfinish(conn);
log_notice(_("unable to reconnect to node"));
}
if (i + 1 < max_attempts)
{
log_info(_("sleeping %i seconds until next reconnection attempt"),
config_file_options.reconnect_interval);
sleep(config_file_options.reconnect_interval);
}
}
log_warning(_("unable to reconnect to node %i after %i attempts"),