Improve logging output during failover process

This commit is contained in:
Ian Barwick
2017-08-24 22:44:03 +09:00
parent 7a00ad930a
commit fcd111ac4c
7 changed files with 80 additions and 17 deletions

View File

@@ -1525,6 +1525,34 @@ identify_system(PGconn *repl_conn, t_system_identification *identification)
} }
/*
 * repmgrd_set_local_node_id()
 *
 * Runs "SELECT repmgr.set_local_node_id(<local_node_id>)" on the supplied
 * connection.
 *
 * Returns true if the query finished with status PGRES_TUPLES_OK,
 * false otherwise. The query buffer and the result are always released
 * before returning.
 */
bool
repmgrd_set_local_node_id(PGconn *conn, int local_node_id)
{
	PQExpBufferData sql;
	PGresult   *result = NULL;
	bool		success = false;

	initPQExpBuffer(&sql);
	appendPQExpBuffer(&sql,
					  " SELECT repmgr.set_local_node_id(%i)",
					  local_node_id);

	result = PQexec(conn, sql.data);

	/* the query text is no longer needed once it has been sent */
	termPQExpBuffer(&sql);

	success = (PQresultStatus(result) == PGRES_TUPLES_OK);

	PQclear(result);

	return success;
}
/* ================ */ /* ================ */
/* result functions */ /* result functions */
/* ================ */ /* ================ */
@@ -3599,6 +3627,7 @@ announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_no
return retval; return retval;
} }
void void
notify_follow_primary(PGconn *conn, int primary_node_id) notify_follow_primary(PGconn *conn, int primary_node_id)
{ {

View File

@@ -365,6 +365,7 @@ int get_primary_node_id(PGconn *conn);
bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
int get_ready_archive_files(PGconn *conn, const char *data_directory); int get_ready_archive_files(PGconn *conn, const char *data_directory);
bool identify_system(PGconn *repl_conn, t_system_identification *identification); bool identify_system(PGconn *repl_conn, t_system_identification *identification);
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
/* extension functions */ /* extension functions */
ExtensionStatus get_repmgr_extension_status(PGconn *conn); ExtensionStatus get_repmgr_extension_status(PGconn *conn);

View File

@@ -56,6 +56,11 @@ LEFT JOIN repmgr.nodes un
/* monitoring functions */ /* monitoring functions */
/*
 * set_local_node_id(INT): SQL entry point for the C function
 * 'set_local_node_id' in the repmgr shared library. Declared STRICT,
 * so it is not invoked when the argument is NULL.
 */
CREATE FUNCTION set_local_node_id(INT)
RETURNS VOID
AS '$libdir/repmgr', 'set_local_node_id'
LANGUAGE C STRICT;
CREATE FUNCTION standby_set_last_updated() CREATE FUNCTION standby_set_last_updated()
RETURNS TIMESTAMP WITH TIME ZONE RETURNS TIMESTAMP WITH TIME ZONE
AS '$libdir/repmgr', 'standby_set_last_updated' AS '$libdir/repmgr', 'standby_set_last_updated'
@@ -66,7 +71,6 @@ CREATE FUNCTION standby_get_last_updated()
AS '$libdir/repmgr', 'standby_get_last_updated' AS '$libdir/repmgr', 'standby_get_last_updated'
LANGUAGE C STRICT; LANGUAGE C STRICT;
/* failover functions */ /* failover functions */
CREATE FUNCTION request_vote(INT,INT) CREATE FUNCTION request_vote(INT,INT)

View File

@@ -1141,8 +1141,6 @@ _do_standby_promote_internal(const char *data_dir)
PGconn *conn = NULL; PGconn *conn = NULL;
RecoveryType recovery_type = RECTYPE_UNKNOWN; RecoveryType recovery_type = RECTYPE_UNKNOWN;
log_notice(_("promoting standby"));
/* /*
* Promote standby to primary. * Promote standby to primary.
* *
@@ -1153,8 +1151,8 @@ _do_standby_promote_internal(const char *data_dir)
get_server_action(ACTION_PROMOTE, script, (char *)data_dir); get_server_action(ACTION_PROMOTE, script, (char *)data_dir);
log_notice(_("promoting server using '%s'"), log_notice(_("promoting standby"));
script); log_detail(_("promoting server using '%s'"), script);
r = system(script); r = system(script);
if (r != 0) if (r != 0)

View File

@@ -46,6 +46,7 @@ typedef struct repmgrdSharedState
{ {
LWLockId lock; /* protects search/modification */ LWLockId lock; /* protects search/modification */
TimestampTz last_updated; TimestampTz last_updated;
int local_node_id;
/* streaming failover */ /* streaming failover */
NodeState node_state; NodeState node_state;
NodeVotingStatus voting_status; NodeVotingStatus voting_status;
@@ -66,6 +67,9 @@ void _PG_fini(void);
static void repmgr_shmem_startup(void); static void repmgr_shmem_startup(void);
Datum set_local_node_id(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(set_local_node_id);
Datum standby_set_last_updated(PG_FUNCTION_ARGS); Datum standby_set_last_updated(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(standby_set_last_updated); PG_FUNCTION_INFO_V1(standby_set_last_updated);
@@ -107,9 +111,9 @@ PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);
void void
_PG_init(void) _PG_init(void)
{ {
elog(INFO, "repmgr init"); elog(DEBUG1, "repmgr init");
// error here? /* error here? */
if (!process_shared_preload_libraries_in_progress) if (!process_shared_preload_libraries_in_progress)
return; return;
@@ -128,6 +132,8 @@ _PG_init(void)
shmem_startup_hook = repmgr_shmem_startup; shmem_startup_hook = repmgr_shmem_startup;
} }
/* /*
* Module unload callback * Module unload callback
*/ */
@@ -171,6 +177,7 @@ repmgr_shmem_startup(void)
shared_state->lock = LWLockAssign(); shared_state->lock = LWLockAssign();
#endif #endif
shared_state->local_node_id = UNKNOWN_NODE_ID;
shared_state->current_electoral_term = 0; shared_state->current_electoral_term = 0;
shared_state->voting_status = VS_NO_VOTE; shared_state->voting_status = VS_NO_VOTE;
shared_state->candidate_node_id = UNKNOWN_NODE_ID; shared_state->candidate_node_id = UNKNOWN_NODE_ID;
@@ -186,6 +193,21 @@ repmgr_shmem_startup(void)
/* monitoring functions */ /* monitoring functions */
/* ==================== */ /* ==================== */
Datum
set_local_node_id(PG_FUNCTION_ARGS)
{
int local_node_id = PG_GETARG_INT32(0);
if (!shared_state)
PG_RETURN_NULL();
LWLockAcquire(shared_state->lock, LW_SHARED);
shared_state->local_node_id = local_node_id;
LWLockRelease(shared_state->lock);
PG_RETURN_VOID();
}
/* update and return last updated with current timestamp */ /* update and return last updated with current timestamp */
Datum Datum
standby_set_last_updated(PG_FUNCTION_ARGS) standby_set_last_updated(PG_FUNCTION_ARGS)
@@ -254,7 +276,8 @@ request_vote(PG_FUNCTION_ARGS)
PG_RETURN_NULL(); PG_RETURN_NULL();
} }
elog(INFO, "requesting node id is %i for electoral term %i (our term: %i)", elog(INFO, "node %i has received request from node %i for electoral term %i (our term: %i)",
shared_state->local_node_id,
requesting_node_id, current_electoral_term, requesting_node_id, current_electoral_term,
shared_state->current_electoral_term); shared_state->current_electoral_term);
@@ -270,7 +293,7 @@ request_vote(PG_FUNCTION_ARGS)
"SELECT pg_catalog.pg_last_xlog_receive_location()"); "SELECT pg_catalog.pg_last_xlog_receive_location()");
#endif #endif
elog(INFO, "query: %s", query.data); elog(DEBUG1, "query: %s", query.data);
ret = SPI_execute(query.data, true, 0); ret = SPI_execute(query.data, true, 0);
if (ret < 0) if (ret < 0)
@@ -285,7 +308,7 @@ request_vote(PG_FUNCTION_ARGS)
1, &isnull)); 1, &isnull));
elog(INFO, "Our LSN is %X/%X", elog(DEBUG1, "our LSN is %X/%X",
(uint32) (our_lsn >> 32), (uint32) (our_lsn >> 32),
(uint32) our_lsn); (uint32) our_lsn);
@@ -388,10 +411,12 @@ notify_follow_primary(PG_FUNCTION_ARGS)
if (!shared_state) if (!shared_state)
PG_RETURN_NULL(); PG_RETURN_NULL();
elog(INFO, "received notification to follow node %i", primary_node_id);
LWLockAcquire(shared_state->lock, LW_SHARED); LWLockAcquire(shared_state->lock, LW_SHARED);
elog(INFO, "node %i received notification to follow node %i",
shared_state->local_node_id,
primary_node_id);
/* Explicitly set the primary node id */ /* Explicitly set the primary node id */
shared_state->candidate_node_id = primary_node_id; shared_state->candidate_node_id = primary_node_id;
shared_state->follow_new_primary = true; shared_state->follow_new_primary = true;

View File

@@ -799,7 +799,7 @@ do_primary_failover(void)
} }
else if (election_result == ELECTION_WON) else if (election_result == ELECTION_WON)
{ {
log_notice("I am the winner, will now promote self and inform other nodes"); log_notice("this node is the winner, will now promote self and inform other nodes");
failover_state = promote_self(); failover_state = promote_self();
} }
@@ -1648,7 +1648,7 @@ follow_new_primary(int new_primary_id)
local_node_info.node_id, local_node_info.node_id,
upstream_node_info.node_id); upstream_node_info.node_id);
log_notice("%s\n", event_details.data); log_notice("%s", event_details.data);
create_event_notification( create_event_notification(
upstream_conn, upstream_conn,

View File

@@ -307,6 +307,8 @@ main(int argc, char **argv)
terminate(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
if (config_file_options.replication_type == REPLICATION_TYPE_BDR) if (config_file_options.replication_type == REPLICATION_TYPE_BDR)
{ {
log_debug("node id is %i", local_node_info.node_id); log_debug("node id is %i", local_node_info.node_id);
@@ -639,9 +641,13 @@ try_reconnect(t_node_info *node_info)
PQfinish(conn); PQfinish(conn);
log_notice(_("unable to reconnect to node")); log_notice(_("unable to reconnect to node"));
} }
log_info(_("sleeping %i seconds until next reconnection attempt"),
config_file_options.reconnect_interval); if (i + 1 < max_attempts)
sleep(config_file_options.reconnect_interval); {
log_info(_("sleeping %i seconds until next reconnection attempt"),
config_file_options.reconnect_interval);
sleep(config_file_options.reconnect_interval);
}
} }