repmgrd: improve BDR recovery handling

This commit is contained in:
Ian Barwick
2017-07-27 11:53:55 +09:00
parent d8a1799215
commit dc24d62009
5 changed files with 128 additions and 23 deletions

View File

@@ -3185,6 +3185,76 @@ bdr_node_exists(PGconn *conn, const char *node_name)
}
ReplSlotStatus
get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name)
{
PQExpBufferData query;
PGresult *res;
ReplSlotStatus status;
initPQExpBuffer(&query);
appendPQExpBuffer(
&query,
" SELECT s.active "
" FROM pg_catalog.pg_replication_slots s "
" WHERE slot_name = "
" (SELECT bdr.bdr_format_slot_name(node_sysid, node_timeline, node_dboid, datoid) "
" FROM bdr.bdr_nodes "
" WHERE node_name = '%s') ",
node_name);
res = PQexec(conn, query.data);
termPQExpBuffer(&query);
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
{
status = SLOT_UNKNOWN;
}
else
{
status = (strcmp(PQgetvalue(res, 0, 0), "t") == 0)
? SLOT_ACTIVE
: SLOT_INACTIVE;
}
PQclear(res);
return status;
}
void
get_bdr_other_node_name(PGconn *conn, int node_id, char *node_name)
{
PQExpBufferData query;
PGresult *res;
initPQExpBuffer(&query);
appendPQExpBuffer(
&query,
" SELECT node_name "
" FROM repmgr.nodes "
" WHERE node_id != %i",
node_id);
log_verbose(LOG_DEBUG, "get_bdr_other_node_name():\n %s", query.data);
res = PQexec(conn, query.data);
termPQExpBuffer(&query);
if(PQresultStatus(res) == PGRES_TUPLES_OK)
{
strncpy(node_name, PQgetvalue(res, 0, 0), MAXLEN);
}
PQclear(res);
return;
}
void
add_extension_tables_to_bdr_replication_set(PGconn *conn)
{

View File

@@ -60,6 +60,13 @@ typedef enum {
VR_NEGATIVE_VOTE
} VoteRequestResult;
typedef enum {
SLOT_UNKNOWN = -1,
SLOT_INACTIVE,
SLOT_ACTIVE
} ReplSlotStatus;
/*
* Struct to store node information
*/
@@ -356,6 +363,8 @@ bool add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, con
void add_extension_tables_to_bdr_replication_set(PGconn *conn);
bool bdr_node_exists(PGconn *conn, const char *node_name);
ReplSlotStatus get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name);
void get_bdr_other_node_name(PGconn *conn, int node_id, char *name_buf);
bool am_bdr_failover_handler(PGconn *conn, int node_id);
void unset_bdr_failover_handler(PGconn *conn);

View File

@@ -224,7 +224,8 @@ do_node_status(void)
{
log_warning(_("following issue(s) were detected:"));
print_item_list(&warnings);
log_hint(_("execute \"repmgr node check\" for more details"));
/* add this when functionality implemented */
/* log_hint(_("execute \"repmgr node check\" for more details")); */
}
}

View File

@@ -1300,6 +1300,9 @@ action_name(const int action)
; case BDR_UNREGISTER:
return "BDR UNREGISTER";
case NODE_STATUS:
return "NODE STATUS";
case CLUSTER_SHOW:
return "CLUSTER SHOW";
case CLUSTER_EVENT:
@@ -1358,7 +1361,7 @@ do_help(void)
#endif
printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname());
printf(_(" %s [OPTIONS] node status\n"), progname());
printf(_(" %s [OPTIONS] cluster {show|matrix|crosscheck|cleanup}\n"), progname());
printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck}\n"), progname());
puts("");

View File

@@ -416,13 +416,16 @@ static void
do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
{
PGconn *recovered_node_conn;
PGconn *slot_check_conn;
PQExpBufferData event_details;
t_bdr_node_info bdr_record;
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
int i;
bool node_recovered = false;
int node_recovery_elapsed;
char node_name[MAXLEN] = "";
recovered_node_conn = establish_db_connection(monitored_node->conninfo, false);
if (PQstatus(recovered_node_conn) != CONNECTION_OK)
@@ -431,23 +434,30 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
return;
}
if (am_bdr_failover_handler(recovered_node_conn, local_node_info.node_id) == false)
/* determine which replication slot to look fore */
if (monitored_node->node_id == local_node_info.node_id)
{
PQfinish(recovered_node_conn);
log_debug("other node's repmgrd is handling recovery");
return;
slot_check_conn = recovered_node_conn;
get_bdr_other_node_name(recovered_node_conn, local_node_info.node_id, node_name);
}
else
{
slot_check_conn = local_conn;
strncpy(node_name, monitored_node->node_name, MAXLEN);
}
for (i = 0; i < config_file_options.bdr_recovery_timeout; i++)
{
RecordStatus record_status = get_bdr_node_record_by_name(
recovered_node_conn,
monitored_node->node_name,
&bdr_record);
ReplSlotStatus slot_status;
if (record_status == RECORD_FOUND && bdr_record.node_status == 'r')
log_debug("checking for state of replication slot for node \"%s\"", node_name);
slot_status = get_bdr_node_replication_slot_status(
slot_check_conn,
node_name);
if (slot_status == SLOT_ACTIVE)
{
// check pg_stat_replication
node_recovered = true;
break;
}
@@ -456,32 +466,46 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
continue;
}
if (node_recovered == false)
{
log_warning(_("node did not come up"));
log_warning(_("no active replication slot for node \"%s\" found after %i seconds"),
node_name,
config_file_options.bdr_recovery_timeout);
log_detail(_("this probably means inter-node BDR connections have not been re-established"));
PQfinish(recovered_node_conn);
return;
}
log_info(_("active replication slot for node \"%s\" found after %i seconds"),
node_name,
i);
// XXX check other node is attached to this one so we
// don't end up monitoring a parted node; if not attached,
// generate a failed bdr_recovery event
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
monitored_node->monitoring_state = MS_NORMAL;
initPQExpBuffer(&event_details);
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
appendPQExpBuffer(&event_details,
_("node '%s' (ID: %i) has recovered after %i seconds"),
_("node \"%s\" (ID: %i) has recovered after %i seconds"),
monitored_node->node_name,
monitored_node->node_id,
node_recovery_elapsed);
monitored_node->monitoring_state = MS_NORMAL;
log_notice("%s", event_details.data);
/* other node will generate the event */
if (monitored_node->node_id == local_node_info.node_id)
{
termPQExpBuffer(&event_details);
PQfinish(recovered_node_conn);
return;
}
/* generate the event on the currently active node only */
if (monitored_node->node_id != local_node_info.node_id)
{
@@ -513,8 +537,6 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
termPQExpBuffer(&event_details);
unset_bdr_failover_handler(recovered_node_conn);
PQfinish(recovered_node_conn);
return;