repmgrd: improve BDR recovery handling

This commit is contained in:
Ian Barwick
2017-07-27 11:53:55 +09:00
parent d8a1799215
commit dc24d62009
5 changed files with 128 additions and 23 deletions

View File

@@ -3185,6 +3185,76 @@ bdr_node_exists(PGconn *conn, const char *node_name)
} }
/*
 * get_bdr_node_replication_slot_status()
 *
 * Report whether the replication slot belonging to the BDR node named
 * "node_name" is marked active in pg_replication_slots, as seen over "conn".
 *
 * The slot name is derived by passing the node's bdr.bdr_nodes entry to
 * bdr.bdr_format_slot_name().
 *
 * Returns:
 *   SLOT_ACTIVE   - the slot exists and its "active" column is true
 *   SLOT_INACTIVE - the slot exists and its "active" column is not true
 *   SLOT_UNKNOWN  - the query failed, or no matching slot row was returned
 */
ReplSlotStatus
get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name)
{
	PQExpBufferData query;
	PGresult   *res;
	ReplSlotStatus status = SLOT_UNKNOWN;

	initPQExpBuffer(&query);

	/*
	 * NOTE(review): node_name is interpolated into the query without
	 * escaping; presumably it always originates from repmgr's own metadata -
	 * confirm against callers.
	 */
	appendPQExpBuffer(
		&query,
		" SELECT s.active "
		" FROM pg_catalog.pg_replication_slots s "
		" WHERE slot_name = "
		" (SELECT bdr.bdr_format_slot_name(node_sysid, node_timeline, node_dboid, datoid) "
		" FROM bdr.bdr_nodes "
		" WHERE node_name = '%s') ",
		node_name);

	res = PQexec(conn, query.data);
	termPQExpBuffer(&query);

	/*
	 * Only inspect the first row if there actually is one: with an empty
	 * result set PQgetvalue() returns NULL, which must not reach strcmp().
	 */
	if (res != NULL
		&& PQresultStatus(res) == PGRES_TUPLES_OK
		&& PQntuples(res) > 0)
	{
		status = (strcmp(PQgetvalue(res, 0, 0), "t") == 0)
			? SLOT_ACTIVE
			: SLOT_INACTIVE;
	}

	/* PQclear() accepts a NULL result */
	PQclear(res);

	return status;
}
/*
 * get_bdr_other_node_name()
 *
 * Look up the name of a node in repmgr.nodes whose ID differs from
 * "node_id" and copy it into "node_name".
 *
 * "node_name" must point to a buffer of at least MAXLEN bytes. It is left
 * untouched if the query fails or returns no rows, so callers should
 * initialise it (e.g. to an empty string) beforehand.
 *
 * NOTE(review): the query has no ORDER BY/LIMIT, so if more than one other
 * node is registered, the first row returned is used - presumably only
 * two-node setups are expected here; confirm.
 */
void
get_bdr_other_node_name(PGconn *conn, int node_id, char *node_name)
{
	PQExpBufferData query;
	PGresult   *res;

	initPQExpBuffer(&query);

	appendPQExpBuffer(
		&query,
		" SELECT node_name "
		" FROM repmgr.nodes "
		" WHERE node_id != %i",
		node_id);

	log_verbose(LOG_DEBUG, "get_bdr_other_node_name():\n %s", query.data);

	res = PQexec(conn, query.data);
	termPQExpBuffer(&query);

	/*
	 * Require at least one row: PQgetvalue() returns NULL for a row which
	 * does not exist, and that must not be handed to strncpy().
	 */
	if (PQresultStatus(res) == PGRES_TUPLES_OK && PQntuples(res) > 0)
	{
		/* strncpy() does not terminate on truncation, so do it explicitly */
		strncpy(node_name, PQgetvalue(res, 0, 0), MAXLEN - 1);
		node_name[MAXLEN - 1] = '\0';
	}

	PQclear(res);

	return;
}
void void
add_extension_tables_to_bdr_replication_set(PGconn *conn) add_extension_tables_to_bdr_replication_set(PGconn *conn)
{ {

View File

@@ -60,6 +60,13 @@ typedef enum {
VR_NEGATIVE_VOTE VR_NEGATIVE_VOTE
} VoteRequestResult; } VoteRequestResult;
/*
 * Activity state of a replication slot.
 *
 * The numeric values are spelled out explicitly: "unknown" is negative,
 * while the two known states are 0 (inactive) and 1 (active).
 */
typedef enum {
	SLOT_UNKNOWN = -1,		/* slot state could not be determined */
	SLOT_INACTIVE = 0,		/* slot is not marked as active */
	SLOT_ACTIVE = 1			/* slot is marked as active */
} ReplSlotStatus;
/* /*
* Struct to store node information * Struct to store node information
*/ */
@@ -356,6 +363,8 @@ bool add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, con
void add_extension_tables_to_bdr_replication_set(PGconn *conn); void add_extension_tables_to_bdr_replication_set(PGconn *conn);
bool bdr_node_exists(PGconn *conn, const char *node_name); bool bdr_node_exists(PGconn *conn, const char *node_name);
ReplSlotStatus get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name);
void get_bdr_other_node_name(PGconn *conn, int node_id, char *name_buf);
bool am_bdr_failover_handler(PGconn *conn, int node_id); bool am_bdr_failover_handler(PGconn *conn, int node_id);
void unset_bdr_failover_handler(PGconn *conn); void unset_bdr_failover_handler(PGconn *conn);

View File

@@ -224,7 +224,8 @@ do_node_status(void)
{ {
log_warning(_("following issue(s) were detected:")); log_warning(_("following issue(s) were detected:"));
print_item_list(&warnings); print_item_list(&warnings);
log_hint(_("execute \"repmgr node check\" for more details")); /* add this when functionality implemented */
/* log_hint(_("execute \"repmgr node check\" for more details")); */
} }
} }

View File

@@ -1300,6 +1300,9 @@ action_name(const int action)
; case BDR_UNREGISTER: ; case BDR_UNREGISTER:
return "BDR UNREGISTER"; return "BDR UNREGISTER";
case NODE_STATUS:
return "NODE STATUS";
case CLUSTER_SHOW: case CLUSTER_SHOW:
return "CLUSTER SHOW"; return "CLUSTER SHOW";
case CLUSTER_EVENT: case CLUSTER_EVENT:
@@ -1358,7 +1361,7 @@ do_help(void)
#endif #endif
printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname()); printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname());
printf(_(" %s [OPTIONS] node status\n"), progname()); printf(_(" %s [OPTIONS] node status\n"), progname());
printf(_(" %s [OPTIONS] cluster {show|matrix|crosscheck|cleanup}\n"), progname()); printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck}\n"), progname());
puts(""); puts("");

View File

@@ -416,13 +416,16 @@ static void
do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node) do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
{ {
PGconn *recovered_node_conn; PGconn *recovered_node_conn;
PGconn *slot_check_conn;
PQExpBufferData event_details; PQExpBufferData event_details;
t_bdr_node_info bdr_record;
t_event_info event_info = T_EVENT_INFO_INITIALIZER; t_event_info event_info = T_EVENT_INFO_INITIALIZER;
int i; int i;
bool node_recovered = false; bool node_recovered = false;
int node_recovery_elapsed; int node_recovery_elapsed;
char node_name[MAXLEN] = "";
recovered_node_conn = establish_db_connection(monitored_node->conninfo, false); recovered_node_conn = establish_db_connection(monitored_node->conninfo, false);
if (PQstatus(recovered_node_conn) != CONNECTION_OK) if (PQstatus(recovered_node_conn) != CONNECTION_OK)
@@ -431,23 +434,30 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
return; return;
} }
if (am_bdr_failover_handler(recovered_node_conn, local_node_info.node_id) == false) /* determine which replication slot to look for */
if (monitored_node->node_id == local_node_info.node_id)
{ {
PQfinish(recovered_node_conn); slot_check_conn = recovered_node_conn;
log_debug("other node's repmgrd is handling recovery"); get_bdr_other_node_name(recovered_node_conn, local_node_info.node_id, node_name);
return; }
else
{
slot_check_conn = local_conn;
strncpy(node_name, monitored_node->node_name, MAXLEN);
} }
for (i = 0; i < config_file_options.bdr_recovery_timeout; i++) for (i = 0; i < config_file_options.bdr_recovery_timeout; i++)
{ {
RecordStatus record_status = get_bdr_node_record_by_name( ReplSlotStatus slot_status;
recovered_node_conn,
monitored_node->node_name,
&bdr_record);
if (record_status == RECORD_FOUND && bdr_record.node_status == 'r') log_debug("checking for state of replication slot for node \"%s\"", node_name);
slot_status = get_bdr_node_replication_slot_status(
slot_check_conn,
node_name);
if (slot_status == SLOT_ACTIVE)
{ {
// check pg_stat_replication
node_recovered = true; node_recovered = true;
break; break;
} }
@@ -456,32 +466,46 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
continue; continue;
} }
if (node_recovered == false) if (node_recovered == false)
{ {
log_warning(_("node did not come up")); log_warning(_("no active replication slot for node \"%s\" found after %i seconds"),
node_name,
config_file_options.bdr_recovery_timeout);
log_detail(_("this probably means inter-node BDR connections have not been re-established"));
PQfinish(recovered_node_conn); PQfinish(recovered_node_conn);
return; return;
} }
log_info(_("active replication slot for node \"%s\" found after %i seconds"),
node_name,
i);
// XXX check other node is attached to this one so we node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
// don't end up monitoring a parted node; if not attached, monitored_node->monitoring_state = MS_NORMAL;
// generate a failed bdr_recovery event
initPQExpBuffer(&event_details); initPQExpBuffer(&event_details);
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
appendPQExpBuffer(&event_details, appendPQExpBuffer(&event_details,
_("node '%s' (ID: %i) has recovered after %i seconds"), _("node \"%s\" (ID: %i) has recovered after %i seconds"),
monitored_node->node_name, monitored_node->node_name,
monitored_node->node_id, monitored_node->node_id,
node_recovery_elapsed); node_recovery_elapsed);
monitored_node->monitoring_state = MS_NORMAL;
log_notice("%s", event_details.data); log_notice("%s", event_details.data);
/* other node will generate the event */
if (monitored_node->node_id == local_node_info.node_id)
{
termPQExpBuffer(&event_details);
PQfinish(recovered_node_conn);
return;
}
/* generate the event on the currently active node only */ /* generate the event on the currently active node only */
if (monitored_node->node_id != local_node_info.node_id) if (monitored_node->node_id != local_node_info.node_id)
{ {
@@ -513,8 +537,6 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
termPQExpBuffer(&event_details); termPQExpBuffer(&event_details);
unset_bdr_failover_handler(recovered_node_conn);
PQfinish(recovered_node_conn); PQfinish(recovered_node_conn);
return; return;