repmgrd: further BDR sanity checks

This commit is contained in:
Ian Barwick
2017-07-14 10:27:28 +09:00
parent 875cdf5102
commit dfcf85a62f
3 changed files with 115 additions and 6 deletions

View File

@@ -2756,8 +2756,8 @@ reset_voting_status(PGconn *conn)
// COMMAND_OK?
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_error(_("unable to execute repmgr..reset_voting_status():\n %s"),
PQerrorMessage(conn));
log_error(_("unable to execute repmgr.reset_voting_status():\n %s"),
PQerrorMessage(conn));
}
PQclear(res);
@@ -2775,6 +2775,7 @@ get_last_wal_receive_location(PGconn *conn)
PGresult *res;
XLogRecPtr ptr = InvalidXLogRecPtr;
// pre-10 !!!
res = PQexec(conn, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
if (PQresultStatus(res) == PGRES_TUPLES_OK)
@@ -2822,6 +2823,41 @@ is_bdr_db(PGconn *conn)
}
/*
 * is_active_bdr_node()
 *
 * Reports whether the BDR node named "node_name" is active, judged by the
 * "active" flag of the replication slot which matches the node's
 * bdr.bdr_nodes entry.
 *
 * The query LEFT JOINs pg_replication_slots and wraps the flag in
 * COALESCE(..., TRUE), so a node with no matching slot row is reported
 * as active.
 *
 * Returns false if the query fails or if no bdr.bdr_nodes row exists for
 * "node_name".
 */
bool
is_active_bdr_node(PGconn *conn, const char *node_name)
{
	PQExpBufferData query;
	PGresult   *res;
	bool		is_active = false;

	initPQExpBuffer(&query);

	appendPQExpBuffer(
		&query,
		" SELECT COALESCE(s.active, TRUE) AS active"
		" FROM bdr.bdr_nodes n "
		" LEFT JOIN pg_replication_slots s "
		" ON slot_name=bdr.bdr_format_slot_name(n.node_sysid, n.node_timeline, n.node_dboid, (SELECT oid FROM pg_database WHERE datname = current_database())) "
		" WHERE node_name='%s' ",
		node_name);

	res = PQexec(conn, query.data);
	termPQExpBuffer(&query);

	/* PQresultStatus() copes with a NULL result by reporting a fatal error */
	if (PQresultStatus(res) == PGRES_TUPLES_OK && PQntuples(res) > 0)
	{
		/*
		 * Boolean columns arrive in text format as "t" or "f". atoi()
		 * returns 0 for both, which previously made this function report
		 * every node as inactive, so inspect the first character instead.
		 */
		is_active = (PQgetvalue(res, 0, 0)[0] == 't');
	}

	/* PQclear() is a no-op for a NULL result */
	PQclear(res);

	return is_active;
}
bool
is_bdr_repmgr(PGconn *conn)
{
@@ -3034,6 +3070,57 @@ get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list)
return;
}
/*
 * get_bdr_node_record_by_name()
 *
 * Fetches the bdr.bdr_nodes row whose node_name equals "node_name".
 *
 * Returns:
 *   RECORD_ERROR     - the query could not be executed (an error is logged)
 *   RECORD_NOT_FOUND - the query succeeded but returned no rows
 *   RECORD_FOUND     - the first row was handed to
 *                      _populate_bdr_node_record() to fill "node_info"
 */
RecordStatus
get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info)
{
	PQExpBufferData sql;
	PGresult   *result;
	RecordStatus status;

	initPQExpBuffer(&sql);

	appendPQExpBuffer(
		&sql,
		" SELECT node_sysid, "
		" node_timeline, "
		" node_dboid, "
		" node_status, "
		" node_name, "
		" node_local_dsn, "
		" node_init_from_dsn, "
		" node_read_only, "
		" node_seq_id "
		" FROM bdr.bdr_nodes "
		" WHERE node_name = '%s'",
		node_name);

	log_verbose(LOG_DEBUG, "get_bdr_node_record_by_name():\n%s", sql.data);

	result = PQexec(conn, sql.data);

	/* the query text is no longer needed once it has been sent */
	termPQExpBuffer(&sql);

	if (result == NULL || PQresultStatus(result) != PGRES_TUPLES_OK)
	{
		log_error(_("unable to retrieve BDR node record for \"%s\":\n %s"),
				  node_name,
				  PQerrorMessage(conn));
		status = RECORD_ERROR;
	}
	else if (PQntuples(result) == 0)
	{
		status = RECORD_NOT_FOUND;
	}
	else
	{
		_populate_bdr_node_record(result, node_info, 0);
		status = RECORD_FOUND;
	}

	/* single release point for the result, whatever the outcome */
	PQclear(result);

	return status;
}
static
void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list)

View File

@@ -310,7 +310,9 @@ XLogRecPtr get_last_wal_receive_location(PGconn *conn);
/* BDR functions */
void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list);
/* Looks up the bdr.bdr_nodes row matching node_name; node_info is populated only when a row is found */
RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info);
bool is_bdr_db(PGconn *conn);
/* Queries the "active" flag of the replication slot associated with the named BDR node */
bool is_active_bdr_node(PGconn *conn, const char *node_name);
bool is_bdr_repmgr(PGconn *conn);
bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
bool add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);

View File

@@ -24,11 +24,12 @@ do_bdr_node_check(void)
void
monitor_bdr(void)
{
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
PGconn *monitoring_conn = NULL;
t_node_info *monitored_node = NULL;
RecordStatus record_status;
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
PGconn *monitoring_conn = NULL;
t_node_info *monitored_node = NULL;
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
RecordStatus record_status;
bool failover_done = false;
/* sanity check local database */
@@ -72,6 +73,16 @@ monitor_bdr(void)
exit(ERR_BAD_CONFIG);
}
record_status = get_bdr_node_record_by_name(local_conn, local_node_info.node_name, &bdr_node_info);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve BDR record for node %s, terminating"),
local_node_info.node_name);
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
/* Retrieve record for this node from the local database */
record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_info);
@@ -88,6 +99,7 @@ monitor_bdr(void)
exit(ERR_BAD_CONFIG);
}
if (local_node_info.active == false)
{
log_error(_("local node (ID: %i) is marked as inactive in repmgr"),
@@ -97,6 +109,14 @@ monitor_bdr(void)
exit(ERR_BAD_CONFIG);
}
/*
 * Refuse to start monitoring when BDR does not report this node as
 * active. The test must fire on a "false" result: the previous condition
 * was inverted and raised the "not active" error for active nodes.
 */
if (is_active_bdr_node(local_conn, local_node_info.node_name) == false)
{
	log_error(_("BDR node %s is not active, terminating"),
			  local_node_info.node_name);
	PQfinish(local_conn);
	exit(ERR_BAD_CONFIG);
}
/* Log startup event */
create_event_record(local_conn,