From dfcf85a62f67df20897f2139364e95fa0cab1254 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 14 Jul 2017 10:27:28 +0900 Subject: [PATCH] repmgrd: further BDR sanity checks --- dbutils.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++-- dbutils.h | 2 ++ repmgrd-bdr.c | 28 +++++++++++++--- 3 files changed, 115 insertions(+), 6 deletions(-) diff --git a/dbutils.c b/dbutils.c index d93ea0c6..9fe485d6 100644 --- a/dbutils.c +++ b/dbutils.c @@ -2756,8 +2756,8 @@ reset_voting_status(PGconn *conn) // COMMAND_OK? if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) { - log_error(_("unable to execute repmgr..reset_voting_status():\n %s"), - PQerrorMessage(conn)); + log_error(_("unable to execute repmgr.reset_voting_status():\n %s"), + PQerrorMessage(conn)); } PQclear(res); @@ -2775,6 +2775,7 @@ get_last_wal_receive_location(PGconn *conn) PGresult *res; XLogRecPtr ptr = InvalidXLogRecPtr; + // pre-10 !!! res = PQexec(conn, "SELECT pg_catalog.pg_last_wal_receive_lsn()"); if (PQresultStatus(res) == PGRES_TUPLES_OK) @@ -2822,6 +2823,41 @@ is_bdr_db(PGconn *conn) } +bool +is_active_bdr_node(PGconn *conn, const char *node_name) /* true if node_name's BDR replication slot is active (a node with no matching slot counts as active); false if the query fails or no such node is found */ +{ + PQExpBufferData query; + PGresult *res; + bool is_active_bdr_node; + + initPQExpBuffer(&query); + appendPQExpBuffer( + &query, + " SELECT COALESCE(s.active, TRUE) AS active" + " FROM bdr.bdr_nodes n " + " LEFT JOIN pg_replication_slots s " + " ON slot_name=bdr.bdr_format_slot_name(n.node_sysid, n.node_timeline, n.node_dboid, (SELECT oid FROM pg_database WHERE datname = current_database())) " + " WHERE node_name='%s' ", + node_name); + + res = PQexec(conn, query.data); + termPQExpBuffer(&query); + + if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) + { + is_active_bdr_node = false; + } + else + { + is_active_bdr_node = (PQgetvalue(res, 0, 0)[0] == 't'); /* a boolean column arrives from libpq as the text "t" or "f", not as a number */
+ } + + PQclear(res); + + return is_active_bdr_node; +} + + bool is_bdr_repmgr(PGconn *conn) { @@ -3034,6 +3070,57 @@ get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list) return; } +RecordStatus +get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info) /* loads the bdr.bdr_nodes row for node_name into node_info; returns RECORD_FOUND, RECORD_NOT_FOUND (no row) or RECORD_ERROR (query failed, error logged) */ +{ + PQExpBufferData query; + PGresult *res; + + initPQExpBuffer(&query); + + appendPQExpBuffer( + &query, + " SELECT node_sysid, " + " node_timeline, " + " node_dboid, " + " node_status, " + " node_name, " + " node_local_dsn, " + " node_init_from_dsn, " + " node_read_only, " + " node_seq_id " + " FROM bdr.bdr_nodes " + " WHERE node_name = '%s'", + node_name); + + log_verbose(LOG_DEBUG, "get_bdr_node_record_by_name():\n%s", query.data); + + res = PQexec(conn, query.data); + termPQExpBuffer(&query); + + if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to retrieve BDR node record for \"%s\":\n %s"), + node_name, + PQerrorMessage(conn)); + + PQclear(res); + return RECORD_ERROR; + } + + if (PQntuples(res) == 0) + { + PQclear(res); + return RECORD_NOT_FOUND; + } + + _populate_bdr_node_record(res, node_info, 0); + + PQclear(res); + + return RECORD_FOUND; +} + static void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list) diff --git a/dbutils.h b/dbutils.h index b2b88335..2df11eb5 100644 --- a/dbutils.h +++ b/dbutils.h @@ -310,7 +310,9 @@ XLogRecPtr get_last_wal_receive_location(PGconn *conn); /* BDR functions */ void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list); +RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info); bool is_bdr_db(PGconn *conn); +bool is_active_bdr_node(PGconn *conn, const char *node_name); bool is_bdr_repmgr(PGconn *conn); bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set); bool add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char *set); diff --git 
a/repmgrd-bdr.c b/repmgrd-bdr.c index c74b63e6..a0df1964 100644 --- a/repmgrd-bdr.c +++ b/repmgrd-bdr.c @@ -24,11 +24,12 @@ do_bdr_node_check(void) void monitor_bdr(void) { - NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; - PGconn *monitoring_conn = NULL; - t_node_info *monitored_node = NULL; - RecordStatus record_status; + NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; + PGconn *monitoring_conn = NULL; + t_node_info *monitored_node = NULL; + t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER; + RecordStatus record_status; bool failover_done = false; /* sanity check local database */ @@ -72,6 +73,16 @@ monitor_bdr(void) exit(ERR_BAD_CONFIG); } + record_status = get_bdr_node_record_by_name(local_conn, local_node_info.node_name, &bdr_node_info); + + if (record_status != RECORD_FOUND) + { + log_error(_("unable to retrieve BDR record for node %s, terminating"), + local_node_info.node_name); + PQfinish(local_conn); + exit(ERR_BAD_CONFIG); + } + /* Retrieve record for this node from the local database */ record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_info); @@ -88,6 +99,7 @@ monitor_bdr(void) exit(ERR_BAD_CONFIG); } + if (local_node_info.active == false) { log_error(_("local node (ID: %i) is marked as inactive in repmgr"), @@ -97,6 +109,14 @@ monitor_bdr(void) exit(ERR_BAD_CONFIG); } + if (is_active_bdr_node(local_conn, local_node_info.node_name) == false) /* terminate only when the local BDR node is NOT active */ + { + log_error(_("BDR node %s is not active, terminating"), + local_node_info.node_name); + PQfinish(local_conn); + exit(ERR_BAD_CONFIG); + } + /* Log startup event */ create_event_record(local_conn,