From 4d4ed3bcd60a207e6a8038ba5b518dae24f00e92 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 16 Jan 2020 09:51:11 +0900 Subject: [PATCH] Remove BDR 2.x support The BDR 2.x support was conceptual only and was never used in production. As BDR 2.x will be EOL'd shortly, there is no risk it will be needed. --- HISTORY | 1 + Makefile.in | 4 +- configfile.c | 37 +- configfile.h | 6 - dbutils.c | 829 ------------------- dbutils.h | 74 +- doc/bdr-failover.md | 8 - doc/configuration-file-optional-settings.xml | 11 +- doc/event-notifications.xml | 29 - doc/filelist.xml | 1 - doc/repmgr-cluster-show.xml | 2 +- doc/repmgr.xml | 1 - doc/repmgrd-bdr.xml | 429 ---------- doc/repmgrd-configuration.xml | 12 - expected/repmgr_extension.out | 18 - repmgr--5.0--5.1.sql | 2 + repmgr--5.0.sql | 10 - repmgr--5.1.sql | 10 - repmgr-action-bdr.c | 557 ------------- repmgr-action-bdr.h | 28 - repmgr-action-node.c | 29 - repmgr-action-witness.c | 25 - repmgr-client.c | 65 +- repmgr-client.h | 32 +- repmgr.c | 66 -- repmgr.conf.sample | 13 +- repmgr.h | 5 - repmgrd-bdr.c | 678 --------------- repmgrd-bdr.h | 26 - repmgrd.c | 20 +- scripts/bdr-pgbouncer.sh | 101 --- sql/repmgr_extension.sql | 3 - 32 files changed, 39 insertions(+), 3093 deletions(-) delete mode 100644 doc/bdr-failover.md delete mode 100644 doc/repmgrd-bdr.xml delete mode 100644 repmgr-action-bdr.c delete mode 100644 repmgr-action-bdr.h delete mode 100644 repmgrd-bdr.c delete mode 100644 repmgrd-bdr.h delete mode 100644 scripts/bdr-pgbouncer.sh diff --git a/HISTORY b/HISTORY index 77ace6b0..fc4299e9 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,5 @@ 5.1 2019-??-?? 
+ repmgr: remove BDR 2.x support repmgr: don't query upstream's data directory (Ian) repmgr: rename --recovery-conf-only to --replication-conf-only (Ian) repmgr: ensure postgresql.auto.conf is created with corretc permissions (Ian) diff --git a/Makefile.in b/Makefile.in index 3dbd068d..625f1a53 100644 --- a/Makefile.in +++ b/Makefile.in @@ -55,9 +55,9 @@ $(info Building against PostgreSQL $(MAJORVERSION)) REPMGR_CLIENT_OBJS = repmgr-client.o \ repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \ - repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-service.o repmgr-action-daemon.o \ + repmgr-action-cluster.o repmgr-action-node.o repmgr-action-service.o repmgr-action-daemon.o \ configfile.o configfile-scan.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o -REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o configfile-scan.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o +REPMGRD_OBJS = repmgrd.o repmgrd-physical.o configfile.o configfile-scan.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o DATE=$(shell date "+%Y-%m-%d") repmgr_version.h: repmgr_version.h.in diff --git a/configfile.c b/configfile.c index 6eed0cfe..a3d23b24 100644 --- a/configfile.c +++ b/configfile.c @@ -378,13 +378,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * */ options->witness_sync_interval = DEFAULT_WITNESS_SYNC_INTERVAL; - /*------------- - * BDR settings - *------------- - */ - options->bdr_local_monitoring_only = false; - options->bdr_recovery_timeout = DEFAULT_BDR_RECOVERY_TIMEOUT; - /*------------------------- * service command settings *------------------------- @@ -616,10 +609,8 @@ parse_configuration_item(t_configuration_options *options, ItemList *error_list, { if (strcmp(value, "physical") == 0) options->replication_type = REPLICATION_TYPE_PHYSICAL; - else if (strcmp(value, "bdr") == 0) - options->replication_type = 
REPLICATION_TYPE_BDR; else - item_list_append(error_list, _("value for \"replication_type\" must be \"physical\" or \"bdr\"")); + item_list_append(error_list, _("value for \"replication_type\" must be \"physical\"")); } /* log settings */ @@ -778,12 +769,6 @@ parse_configuration_item(t_configuration_options *options, ItemList *error_list, else if (strcmp(name, "witness_sync_interval") == 0) options->witness_sync_interval = repmgr_atoi(value, name, error_list, 1); - /* BDR settings */ - else if (strcmp(name, "bdr_local_monitoring_only") == 0) - options->bdr_local_monitoring_only = parse_bool(value, name, error_list); - else if (strcmp(name, "bdr_recovery_timeout") == 0) - options->bdr_recovery_timeout = repmgr_atoi(value, name, error_list, 0); - /* service settings */ else if (strcmp(name, "pg_ctl_options") == 0) strncpy(options->pg_ctl_options, value, sizeof(options->pg_ctl_options)); @@ -1112,8 +1097,6 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL * with these): * * - async_query_timeout - * - bdr_local_monitoring_only - * - bdr_recovery_timeout * - child_nodes_check_interval * - child_nodes_connected_min_count * - child_nodes_connected_include_witness @@ -1250,24 +1233,6 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type) config_changed = true; } - /* bdr_local_monitoring_only */ - if (orig_options->bdr_local_monitoring_only != new_options.bdr_local_monitoring_only) - { - orig_options->bdr_local_monitoring_only = new_options.bdr_local_monitoring_only; - log_info(_("\"bdr_local_monitoring_only\" is now \"%s\""), new_options.bdr_local_monitoring_only == true ? 
"TRUE" : "FALSE"); - - config_changed = true; - } - - /* bdr_recovery_timeout */ - if (orig_options->bdr_recovery_timeout != new_options.bdr_recovery_timeout) - { - orig_options->bdr_recovery_timeout = new_options.bdr_recovery_timeout; - log_info(_("\"bdr_recovery_timeout\" is now \"%i\""), new_options.bdr_recovery_timeout); - - config_changed = true; - } - /* child_nodes_check_interval */ if (orig_options->child_nodes_check_interval != new_options.child_nodes_check_interval) { diff --git a/configfile.h b/configfile.h index 66eb7366..647209cd 100644 --- a/configfile.h +++ b/configfile.h @@ -161,10 +161,6 @@ typedef struct int child_nodes_disconnect_timeout; char child_nodes_disconnect_command[MAXPGPATH]; - /* BDR settings */ - bool bdr_local_monitoring_only; - bool bdr_recovery_timeout; - /* service settings */ char pg_ctl_options[MAXLEN]; char service_start_command[MAXPGPATH]; @@ -238,8 +234,6 @@ typedef struct DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT, \ DEFAULT_CHILD_NODES_CONNECTED_INCLUDE_WITNESS, \ DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT, "", \ - /* BDR settings */ \ - false, DEFAULT_BDR_RECOVERY_TIMEOUT, \ /* service settings */ \ "", "", "", "", "", "", \ /* repmgrd service settings */ \ diff --git a/dbutils.c b/dbutils.c index 7ac6d6f9..e6614551 100644 --- a/dbutils.c +++ b/dbutils.c @@ -33,13 +33,6 @@ #define NODE_RECORD_PARAM_COUNT 11 -/* - * This is set by is_bdr_db(), which is called by every BDR-related - * action anyway; this is required to be able to generate appropriate - * queries for versions 2 and 3. - */ -int bdr_version_num = UNKNOWN_BDR_VERSION_NUM; - static void log_db_error(PGconn *conn, const char *query_text, const char *fmt,...) 
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); @@ -66,10 +59,6 @@ static ReplSlotStatus _verify_replication_slot(PGconn *conn, char *slot_name, PQ static bool _create_event(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details, t_event_info *event_info, bool send_notification); -static bool _is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet); -static void _populate_bdr_node_record(PGresult *res, t_bdr_node_info *node_info, int row); -static void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list); - /* * This provides a standardized way of logging database errors. Note * that the provided PGconn can be a normal or a replication connection; @@ -119,36 +108,6 @@ parse_lsn(const char *str) } -/* - * Wrap query with appropriate DDL function, if required. - */ -void -wrap_ddl_query(PQExpBufferData *query_buf, int replication_type, const char *fmt,...) -{ - va_list arglist; - char buf[MAXLEN]; - - if (replication_type == REPLICATION_TYPE_BDR) - { - if (bdr_version_num < 3) - appendPQExpBufferStr(query_buf, "SELECT bdr.bdr_replicate_ddl_command($repmgr$"); - else - appendPQExpBufferStr(query_buf, "SELECT bdr.replicate_ddl_command($repmgr$"); - } - - va_start(arglist, fmt); - vsnprintf(buf, MAXLEN, fmt, arglist); - va_end(arglist); - - appendPQExpBufferStr(query_buf, buf); - - if (replication_type == REPLICATION_TYPE_BDR) - { - appendPQExpBufferStr(query_buf, "$repmgr$)"); - } -} - - /* ==================== */ /* Connection functions */ /* ==================== */ @@ -2517,10 +2476,6 @@ parse_node_type(const char *type) { return WITNESS; } - else if (strcmp(type, "bdr") == 0) - { - return BDR; - } return UNKNOWN; } @@ -2537,8 +2492,6 @@ get_node_type_string(t_server_type type) return "standby"; case WITNESS: return "witness"; - case BDR: - return "bdr"; /* this should never happen */ case UNKNOWN: default: @@ -5777,788 +5730,6 @@ is_wal_replay_paused(PGconn *conn, bool check_pending_wal) } 
-/* ============= */ -/* BDR functions */ -/* ============= */ - - -static bool -_is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet) -{ - PQExpBufferData query; - PGresult *res = NULL; - bool is_bdr_db = false; - - initPQExpBuffer(&query); - - appendPQExpBufferStr(&query, - " SELECT (pg_catalog.regexp_matches(extversion, '^\\d+'))[1] AS major_version " - " FROM pg_catalog.pg_extension " - " WHERE extname = 'bdr' "); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) - { - is_bdr_db = false; - bdr_version_num = UNKNOWN_BDR_VERSION_NUM; - } - else - { - is_bdr_db = true; - bdr_version_num = atoi(PQgetvalue(res, 0, 0)); - } - - PQclear(res); - - log_verbose(LOG_DEBUG, "BDR ext version number is %i", bdr_version_num); - - if (is_bdr_db == false) - { - const char *warning = _("BDR extension is not available for this database"); - - if (output != NULL) - appendPQExpBufferStr(output, warning); - else if (quiet == false) - log_warning("%s", warning); - - return is_bdr_db; - } - - if (bdr_version_num < 3) - { - initPQExpBuffer(&query); - - appendPQExpBufferStr(&query, - "SELECT bdr.bdr_is_active_in_db()"); - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - is_bdr_db = atobool(PQgetvalue(res, 0, 0)); - - if (is_bdr_db == false) - { - const char *warning = _("BDR extension available for this database, but the database is not configured for BDR"); - - if (output != NULL) - appendPQExpBufferStr(output, warning); - else if (quiet == false) - log_warning("%s", warning); - } - - PQclear(res); - } - - - return is_bdr_db; -} - - -bool -is_bdr_db(PGconn *conn, PQExpBufferData *output) -{ - return _is_bdr_db(conn, output, false); -} - - -bool -is_bdr_db_quiet(PGconn *conn) -{ - return _is_bdr_db(conn, NULL, true); -} - - -int -get_bdr_version_num(void) -{ - return bdr_version_num; -} - -bool -is_active_bdr_node(PGconn *conn, const char *node_name) -{ - PQExpBufferData query; - 
PGresult *res = NULL; - bool is_active_bdr_node = false; - - initPQExpBuffer(&query); - - if (bdr_version_num < 3) - { - appendPQExpBuffer(&query, - " SELECT COALESCE(s.active, TRUE) AS active" - " FROM bdr.bdr_nodes n " - " LEFT JOIN pg_catalog.pg_replication_slots s " - " ON s.slot_name=bdr.bdr_format_slot_name(n.node_sysid, n.node_timeline, n.node_dboid, (SELECT oid FROM pg_catalog.pg_database WHERE datname = pg_catalog.current_database())) " - " WHERE n.node_name='%s' ", - node_name); - } - else - { - appendPQExpBuffer(&query, - " SELECT COALESCE(s.active, FALSE) AS active" - " FROM bdr.node bn " - " INNER JOIN pglogical.node pn " - " ON (pn.node_id = bn.pglogical_node_id) " - " LEFT JOIN pg_catalog.pg_replication_slots s " - " ON s.slot_name=bn.local_slot_name " - " WHERE pn.node_name='%s' ", - node_name); - } - - log_verbose(LOG_DEBUG, "is_active_bdr_node():\n %s", query.data); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - /* we don't care if the query fails */ - if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) - { - is_active_bdr_node = false; - } - else - { - is_active_bdr_node = atobool(PQgetvalue(res, 0, 0)); - } - - PQclear(res); - - return is_active_bdr_node; -} - - -bool -is_bdr_repmgr(PGconn *conn) -{ - PQExpBufferData query; - PGresult *res = NULL; - int non_bdr_nodes = 0; - - initPQExpBuffer(&query); - - appendPQExpBufferStr(&query, - "SELECT pg_catalog.count(*)" - " FROM repmgr.nodes n" - " WHERE n.type != 'bdr' "); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) - { - PQclear(res); - return false; - } - - non_bdr_nodes = atoi(PQgetvalue(res, 0, 0)); - - PQclear(res); - - return (non_bdr_nodes == 0) ? true : false; -} - - - -/* - * Get name of default BDR replication set. - * - * Caller must free provided value. 
- */ -char * -get_default_bdr_replication_set(PGconn *conn) -{ - PQExpBufferData query; - PGresult *res = NULL; - char *default_replication_set = NULL; - int namelen; - - if (bdr_version_num < 3) - { - /* For BDR2, we use a custom replication set */ - namelen = strlen(BDR2_REPLICATION_SET_NAME); - default_replication_set = pg_malloc0(namelen + 1); - snprintf(default_replication_set, - namelen + 1, - "%s", BDR2_REPLICATION_SET_NAME); - - return default_replication_set; - } - - initPQExpBuffer(&query); - - appendPQExpBufferStr(&query, - " SELECT rs.set_name " - " FROM pglogical.replication_set rs " - " INNER JOIN bdr.node_group ng " - " ON ng.node_group_default_repset = rs.set_id "); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) - { - log_warning(_("unable to retrieve default BDR replication set name")); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - log_detail("%s", PQerrorMessage(conn)); - - PQclear(res); - return NULL; - } - - namelen = strlen(PQgetvalue(res, 0, 0)); - default_replication_set = pg_malloc0(namelen + 1); - - snprintf(default_replication_set, - namelen, - "%s", PQgetvalue(res, 0, 0)); - - PQclear(res); - - return default_replication_set; -} - - -bool -is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set) -{ - PQExpBufferData query; - PGresult *res = NULL; - bool in_replication_set = false; - - initPQExpBuffer(&query); - - if (bdr_version_num < 3) - { - appendPQExpBuffer(&query, - "SELECT pg_catalog.count(*) " - " FROM pg_catalog.unnest(bdr.table_get_replication_sets('repmgr.%s')) AS repset " - " WHERE repset='%s' ", - tablename, - set); - } - else - { - appendPQExpBuffer(&query, - " SELECT pg_catalog.count(*) " - " FROM pglogical.replication_set s " - " INNER JOIN pglogical.replication_set_table st " - " ON s.set_id = st.set_id " - " WHERE s.set_name = '%s' " - " AND st.set_reloid = 'repmgr.%s'::REGCLASS ", - set, - tablename); - 
- } - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) - { - in_replication_set = false; - } - else - { - in_replication_set = atoi(PQgetvalue(res, 0, 0)) == 1 ? true : false; - } - - PQclear(res); - - return in_replication_set; -} - - - -bool -add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char *set) -{ - PQExpBufferData query; - PGresult *res = NULL; - bool success = true; - - initPQExpBuffer(&query); - - if (bdr_version_num < 3) - { - appendPQExpBuffer(&query, - "SELECT bdr.table_set_replication_sets('repmgr.%s', '{%s}')", - tablename, - set); - } - else - { - appendPQExpBuffer(&query, - " SELECT bdr.replication_set_add_table( " - " relation := 'repmgr.%s', " - " set_name := '%s' " - " ) ", - tablename, - set); - } - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - log_error(_("unable to add table \"repmgr.%s\" to replication set \"%s\""), - tablename, - set); - log_detail("%s", PQerrorMessage(conn)); - - success = false; - } - - PQclear(res); - - return success; -} - - -bool -bdr_node_name_matches(PGconn *conn, const char *node_name, PQExpBufferData *bdr_local_node_name) -{ - PQExpBufferData query; - PGresult *res = NULL; - bool node_exists = false; - - initPQExpBuffer(&query); - - if (bdr_version_num < 3) - { - appendPQExpBufferStr(&query, - "SELECT bdr.bdr_get_local_node_name() AS node_name"); - } - else - { - appendPQExpBufferStr(&query, - "SELECT node_name FROM bdr.local_node_info()"); - } - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - node_exists = false; - } - else - { - node_exists = true; - appendPQExpBuffer(bdr_local_node_name, - "%s", PQgetvalue(res, 0, 0)); - } - - PQclear(res); - - return node_exists; -} - - -ReplSlotStatus -get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name) -{ - 
PQExpBufferData query; - PGresult *res = NULL; - ReplSlotStatus status = SLOT_UNKNOWN; - - initPQExpBuffer(&query); - - if (bdr_version_num < 3) - { - appendPQExpBuffer(&query, - " SELECT s.active " - " FROM pg_catalog.pg_replication_slots s " - " WHERE slot_name = " - " (SELECT bdr.bdr_format_slot_name(node_sysid, node_timeline, node_dboid, datoid) " - " FROM bdr.bdr_nodes " - " WHERE node_name = '%s') ", - node_name); - } - else - { - appendPQExpBuffer(&query, - " SELECT COALESCE(s.active, FALSE) AS active" - " FROM bdr.node bn " - " INNER JOIN pglogical.node pn " - " ON (pn.node_id = bn.pglogical_node_id) " - " INNER JOIN pg_catalog.pg_replication_slots s " - " ON s.slot_name=bn.local_slot_name " - " WHERE pn.node_name='%s' ", - node_name); - } - - log_verbose(LOG_DEBUG, "get_bdr_node_replication_slot_status():\n %s", query.data); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) - { - status = SLOT_UNKNOWN; - } - else - { - status = (atobool(PQgetvalue(res, 0, 0)) == true) - ? 
SLOT_ACTIVE - : SLOT_INACTIVE; - } - - PQclear(res); - - return status; -} - - -void -get_bdr_other_node_name(PGconn *conn, int node_id, char *node_name) -{ - PQExpBufferData query; - PGresult *res = NULL; - - initPQExpBuffer(&query); - - appendPQExpBuffer(&query, - " SELECT n.node_name " - " FROM repmgr.nodes n " - " WHERE n.node_id != %i", - node_id); - - log_verbose(LOG_DEBUG, "get_bdr_other_node_name():\n %s", query.data); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) == PGRES_TUPLES_OK) - { - snprintf(node_name, - NAMEDATALEN, - "%s", PQgetvalue(res, 0, 0)); - } - else - { - log_warning(_("get_bdr_other_node_name(): unable to execute query\n %s"), - PQerrorMessage(conn)); - } - PQclear(res); - - return; -} - - -/* - * For BDR 2.x only - */ -void -add_extension_tables_to_bdr_replication_set(PGconn *conn) -{ - PQExpBufferData query; - PGresult *res = NULL; - - initPQExpBuffer(&query); - - appendPQExpBufferStr(&query, - " SELECT c.relname " - " FROM pg_class c " - "INNER JOIN pg_namespace n " - " ON c.relnamespace = n.oid " - " WHERE n.nspname = 'repmgr' " - " AND c.relkind = 'r' "); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - /* XXX log error */ - } - else - { - int i; - - for (i = 0; i < PQntuples(res); i++) - { - add_table_to_bdr_replication_set(conn, - PQgetvalue(res, i, 0), - "repmgr"); - } - } - - PQclear(res); - - return; -} - -void -get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list) -{ - PQExpBufferData query; - PGresult *res = NULL; - - initPQExpBuffer(&query); - - if (bdr_version_num < 3) - { - appendPQExpBufferStr(&query, - " SELECT " BDR2_NODES_COLUMNS - " FROM bdr.bdr_nodes " - "ORDER BY node_seq_id "); - } - else - { - appendPQExpBufferStr(&query, - " SELECT " BDR3_NODES_COLUMNS - " FROM bdr.node_summary ns " - " ORDER BY node_name"); - } - - log_verbose(LOG_DEBUG, "get_all_bdr_node_records():\n%s", query.data); - - 
res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - _populate_bdr_node_records(res, node_list); - - PQclear(res); - return; -} - -RecordStatus -get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info) -{ - PQExpBufferData query; - PGresult *res = NULL; - - initPQExpBuffer(&query); - - if (bdr_version_num < 3) - { - appendPQExpBuffer(&query, - " SELECT " BDR2_NODES_COLUMNS - " FROM bdr.bdr_nodes " - " WHERE node_name = '%s'", - node_name); - } - else - { - appendPQExpBuffer(&query, - " SELECT " BDR3_NODES_COLUMNS - " FROM bdr.node_summary ns " - " WHERE ns.node_name = '%s'", - node_name); - } - - log_verbose(LOG_DEBUG, "get_bdr_node_record_by_name():\n%s", query.data); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - log_error(_("unable to retrieve BDR node record for \"%s\":\n %s"), - node_name, - PQerrorMessage(conn)); - - PQclear(res); - return RECORD_ERROR; - } - - if (PQntuples(res) == 0) - { - PQclear(res); - return RECORD_NOT_FOUND; - } - - _populate_bdr_node_record(res, node_info, 0); - - PQclear(res); - - return RECORD_FOUND; -} - - -static -void -_populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list) -{ - int i; - - clear_node_info_list((NodeInfoList *) node_list); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - return; - } - - for (i = 0; i < PQntuples(res); i++) - { - BdrNodeInfoListCell *cell; - - cell = (BdrNodeInfoListCell *) pg_malloc0(sizeof(BdrNodeInfoListCell)); - - cell->node_info = pg_malloc0(sizeof(t_bdr_node_info)); - - _populate_bdr_node_record(res, cell->node_info, i); - - if (node_list->tail) - node_list->tail->next = cell; - else - node_list->head = cell; - - node_list->tail = cell; - node_list->node_count++; - } - - return; -} - - -static void -_populate_bdr_node_record(PGresult *res, t_bdr_node_info *node_info, int row) -{ - snprintf(node_info->node_sysid, sizeof(node_info->node_sysid), "%s", 
PQgetvalue(res, row, 0)); - node_info->node_timeline = atoi(PQgetvalue(res, row, 1)); - node_info->node_dboid = atoi(PQgetvalue(res, row, 2)); - snprintf(node_info->node_name, sizeof(node_info->node_name), "%s", PQgetvalue(res, row, 3)); - snprintf(node_info->node_local_dsn, sizeof(node_info->node_local_dsn), "%s", PQgetvalue(res, row, 4)); - snprintf(node_info->peer_state_name, sizeof(node_info->peer_state_name), "%s", PQgetvalue(res, row, 5)); -} - - -bool -am_bdr_failover_handler(PGconn *conn, int node_id) -{ - PQExpBufferData query; - PGresult *res = NULL; - bool am_handler = false; - - initPQExpBuffer(&query); - - appendPQExpBuffer(&query, - "SELECT repmgr.am_bdr_failover_handler(%i)", - node_id); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - log_error(_("unable to execute function repmgr.am_bdr_failover_handler():\n %s"), - PQerrorMessage(conn)); - PQclear(res); - return false; - } - - - am_handler = atobool(PQgetvalue(res, 0, 0)); - - PQclear(res); - - return am_handler; -} - -void -unset_bdr_failover_handler(PGconn *conn) -{ - PGresult *res = NULL; - - res = PQexec(conn, "SELECT repmgr.unset_bdr_failover_handler()"); - - PQclear(res); - return; -} - - -bool -bdr_node_has_repmgr_set(PGconn *conn, const char *node_name) -{ - PQExpBufferData query; - PGresult *res = NULL; - bool has_repmgr_set = false; - - if (bdr_version_num >= 3) - return true; - - initPQExpBuffer(&query); - - appendPQExpBuffer(&query, - " SELECT pg_catalog.count(*) " - " FROM pg_catalog.unnest(bdr.connection_get_replication_sets('%s') AS repset " - " WHERE repset = '%s'", - node_name, - BDR2_REPLICATION_SET_NAME); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) == 0) - { - has_repmgr_set = false; - } - else - { - has_repmgr_set = atoi(PQgetvalue(res, 0, 0)) == 1 ? 
true : false; - } - - PQclear(res); - - return has_repmgr_set; -} - - -bool -bdr_node_set_repmgr_set(PGconn *conn, const char *node_name) -{ - PQExpBufferData query; - PGresult *res = NULL; - bool success = true; - - if (bdr_version_num >= 3) - return true; - - initPQExpBuffer(&query); - - /* - * Here we extract a list of existing replication sets, add 'repmgr', and - * set the replication sets to the new list. - */ - appendPQExpBuffer(&query, - " SELECT bdr.connection_set_replication_sets( " - " ARRAY( " - " SELECT repset::TEXT " - " FROM pg_catalog.unnest(bdr.connection_get_replication_sets('%s')) AS repset " - " UNION " - " SELECT '%s'::TEXT " - " ), " - " '%s' " - " ) ", - node_name, - BDR2_REPLICATION_SET_NAME, - node_name); - - log_debug("bdr_node_set_repmgr_set():\n%s", query.data); - - res = PQexec(conn, query.data); - termPQExpBuffer(&query); - - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - log_debug("result status: %s", PQresStatus(PQresultStatus(res))); - log_error(_("unable to create replication set \"repmgr\"")); - log_detail("%s", PQerrorMessage(conn)); - success = false; - } - - PQclear(res); - - return success; -} - - - /* miscellaneous debugging functions */ const char * diff --git a/dbutils.h b/dbutils.h index e78df15b..06c92fd8 100644 --- a/dbutils.h +++ b/dbutils.h @@ -60,11 +60,6 @@ "NULL AS attached " - -#define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''" -#define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name" - - #define ERRBUFF_SIZE 512 typedef enum @@ -72,8 +67,7 @@ typedef enum UNKNOWN = 0, PRIMARY, STANDBY, - WITNESS, - BDR + WITNESS } t_server_type; typedef enum @@ -326,45 +320,6 @@ typedef struct s_connection_user #define T_CONNECTION_USER_INITIALIZER { "", false } -/* represents an entry in bdr.bdr_nodes */ -typedef struct s_bdr_node_info -{ - char node_sysid[MAXLEN]; - uint32 node_timeline; - uint32 node_dboid; - char node_name[MAXLEN]; 
- char node_local_dsn[MAXLEN]; - char peer_state_name[MAXLEN]; -} t_bdr_node_info; - -#define T_BDR_NODE_INFO_INITIALIZER { \ - "", InvalidOid, InvalidOid, \ - "", "", "" \ -} - - -/* structs to store a list of BDR node records */ -typedef struct BdrNodeInfoListCell -{ - struct BdrNodeInfoListCell *next; - t_bdr_node_info *node_info; -} BdrNodeInfoListCell; - -typedef struct BdrNodeInfoList -{ - BdrNodeInfoListCell *head; - BdrNodeInfoListCell *tail; - int node_count; -} BdrNodeInfoList; - -#define T_BDR_NODE_INFO_LIST_INITIALIZER { \ - NULL, \ - NULL, \ - 0 \ -} - - - typedef struct { char filepath[MAXPGPATH]; @@ -374,6 +329,7 @@ typedef struct #define T_CONFIGFILE_INFO_INITIALIZER { "", "", false } + typedef struct { int size; @@ -383,6 +339,7 @@ typedef struct #define T_CONFIGFILE_LIST_INITIALIZER { 0, 0, NULL } + typedef struct { uint64 system_identifier; @@ -422,10 +379,6 @@ typedef struct RepmgrdInfo { /* utility functions */ XLogRecPtr parse_lsn(const char *str); - -extern void -wrap_ddl_query(PQExpBufferData *query_buf, int replication_type, const char *fmt,...) 
-__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); bool atobool(const char *value); /* connection functions */ @@ -630,27 +583,6 @@ int get_upstream_last_seen(PGconn *conn, t_server_type node_type); bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal); -/* BDR functions */ -int get_bdr_version_num(void); -void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list); -RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info); -bool is_bdr_db(PGconn *conn, PQExpBufferData *output); -bool is_bdr_db_quiet(PGconn *conn); -bool is_active_bdr_node(PGconn *conn, const char *node_name); -bool is_bdr_repmgr(PGconn *conn); -char *get_default_bdr_replication_set(PGconn *conn); -bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set); -bool add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char *set); -void add_extension_tables_to_bdr_replication_set(PGconn *conn); -bool bdr_node_name_matches(PGconn *conn, const char *node_name, PQExpBufferData *bdr_local_node_name); -ReplSlotStatus get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name); -void get_bdr_other_node_name(PGconn *conn, int node_id, char *name_buf); - -bool am_bdr_failover_handler(PGconn *conn, int node_id); -void unset_bdr_failover_handler(PGconn *conn); -bool bdr_node_has_repmgr_set(PGconn *conn, const char *node_name); -bool bdr_node_set_repmgr_set(PGconn *conn, const char *node_name); - /* miscellaneous debugging functions */ const char *print_node_status(NodeStatus node_status); const char *print_pqping_status(PGPing ping_status); diff --git a/doc/bdr-failover.md b/doc/bdr-failover.md deleted file mode 100644 index e1063d48..00000000 --- a/doc/bdr-failover.md +++ /dev/null @@ -1,8 +0,0 @@ -BDR failover with repmgrd -========================= - -This document has been integrated into the main `repmgr` documentation -and is now located here: - -> [BDR failover with 
repmgrd](https://repmgr.org/docs/current/repmgrd-bdr.html) - diff --git a/doc/configuration-file-optional-settings.xml b/doc/configuration-file-optional-settings.xml index 6c915f79..0a2d5689 100644 --- a/doc/configuration-file-optional-settings.xml +++ b/doc/configuration-file-optional-settings.xml @@ -66,17 +66,8 @@ - Must be one of physical (for standard streaming replication) - or bdr. + Must be physical (the default). - - - Replication type bdr can only be used with BDR 2.x - - - BDR 3.x users should use physical. - - diff --git a/doc/event-notifications.xml b/doc/event-notifications.xml index ad39fb99..ce4a06ed 100644 --- a/doc/event-notifications.xml +++ b/doc/event-notifications.xml @@ -117,10 +117,6 @@ conninfo string of the primary node ( and ) - - conninfo string of the next available node - (bdr_failover and bdr_recovery) - @@ -130,9 +126,6 @@ name of the current primary node ( and ) - - name of the next available node (bdr_failover and bdr_recovery) - @@ -273,28 +266,6 @@ - - Events generated by &repmgrd; (BDR mode): - - - bdr_failover - - - bdr_reconnect - - - bdr_recovery - - - bdr_register - - - bdr_unregister - - - - - Note that under some circumstances (e.g. when no replication cluster primary could be located), it will not be possible to write an entry into the diff --git a/doc/filelist.xml b/doc/filelist.xml index b2bb1578..1226344b 100644 --- a/doc/filelist.xml +++ b/doc/filelist.xml @@ -33,7 +33,6 @@ - diff --git a/doc/repmgr-cluster-show.xml b/doc/repmgr-cluster-show.xml index 91bc1374..efb07501 100644 --- a/doc/repmgr-cluster-show.xml +++ b/doc/repmgr-cluster-show.xml @@ -18,7 +18,7 @@ Displays information about each registered node in the replication cluster. This command polls each registered server and shows its role (primary / - standby / bdr) and status. It polls each server + standby) and status. 
It polls each server directly and can be run on any node in the cluster; this is also useful when analyzing connectivity from a particular node. diff --git a/doc/repmgr.xml b/doc/repmgr.xml index 524f8223..3cc931e5 100644 --- a/doc/repmgr.xml +++ b/doc/repmgr.xml @@ -91,7 +91,6 @@ &repmgrd-automatic-failover; &repmgrd-configuration; &repmgrd-operation; - &repmgrd-bdr; diff --git a/doc/repmgrd-bdr.xml b/doc/repmgrd-bdr.xml deleted file mode 100644 index 90b813fd..00000000 --- a/doc/repmgrd-bdr.xml +++ /dev/null @@ -1,429 +0,0 @@ - - BDR failover with repmgrd - - - repmgrd - BDR - - - - BDR - - - - &repmgr; 4.x provides support for monitoring a pair of BDR 2.x nodes and taking action in - case one of the nodes fails. - - - - Due to the nature of BDR 1.x/2.x, it's only safe to use this solution for - a two-node scenario. Introducing additional nodes will create an inherent - risk of node desynchronisation if a node goes down without being cleanly - removed from the cluster. - - - - In contrast to streaming replication, there's no concept of "promoting" a new - primary node with BDR. Instead, "failover" involves monitoring both nodes - with &repmgrd; and redirecting queries from the failed node to the remaining - active node. This can be done by using an - event notification script - which is called by &repmgrd; to dynamically - reconfigure a proxy server/connection pooler such as PgBouncer. - - - - - This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6. - It is not required for later BDR versions. - - - - - Prerequisites - - - This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6. - It is not required for later BDR versions. - - - - &repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension - enabled and configured for a two-node BDR network. &repmgr; 4 packages - must be installed on each node before attempting to configure - repmgr. 
- - - - &repmgr; 4 will refuse to install if it detects more than two BDR nodes. - - - - Application database connections *must* be passed through a proxy server/ - connection pooler such as PgBouncer, and it must be possible to dynamically - reconfigure that from &repmgrd;. The example demonstrated in this document - will use PgBouncer - - - The proxy server / connection poolers must not - be installed on the database servers. - - - For this example, it's assumed password-less SSH connections are available - from the PostgreSQL servers to the servers where PgBouncer - runs, and that the user on those servers has permission to alter the - PgBouncer configuration files. - - - PostgreSQL connections must be possible between each node, and each node - must be able to connect to each PgBouncer instance. - - - - - Configuration - - A sample configuration for repmgr.conf on each - BDR node would look like this: - - # Node information - node_id=1 - node_name='node1' - conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2' - data_directory='/var/lib/postgresql/data' - replication_type='bdr' - - # Event notification configuration - event_notifications='bdr_failover' - event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1' - - # repmgrd options - monitor_interval_secs=5 - reconnect_attempts=6 - reconnect_interval=5 - - - Adjust settings as appropriate; copy and adjust for the second node (particularly - the values node_id, node_name - and conninfo). - - - Note that the values provided for the conninfo string - must be valid for connections from both nodes in the - replication cluster. The database must be the BDR-enabled database. - - - If defined, the event_notifications parameter will restrict - execution of the script defined in event_notification_command - to the specified event(s). 
- - - - event_notification_command is the script which does the actual "heavy lifting" - of reconfiguring the proxy server/ connection pooler. It is fully - user-definable; see section for a reference - implementation. - - - - - - - repmgr setup - - Register both nodes; example on node1: - - $ repmgr -f /etc/repmgr.conf bdr register - NOTICE: attempting to install extension "repmgr" - NOTICE: "repmgr" extension successfully installed - NOTICE: node record created for node 'node1' (ID: 1) - NOTICE: BDR node 1 registered (conninfo: host=node1 dbname=bdrtest user=repmgr) - - - and on node1: - - $ repmgr -f /etc/repmgr.conf bdr register - NOTICE: node record created for node 'node2' (ID: 2) - NOTICE: BDR node 2 registered (conninfo: host=node2 dbname=bdrtest user=repmgr) - - - The repmgr extension will be automatically created - when the first node is registered, and will be propagated to the second - node. - - - - Ensure the &repmgr; package is available on both nodes before - attempting to register the first node. 
- - - - At this point the meta data for both nodes has been created; executing - (on either node) should produce output like this: - - $ repmgr -f /etc/repmgr.conf cluster show - ID | Name | Role | Status | Upstream | Location | Connection string - ----+-------+------+-----------+----------+-------------------------------------------------------- - 1 | node1 | bdr | * running | | default | host=node1 dbname=bdrtest user=repmgr connect_timeout=2 - 2 | node2 | bdr | * running | | default | host=node2 dbname=bdrtest user=repmgr connect_timeout=2 - - - Additionally it's possible to display log of significant events; executing - (on either node) should produce output like this: - - $ repmgr -f /etc/repmgr.conf cluster event - Node ID | Event | OK | Timestamp | Details - ---------+--------------+----+---------------------+---------------------------------------------- - 2 | bdr_register | t | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2) - 1 | bdr_register | t | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1) - - - - At this point there will only be records for the two node registrations (displayed here - in reverse chronological order). - - - - - Defining the BDR failover "event_notification_command" - - Key to "failover" execution is the event_notification_command, - which is a user-definable script specified in repmpgr.conf - and which can use a &repmgr; event notification - to reconfigure the proxy server / connection pooler so it points to the other, still-active node. - Details of the event will be passed as parameters to the script. 
- - - Following parameter placeholders are available for the script definition in repmpgr.conf; - these will be replaced with the appropriate value when the script is executed: - - - - - - - - node ID - - - - - - - - - event type - - - - - - - - - success (1 or 0) - - - - - - - - timestamp - - - - - - - - - details - - - - - - - - conninfo string of the next available node (bdr_failover and bdr_recovery) - - - - - - - - name of the next available node (bdr_failover and bdr_recovery) - - - - - - - Note that %c and %a are only provided with - particular failover events, in this case bdr_failover. - - - The provided sample script - (scripts/bdr-pgbouncer.sh) - is configured as follows: - - event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"' - - - and parses the placeholder parameters like this: - - NODE_ID=$1 - EVENT_TYPE=$2 - SUCCESS=$3 - NEXT_CONNINFO=$4 - NEXT_NODE_NAME=$5 - - - - The sample script also contains some hard-coded values for the PgBouncer - configuration for both nodes; these will need to be adjusted for your local environment - (ideally the scripts would be maintained as templates and generated by some - kind of provisioning system). - - - - - The script performs following steps: - - - pauses PgBouncer on all nodes - - - recreates the PgBouncer configuration file on each - node using the information provided by &repmgrd; - (primarily the conninfo string) to configure - PgBouncer - - - reloads the PgBouncer configuration - - - executes the RESUME command (in PgBouncer) - - - - - Following successful script execution, any connections to PgBouncer on the failed BDR node - will be redirected to the active node. - - - - - Node monitoring and failover - - At the intervals specified by monitor_interval_secs - in repmgr.conf, &repmgrd; - will ping each node to check if it's available. 
If a node isn't available, - &repmgrd; will enter failover mode and check reconnect_attempts - times at intervals of reconnect_interval to confirm the node is definitely unreachable. - This buffer period is necessary to avoid false positives caused by transient - network outages. - - - If the node is still unavailable, &repmgrd; will enter failover mode and execute - the script defined in event_notification_command; an entry will be logged - in the repmgr.events table and &repmgrd; will - (unless otherwise configured) resume monitoring of the node in "degraded" mode until it reappears. - - - &repmgrd; logfile output during a failover event will look something like this - on one node (usually the node which has failed, here node2): - - ... - [2017-07-27 21:08:39] [INFO] starting continuous BDR node monitoring - [2017-07-27 21:08:39] [INFO] monitoring BDR replication status on node "node2" (ID: 2) - [2017-07-27 21:08:55] [INFO] monitoring BDR replication status on node "node2" (ID: 2) - [2017-07-27 21:09:11] [INFO] monitoring BDR replication status on node "node2" (ID: 2) - [2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2) - [2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts - [2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts - [2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts - [2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts - [2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts - [2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 
after 5 attempts - [2017-07-27 21:09:28] [NOTICE] setting node record for node 2 to inactive - [2017-07-27 21:09:28] [INFO] executing notification command for event "bdr_failover" - [2017-07-27 21:09:28] [DETAIL] command is: - /path/to/bdr-pgbouncer.sh 2 bdr_failover 1 "host=host=node1 dbname=bdrtest user=repmgr connect_timeout=2" "node1" - [2017-07-27 21:09:28] [INFO] node 'node2' (ID: 2) detected as failed; next available node is 'node1' (ID: 1) - [2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node2" (ID: 2) - [2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode - ... - - - Output on the other node (node1) during the same event will look like this: - - ... - [2017-07-27 21:08:35] [INFO] starting continuous BDR node monitoring - [2017-07-27 21:08:35] [INFO] monitoring BDR replication status on node "node1" (ID: 1) - [2017-07-27 21:08:51] [INFO] monitoring BDR replication status on node "node1" (ID: 1) - [2017-07-27 21:09:07] [INFO] monitoring BDR replication status on node "node1" (ID: 1) - [2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2) - [2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts - [2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts - [2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts - [2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts - [2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts - [2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt - [2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts - [2017-07-27 
21:09:28] [NOTICE] other node's repmgrd is handling failover - [2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node1" (ID: 1) - [2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode - ... - - - This assumes only the PostgreSQL instance on node2 has failed. In this case the - &repmgrd; instance running on node2 has performed the failover. However if - the entire server becomes unavailable, &repmgrd; on node1 will perform - the failover. - - - - Node recovery - - Following failure of a BDR node, if the node subsequently becomes available again, - a bdr_recovery event will be generated. This could potentially be used to - reconfigure PgBouncer automatically to bring the node back into the available pool, - however it would be prudent to manually verify the node's status before - exposing it to the application. - - - If the failed node comes back up and connects correctly, output similar to this - will be visible in the &repmgrd; log: - - [2017-07-27 21:25:30] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode - [2017-07-27 21:25:46] [INFO] monitoring BDR replication status on node "node2" (ID: 2) - [2017-07-27 21:25:46] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode - [2017-07-27 21:25:55] [INFO] active replication slot for node "node1" found after 1 seconds - [2017-07-27 21:25:55] [NOTICE] node "node2" (ID: 2) has recovered after 986 seconds - - - - - Shutdown of both nodes - - If both PostgreSQL instances are shut down, &repmgrd; will try and handle the - situation as gracefully as possible, though with no failover candidates available - there's not much it can do. Should this case ever occur, we recommend shutting - down &repmgrd; on both nodes and restarting it once the PostgreSQL instances - are running properly. 
- - - - diff --git a/doc/repmgrd-configuration.xml b/doc/repmgrd-configuration.xml index 03193759..cf45f494 100644 --- a/doc/repmgrd-configuration.xml +++ b/doc/repmgrd-configuration.xml @@ -619,18 +619,6 @@ repmgrd_service_stop_command='sudo systemctl repmgr12 stop' - - - bdr_local_monitoring_only - - - - - - bdr_recovery_timeout - - - child_nodes_check_interval diff --git a/expected/repmgr_extension.out b/expected/repmgr_extension.out index e0dfdcf1..9013b848 100644 --- a/expected/repmgr_extension.out +++ b/expected/repmgr_extension.out @@ -32,18 +32,6 @@ SELECT * FROM repmgr.show_nodes; (0 rows) -- functions -SELECT repmgr.am_bdr_failover_handler(-1); - am_bdr_failover_handler -------------------------- - -(1 row) - -SELECT repmgr.am_bdr_failover_handler(NULL); - am_bdr_failover_handler -------------------------- - -(1 row) - SELECT repmgr.get_new_primary(); get_new_primary ----------------- @@ -92,9 +80,3 @@ SELECT repmgr.standby_set_last_updated(); (1 row) -SELECT repmgr.unset_bdr_failover_handler(); - unset_bdr_failover_handler ----------------------------- - -(1 row) - diff --git a/repmgr--5.0--5.1.sql b/repmgr--5.0--5.1.sql index 1fea6b16..29d09bd2 100644 --- a/repmgr--5.0--5.1.sql +++ b/repmgr--5.0--5.1.sql @@ -1,3 +1,5 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION repmgr" to load this file. 
\quit +DROP FUNCTION am_bdr_failover_handler(INT); +DROP FUNCTION unset_bdr_failover_handler(); diff --git a/repmgr--5.0.sql b/repmgr--5.0.sql index ba76d7b3..8f04a821 100644 --- a/repmgr--5.0.sql +++ b/repmgr--5.0.sql @@ -153,16 +153,6 @@ CREATE FUNCTION reset_voting_status() AS 'MODULE_PATHNAME', 'reset_voting_status' LANGUAGE C STRICT; -CREATE FUNCTION am_bdr_failover_handler(INT) - RETURNS BOOL - AS 'MODULE_PATHNAME', 'am_bdr_failover_handler' - LANGUAGE C STRICT; - -CREATE FUNCTION unset_bdr_failover_handler() - RETURNS VOID - AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler' - LANGUAGE C STRICT; - CREATE FUNCTION get_repmgrd_pid() RETURNS INT AS 'MODULE_PATHNAME', 'get_repmgrd_pid' diff --git a/repmgr--5.1.sql b/repmgr--5.1.sql index ba76d7b3..8f04a821 100644 --- a/repmgr--5.1.sql +++ b/repmgr--5.1.sql @@ -153,16 +153,6 @@ CREATE FUNCTION reset_voting_status() AS 'MODULE_PATHNAME', 'reset_voting_status' LANGUAGE C STRICT; -CREATE FUNCTION am_bdr_failover_handler(INT) - RETURNS BOOL - AS 'MODULE_PATHNAME', 'am_bdr_failover_handler' - LANGUAGE C STRICT; - -CREATE FUNCTION unset_bdr_failover_handler() - RETURNS VOID - AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler' - LANGUAGE C STRICT; - CREATE FUNCTION get_repmgrd_pid() RETURNS INT AS 'MODULE_PATHNAME', 'get_repmgrd_pid' diff --git a/repmgr-action-bdr.c b/repmgr-action-bdr.c deleted file mode 100644 index 1950791f..00000000 --- a/repmgr-action-bdr.c +++ /dev/null @@ -1,557 +0,0 @@ -/* - * repmgr-action-bdr.c - * - * Implements BDR-related actions for the repmgr command line utility - * - * Copyright (c) 2ndQuadrant, 2010-2020 - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "repmgr.h" - -#include "repmgr-client-global.h" -#include "repmgr-action-bdr.h" - - -/* - * do_bdr_register() - * - * As each BDR node is its own primary, registering a BDR node - * will create the repmgr metadata schema if necessary. - */ -void -do_bdr_register(void) -{ - PGconn *conn = NULL; - BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER; - ExtensionStatus extension_status = REPMGR_UNKNOWN; - t_node_info node_info = T_NODE_INFO_INITIALIZER; - RecordStatus record_status = RECORD_NOT_FOUND; - PQExpBufferData event_details; - bool success = true; - char *dbname = NULL; - - /* sanity-check configuration for BDR-compatability */ - if (config_file_options.replication_type != REPLICATION_TYPE_BDR) - { - log_error(_("cannot run BDR REGISTER on a non-BDR node")); - exit(ERR_BAD_CONFIG); - } - - dbname = pg_malloc0(MAXLEN); - - if (dbname == NULL) - { - log_error(_("unable to allocate memory; terminating.")); - exit(ERR_OUT_OF_MEMORY); - } - - /* store the database name for future reference */ - get_conninfo_value(config_file_options.conninfo, "dbname", dbname); - - conn = establish_db_connection(config_file_options.conninfo, true); - - if (!is_bdr_db(conn, NULL)) - { - log_error(_("database \"%s\" is not BDR-enabled"), dbname); - log_hint(_("when using repmgr with BDR, the repmgr schema must be stored in the BDR database")); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - /* Check that there are at most 2 BDR nodes */ - get_all_bdr_node_records(conn, &bdr_nodes); - - if (bdr_nodes.node_count == 0) - { - log_error(_("database \"%s\" is BDR-enabled but no BDR 
nodes were found"), dbname); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - /* BDR 2 implementation is for 2 nodes only */ - if (get_bdr_version_num() < 3 && bdr_nodes.node_count > 2) - { - log_error(_("repmgr can only support BDR 2.x clusters with 2 nodes")); - log_detail(_("this BDR cluster has %i nodes"), bdr_nodes.node_count); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - if (get_bdr_version_num() > 2) - { - log_error(_("\"repmgr bdr register\" is for BDR 2.x only")); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - - /* check for a matching BDR node */ - { - PQExpBufferData bdr_local_node_name; - bool node_match = false; - - initPQExpBuffer(&bdr_local_node_name); - node_match = bdr_node_name_matches(conn, config_file_options.node_name, &bdr_local_node_name); - - if (node_match == false) - { - if (strlen(bdr_local_node_name.data)) - { - log_error(_("local node BDR node name is \"%s\", expected: \"%s\""), - bdr_local_node_name.data, - config_file_options.node_name); - log_hint(_("\"node_name\" in repmgr.conf must match \"node_name\" in bdr.bdr_nodes")); - } - else - { - log_error(_("local node does not report BDR node name")); - log_hint(_("ensure this is an active BDR node")); - } - - PQfinish(conn); - pfree(dbname); - termPQExpBuffer(&bdr_local_node_name); - exit(ERR_BAD_CONFIG); - } - - termPQExpBuffer(&bdr_local_node_name); - } - - /* check whether repmgr extension exists, and there are no non-BDR nodes registered */ - extension_status = get_repmgr_extension_status(conn, NULL); - - if (extension_status == REPMGR_UNKNOWN) - { - log_error(_("unable to determine status of \"repmgr\" extension in database \"%s\""), - dbname); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - if (extension_status == REPMGR_UNAVAILABLE) - { - log_error(_("\"repmgr\" extension is not available")); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - if (extension_status == REPMGR_INSTALLED) - 
{ - if (!is_bdr_repmgr(conn)) - { - log_error(_("repmgr metadatabase contains records for non-BDR nodes")); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - } - else - { - log_debug("creating repmgr extension in database \"%s\"", dbname); - - begin_transaction(conn); - - if (!create_repmgr_extension(conn)) - { - log_error(_("unable to create repmgr extension - see preceding error message(s); aborting")); - rollback_transaction(conn); - pfree(dbname); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - commit_transaction(conn); - } - - pfree(dbname); - - if (bdr_node_has_repmgr_set(conn, config_file_options.node_name) == false) - { - log_debug("bdr_node_has_repmgr_set() = false"); - bdr_node_set_repmgr_set(conn, config_file_options.node_name); - } - - /* - * before adding the extension tables to the replication set, if any other - * BDR nodes exist, populate repmgr.nodes with a copy of existing entries - * - * currently we won't copy the contents of any other tables - * - */ - { - NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER; - - (void) get_all_node_records(conn, &local_node_records); - - if (local_node_records.node_count == 0) - { - BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER; - BdrNodeInfoListCell *bdr_cell = NULL; - - get_all_bdr_node_records(conn, &bdr_nodes); - - if (bdr_nodes.node_count == 0) - { - log_error(_("unable to retrieve any BDR node records")); - log_detail("%s", PQerrorMessage(conn)); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - for (bdr_cell = bdr_nodes.head; bdr_cell; bdr_cell = bdr_cell->next) - { - PGconn *bdr_node_conn = NULL; - NodeInfoList existing_nodes = T_NODE_INFO_LIST_INITIALIZER; - NodeInfoListCell *cell = NULL; - ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN; - - /* skip the local node */ - if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0) - { - continue; - } - - log_debug("connecting to BDR node \"%s\" (conninfo: 
\"%s\")", - bdr_cell->node_info->node_name, - bdr_cell->node_info->node_local_dsn); - bdr_node_conn = establish_db_connection_quiet(bdr_cell->node_info->node_local_dsn); - - if (PQstatus(bdr_node_conn) != CONNECTION_OK) - { - continue; - } - - /* check repmgr schema exists, skip if not */ - other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL); - - if (other_node_extension_status != REPMGR_INSTALLED) - { - continue; - } - - (void) get_all_node_records(bdr_node_conn, &existing_nodes); - - for (cell = existing_nodes.head; cell; cell = cell->next) - { - log_debug("creating record for node \"%s\" (ID: %i)", - cell->node_info->node_name, cell->node_info->node_id); - create_node_record(conn, "bdr register", cell->node_info); - } - - PQfinish(bdr_node_conn); - break; - } - } - } - - /* Add the repmgr extension tables to a replication set */ - - if (get_bdr_version_num() < 3) - { - add_extension_tables_to_bdr_replication_set(conn); - } - else - { - /* this is the only table we need to replicate */ - char *replication_set = get_default_bdr_replication_set(conn); - - /* - * this probably won't happen, but we need to be sure we're using - * the replication set metadata correctly... 
- */ - if (conn == NULL) - { - log_error(_("unable to retrieve default BDR replication set")); - log_hint(_("see preceding messages")); - log_debug("check query in get_default_bdr_replication_set()"); - exit(ERR_BAD_CONFIG); - } - - if (is_table_in_bdr_replication_set(conn, "nodes", replication_set) == false) - { - add_table_to_bdr_replication_set(conn, "nodes", replication_set); - } - - pfree(replication_set); - } - - initPQExpBuffer(&event_details); - - begin_transaction(conn); - - /* - * we'll check if a record exists (even if the schema was just created), - * as there's a faint chance of a race condition - */ - - record_status = get_node_record(conn, config_file_options.node_id, &node_info); - - /* Update internal node record */ - - node_info.type = BDR; - node_info.node_id = config_file_options.node_id; - node_info.upstream_node_id = NO_UPSTREAM_NODE; - node_info.active = true; - node_info.priority = config_file_options.priority; - - strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)); - strncpy(node_info.location, config_file_options.location, sizeof(node_info.location)); - strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo)); - - if (record_status == RECORD_FOUND) - { - bool node_updated = false; - - /* - * At this point we will have established there are no non-BDR - * records, so no need to verify the node type - */ - if (!runtime_options.force) - { - log_error(_("this node is already registered")); - log_hint(_("use -F/--force to overwrite the existing node record")); - rollback_transaction(conn); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - /* - * don't permit changing the node name - this must match the BDR node - * name set when the node was registered. 
- */ - - if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0) - { - log_error(_("a record for node %i is already registered with node_name \"%s\""), - config_file_options.node_id, node_info.node_name); - log_hint(_("node_name configured in repmgr.conf is \"%s\""), config_file_options.node_name); - - rollback_transaction(conn); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - node_updated = update_node_record(conn, "bdr register", &node_info); - - if (node_updated == true) - { - appendPQExpBuffer(&event_details, _("node record updated for node \"%s\" (%i)"), - config_file_options.node_name, config_file_options.node_id); - log_verbose(LOG_NOTICE, "%s", event_details.data); - } - else - { - success = false; - } - - } - else - { - /* create new node record */ - bool node_created = create_node_record(conn, "bdr register", &node_info); - - if (node_created == true) - { - appendPQExpBuffer(&event_details, - _("node record created for node \"%s\" (ID: %i)"), - config_file_options.node_name, config_file_options.node_id); - log_notice("%s", event_details.data); - } - else - { - success = false; - } - } - - if (success == false) - { - rollback_transaction(conn); - PQfinish(conn); - exit(ERR_DB_QUERY); - } - - commit_transaction(conn); - /* Log the event */ - create_event_notification( - conn, - &config_file_options, - config_file_options.node_id, - "bdr_register", - true, - event_details.data); - - termPQExpBuffer(&event_details); - - PQfinish(conn); - - log_notice(_("BDR node %i registered (conninfo: %s)"), - config_file_options.node_id, config_file_options.conninfo); - - return; -} - - -void -do_bdr_unregister(void) -{ - PGconn *conn = NULL; - ExtensionStatus extension_status = REPMGR_UNKNOWN; - int target_node_id = UNKNOWN_NODE_ID; - t_node_info node_info = T_NODE_INFO_INITIALIZER; - RecordStatus record_status = RECORD_NOT_FOUND; - bool node_record_deleted = false; - PQExpBufferData event_details; - char *dbname; - - /* 
sanity-check configuration for BDR-compatability */ - - if (config_file_options.replication_type != REPLICATION_TYPE_BDR) - { - log_error(_("cannot run BDR UNREGISTER on a non-BDR node")); - exit(ERR_BAD_CONFIG); - } - - dbname = pg_malloc0(MAXLEN); - - if (dbname == NULL) - { - log_error(_("unable to allocate memory; terminating.")); - exit(ERR_OUT_OF_MEMORY); - } - - /* store the database name for future reference */ - get_conninfo_value(config_file_options.conninfo, "dbname", dbname); - - conn = establish_db_connection(config_file_options.conninfo, true); - - if (!is_bdr_db(conn, NULL)) - { - log_error(_("database \"%s\" is not BDR-enabled"), dbname); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - extension_status = get_repmgr_extension_status(conn, NULL); - if (extension_status != REPMGR_INSTALLED) - { - log_error(_("repmgr is not installed on database \"%s\""), dbname); - PQfinish(conn); - pfree(dbname); - exit(ERR_BAD_CONFIG); - } - - pfree(dbname); - - if (!is_bdr_repmgr(conn)) - { - log_error(_("repmgr metadatabase contains records for non-BDR nodes")); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - initPQExpBuffer(&event_details); - if (runtime_options.node_id != UNKNOWN_NODE_ID) - target_node_id = runtime_options.node_id; - else - target_node_id = config_file_options.node_id; - - - /* Check node exists and is really a BDR node */ - record_status = get_node_record(conn, target_node_id, &node_info); - - if (record_status != RECORD_FOUND) - { - log_error(_("no record found for node %i"), target_node_id); - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - begin_transaction(conn); - - log_debug("unregistering node %i", target_node_id); - - node_record_deleted = delete_node_record(conn, target_node_id); - - if (node_record_deleted == false) - { - appendPQExpBuffer(&event_details, - "unable to delete node record for node \"%s\" (ID: %i)", - node_info.node_name, - target_node_id); - rollback_transaction(conn); - } - else - { - 
appendPQExpBuffer(&event_details, - "node record deleted for node \"%s\" (ID: %i)", - node_info.node_name, - target_node_id); - commit_transaction(conn); - } - - - /* Log the event */ - create_event_notification( - conn, - &config_file_options, - config_file_options.node_id, - "bdr_unregister", - true, - event_details.data); - - PQfinish(conn); - - log_notice(_("bdr node \"%s\" (ID: %i) successfully unregistered"), - node_info.node_name, target_node_id); - - termPQExpBuffer(&event_details); - - return; -} - - -void -do_bdr_help(void) -{ - print_help_header(); - - printf(_("Usage:\n")); - printf(_(" %s [OPTIONS] bdr register\n"), progname()); - printf(_(" %s [OPTIONS] bdr unregister\n"), progname()); - puts(""); - - printf(_("BDR REGISTER\n")); - puts(""); - printf(_(" \"bdr register\" initialises the repmgr cluster and registers the initial bdr node.\n")); - puts(""); - printf(_(" -F, --force overwrite an existing node record\n")); - puts(""); - - printf(_("BDR UNREGISTER\n")); - puts(""); - printf(_(" \"bdr unregister\" unregisters an inactive BDR node.\n")); - puts(""); - printf(_(" --node-id ID node to unregister (optional, used when the node to unregister\n" \ - " is offline)\n")); - puts(""); -} diff --git a/repmgr-action-bdr.h b/repmgr-action-bdr.h deleted file mode 100644 index e065ddc0..00000000 --- a/repmgr-action-bdr.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * repmgr-action-bdr.h - * Copyright (c) 2ndQuadrant, 2010-2020 - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _REPMGR_ACTION_BDR_H_ -#define _REPMGR_ACTION_BDR_H_ - -extern void do_bdr_register(void); -extern void do_bdr_unregister(void); - -extern void do_bdr_help(void); - - -#endif /* _REPMGR_ACTION_BDR_H_ */ diff --git a/repmgr-action-node.c b/repmgr-action-node.c index e36ac471..ad497d52 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -159,7 +159,6 @@ do_node_status(void) _("- node is registered as standby but running as primary")); } break; - case BDR: default: break; } @@ -1552,34 +1551,6 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS _("node is witness")); } break; - case BDR: - { - PQExpBufferData output; - - initPQExpBuffer(&output); - if (is_bdr_db(conn, &output) == false) - { - status = CHECK_STATUS_CRITICAL; - appendPQExpBufferStr(&details, - output.data); - } - termPQExpBuffer(&output); - - if (status == CHECK_STATUS_OK) - { - if (is_active_bdr_node(conn, node_info->node_name) == false) - { - status = CHECK_STATUS_CRITICAL; - appendPQExpBufferStr(&details, - _("node is not an active BDR node")); - } - else - { - appendPQExpBufferStr(&details, - _("node is an active BDR node")); - } - } - } default: break; } diff --git a/repmgr-action-witness.c b/repmgr-action-witness.c index 780466ec..087081d9 100644 --- a/repmgr-action-witness.c +++ b/repmgr-action-witness.c @@ -74,18 +74,6 @@ do_witness_register(void) exit(ERR_BAD_CONFIG); } - /* check that witness node is not a BDR node */ - if (is_bdr_db_quiet(witness_conn) == true) - { - log_error(_("witness node is a BDR node")); - log_hint(_("a witness node cannot be configured for a BDR cluster")); - - PQfinish(witness_conn); - - exit(ERR_BAD_CONFIG); - } - - /* connect to primary with provided parameters */ log_info(_("connecting to primary node")); @@ -194,19 +182,6 @@ do_witness_register(void) } } - /* check that primary node is not 
a BDR node */ - if (is_bdr_db_quiet(primary_conn) == true) - { - log_error(_("primary node is a BDR node")); - log_hint(_("a witness node cannot be configured for a BDR cluster")); - - PQfinish(witness_conn); - PQfinish(primary_conn); - - exit(ERR_BAD_CONFIG); - } - - /* create repmgr extension, if does not exist */ if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn)) { diff --git a/repmgr-client.c b/repmgr-client.c index 0d0576b5..7cd6ee25 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -18,9 +18,6 @@ * STANDBY FOLLOW * STANDBY SWITCHOVER * - * BDR REGISTER - * BDR UNREGISTER - * * CLUSTER SHOW * CLUSTER EVENT * CLUSTER CROSSCHECK @@ -67,7 +64,6 @@ #include "repmgr-action-primary.h" #include "repmgr-action-standby.h" #include "repmgr-action-witness.h" -#include "repmgr-action-bdr.h" #include "repmgr-action-node.h" #include "repmgr-action-cluster.h" #include "repmgr-action-service.h" @@ -817,7 +813,6 @@ main(int argc, char **argv) * { PRIMARY | MASTER } REGISTER | * STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER } | * WITNESS { CREATE | REGISTER | UNREGISTER } - * BDR { REGISTER | UNREGISTER } | * NODE { STATUS | CHECK | REJOIN | SERVICE } | * CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP } * SERVICE { STATUS | PAUSE | UNPAUSE | START | STOP } @@ -887,6 +882,7 @@ main(int argc, char **argv) else if (strcasecmp(repmgr_action, "STATUS") == 0) action = NODE_STATUS; } + else if (strcasecmp(repmgr_command, "WITNESS") == 0) { if (help_option == true) @@ -899,23 +895,6 @@ main(int argc, char **argv) else if (strcasecmp(repmgr_action, "UNREGISTER") == 0) action = WITNESS_UNREGISTER; } - else if (strcasecmp(repmgr_command, "BDR") == 0) - { - if (help_option == true) - { - do_bdr_help(); - exit(SUCCESS); - } - - if (strcasecmp(repmgr_action, "REGISTER") == 0) - action = BDR_REGISTER; - else if (strcasecmp(repmgr_action, "UNREGISTER") == 0) - action = BDR_UNREGISTER; - else if 
(strcasecmp(repmgr_action, "CHECK") == 0) - action = NODE_CHECK; - else if (strcasecmp(repmgr_action, "STATUS") == 0) - action = NODE_STATUS; - } else if (strcasecmp(repmgr_command, "NODE") == 0) { @@ -1355,13 +1334,6 @@ main(int argc, char **argv) case WITNESS_UNREGISTER: do_witness_unregister(); break; - /* BDR */ - case BDR_REGISTER: - do_bdr_register(); - break; - case BDR_UNREGISTER: - do_bdr_unregister(); - break; /* NODE */ case NODE_STATUS: @@ -1681,7 +1653,6 @@ check_cli_parameters(const int action) case STANDBY_CLONE: case STANDBY_REGISTER: case STANDBY_FOLLOW: - case BDR_REGISTER: break; default: item_list_append_format(&cli_warnings, @@ -2220,7 +2191,6 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu break; case WITNESS: - case BDR: { /* node is reachable */ if (node_info->node_status == NODE_STATUS_UP) @@ -2469,11 +2439,6 @@ action_name(const int action) case WITNESS_UNREGISTER: return "WITNESS UNREGISTER"; - case BDR_REGISTER: - return "BDR REGISTER"; - case BDR_UNREGISTER: - return "BDR UNREGISTER"; - case NODE_STATUS: return "NODE STATUS"; case NODE_CHECK: @@ -2609,7 +2574,6 @@ do_help(void) printf(_("Usage:\n")); printf(_(" %s [OPTIONS] primary {register|unregister}\n"), progname()); printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname()); - printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname()); printf(_(" %s [OPTIONS] node {status|check|rejoin|service}\n"), progname()); printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck|cleanup}\n"), progname()); printf(_(" %s [OPTIONS] witness {register|unregister}\n"), progname()); @@ -2618,7 +2582,7 @@ do_help(void) puts(""); - printf(_(" Execute \"%s {primary|standby|bdr|node|cluster|witness|service} --help\" to see command-specific options\n"), progname()); + printf(_(" Execute \"%s {primary|standby|node|cluster|witness|service} --help\" to see command-specific options\n"), progname()); puts(""); @@ 
-2742,14 +2706,7 @@ create_repmgr_extension(PGconn *conn) /* 4. Create extension */ - initPQExpBuffer(&query); - - wrap_ddl_query(&query, config_file_options.replication_type, - "CREATE EXTENSION repmgr"); - - res = PQexec(schema_create_conn, query.data); - - termPQExpBuffer(&query); + res = PQexec(schema_create_conn, "CREATE EXTENSION repmgr"); if ((PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)) { @@ -2770,13 +2727,13 @@ create_repmgr_extension(PGconn *conn) { initPQExpBuffer(&query); - wrap_ddl_query(&query, config_file_options.replication_type, - "GRANT USAGE ON SCHEMA repmgr TO %s", - userinfo.username); + appendPQExpBuffer(&query, + "GRANT USAGE ON SCHEMA repmgr TO %s", + userinfo.username); res = PQexec(schema_create_conn, query.data); - termPQExpBuffer(&query); + if (PQresultStatus(res) != PGRES_COMMAND_OK) { log_error(_("unable to grant usage on \"repmgr\" extension to %s:\n %s"), @@ -2791,12 +2748,12 @@ create_repmgr_extension(PGconn *conn) } initPQExpBuffer(&query); - wrap_ddl_query(&query, config_file_options.replication_type, - "GRANT ALL ON ALL TABLES IN SCHEMA repmgr TO %s", - userinfo.username); + + appendPQExpBuffer(&query, + "GRANT ALL ON ALL TABLES IN SCHEMA repmgr TO %s", + userinfo.username); res = PQexec(schema_create_conn, query.data); - termPQExpBuffer(&query); if (PQresultStatus(res) != PGRES_COMMAND_OK) diff --git a/repmgr-client.h b/repmgr-client.h index 83132298..95abb55a 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -34,23 +34,21 @@ #define STANDBY_SWITCHOVER 8 #define WITNESS_REGISTER 9 #define WITNESS_UNREGISTER 10 -#define BDR_REGISTER 11 -#define BDR_UNREGISTER 12 -#define NODE_STATUS 13 -#define NODE_CHECK 14 -#define NODE_SERVICE 15 -#define NODE_REJOIN 16 -#define NODE_CONTROL 17 -#define CLUSTER_SHOW 18 -#define CLUSTER_CLEANUP 19 -#define CLUSTER_MATRIX 20 -#define CLUSTER_CROSSCHECK 21 -#define CLUSTER_EVENT 22 -#define SERVICE_STATUS 23 -#define SERVICE_PAUSE 24 -#define 
SERVICE_UNPAUSE 25 -#define DAEMON_START 26 -#define DAEMON_STOP 27 +#define NODE_STATUS 11 +#define NODE_CHECK 12 +#define NODE_SERVICE 13 +#define NODE_REJOIN 14 +#define NODE_CONTROL 15 +#define CLUSTER_SHOW 16 +#define CLUSTER_CLEANUP 17 +#define CLUSTER_MATRIX 18 +#define CLUSTER_CROSSCHECK 19 +#define CLUSTER_EVENT 20 +#define SERVICE_STATUS 21 +#define SERVICE_PAUSE 22 +#define SERVICE_UNPAUSE 23 +#define DAEMON_START 24 +#define DAEMON_STOP 25 /* command line options without short versions */ #define OPT_HELP 1001 diff --git a/repmgr.c b/repmgr.c index 9a54e588..1303f41b 100644 --- a/repmgr.c +++ b/repmgr.c @@ -84,8 +84,6 @@ typedef struct repmgrdSharedState int current_electoral_term; int candidate_node_id; bool follow_new_primary; - /* BDR failover */ - int bdr_failover_handler; } repmgrdSharedState; static repmgrdSharedState *shared_state = NULL; @@ -131,12 +129,6 @@ PG_FUNCTION_INFO_V1(get_new_primary); Datum reset_voting_status(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(reset_voting_status); -Datum am_bdr_failover_handler(PG_FUNCTION_ARGS); -PG_FUNCTION_INFO_V1(am_bdr_failover_handler); - -Datum unset_bdr_failover_handler(PG_FUNCTION_ARGS); -PG_FUNCTION_INFO_V1(unset_bdr_failover_handler); - Datum set_repmgrd_pid(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(set_repmgrd_pid); @@ -241,7 +233,6 @@ repmgr_shmem_startup(void) shared_state->voting_status = VS_NO_VOTE; shared_state->candidate_node_id = UNKNOWN_NODE_ID; shared_state->follow_new_primary = false; - shared_state->bdr_failover_handler = UNKNOWN_NODE_ID; } LWLockRelease(AddinShmemInitLock); @@ -571,63 +562,6 @@ reset_voting_status(PG_FUNCTION_ARGS) } -Datum -am_bdr_failover_handler(PG_FUNCTION_ARGS) -{ - int node_id = UNKNOWN_NODE_ID; - bool am_handler = false; - - if (!shared_state) - PG_RETURN_NULL(); - - if (PG_ARGISNULL(0)) - PG_RETURN_NULL(); - - node_id = PG_GETARG_INT32(0); - - LWLockAcquire(shared_state->lock, LW_SHARED); - - if (shared_state->bdr_failover_handler == UNKNOWN_NODE_ID) - { - 
LWLockRelease(shared_state->lock); - LWLockAcquire(shared_state->lock, LW_EXCLUSIVE); - shared_state->bdr_failover_handler = node_id; - am_handler = true; - } - else if (shared_state->bdr_failover_handler == node_id) - { - am_handler = true; - } - - LWLockRelease(shared_state->lock); - - PG_RETURN_BOOL(am_handler); -} - - -Datum -unset_bdr_failover_handler(PG_FUNCTION_ARGS) -{ - if (!shared_state) - PG_RETURN_NULL(); - - LWLockAcquire(shared_state->lock, LW_SHARED); - - /* only do something if local_node_id is initialised */ - if (shared_state->local_node_id != UNKNOWN_NODE_ID) - { - LWLockRelease(shared_state->lock); - LWLockAcquire(shared_state->lock, LW_EXCLUSIVE); - - shared_state->bdr_failover_handler = UNKNOWN_NODE_ID; - } - - LWLockRelease(shared_state->lock); - - PG_RETURN_VOID(); -} - - /* * Returns the repmgrd pid; or NULL if none set; or -1 if set but repmgrd * process not running (TODO!) diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 1d7ffb1e..cd874e3c 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -71,8 +71,7 @@ #replication_user='repmgr' # User to make replication connections with, if not set # defaults to the user defined in "conninfo". -#replication_type='physical' # Must be one of "physical" or "bdr". - # NOTE: "bdr" can only be used with BDR 2.x +#replication_type='physical' # Must be "physical" (the default).
#location='default' # An arbitrary string defining the location of the node; this # is used during failover to check visibility of the @@ -290,7 +289,6 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" # node or follow the new upstream node # 'manual': repmgrd will take no action and the node will require # manual attention to reattach it to replication - # (does not apply to BDR mode) #priority=100 # indicates a preferred priority for promoting nodes; # a value of zero prevents the node being promoted to primary @@ -436,12 +434,3 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" # issues with shutting down the demotion candidate. -#------------------------------------------------------------------------------ -# BDR monitoring options -#------------------------------------------------------------------------------ - -#bdr_local_monitoring_only=false # Only monitor the local node; no checks will be - # performed on the other node -#bdr_recovery_timeout # If a BDR node was offline and has become available - # maximum length of time in seconds to wait for the - # node to reconnect to the cluster diff --git a/repmgr.h b/repmgr.h index 7ca53ae5..9ebb8d3c 100644 --- a/repmgr.h +++ b/repmgr.h @@ -78,10 +78,8 @@ #define MIN_SUPPORTED_VERSION_NUM 90300 #define REPLICATION_TYPE_PHYSICAL 1 -#define REPLICATION_TYPE_BDR 2 #define UNKNOWN_SERVER_VERSION_NUM -1 -#define UNKNOWN_BDR_VERSION_NUM -1 #define UNKNOWN_REPMGR_VERSION_NUM -1 #define UNKNOWN_TIMELINE_ID -1 @@ -98,8 +96,6 @@ #define ARCHIVE_STATUS_DIR_ERROR -1 #define NO_DEGRADED_MONITORING_ELAPSED -1 -#define BDR2_REPLICATION_SET_NAME "repmgr" - /* * various default values - ensure repmgr.conf.sample is update * if any of these are changed @@ -113,7 +109,6 @@ #define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */ #define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */ #define DEFAULT_STANDBY_FOLLOW_TIMEOUT 30 /* seconds */ -#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds 
*/ #define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */ #define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */ #define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */ diff --git a/repmgrd-bdr.c b/repmgrd-bdr.c deleted file mode 100644 index 56cd65cd..00000000 --- a/repmgrd-bdr.c +++ /dev/null @@ -1,678 +0,0 @@ -/* - * repmgrd-bdr.c - BDR functionality for repmgrd - * - * Copyright (c) 2ndQuadrant, 2010-2020 - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include "repmgr.h" -#include "repmgrd.h" -#include "repmgrd-bdr.h" -#include "configfile.h" - - -static void do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node); -static void do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node); - - -void -do_bdr_node_check(void) -{ - /* nothing to do at the moment */ -} - -void -handle_sigint_bdr(SIGNAL_ARGS) -{ - PQExpBufferData event_details; - - initPQExpBuffer(&event_details); - - appendPQExpBuffer(&event_details, - _("%s signal received"), - postgres_signal_arg == SIGTERM - ? 
"TERM" : "INT"); - - log_notice("%s", event_details.data); - - create_event_notification(local_conn, - &config_file_options, - config_file_options.node_id, - "repmgrd_shutdown", - true, - event_details.data); - termPQExpBuffer(&event_details); - - terminate(SUCCESS); -} - - -void -monitor_bdr(void) -{ - NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; - t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER; - RecordStatus record_status; - NodeInfoListCell *cell; - instr_time log_status_interval_start; - - /* sanity check local database */ - log_info(_("connecting to local database \"%s\""), - config_file_options.conninfo); - - local_conn = establish_db_connection(config_file_options.conninfo, true); - - /* - * Local node must be running - */ - if (PQstatus(local_conn) != CONNECTION_OK) - { - log_error(_("unable connect to local node (ID: %i), terminating"), - local_node_info.node_id); - log_hint(_("local node must be running before repmgrd can start")); - PQfinish(local_conn); - exit(ERR_DB_CONN); - } - - /* - * Verify that database is a BDR one TODO: check if supported BDR version? 
- */ - log_info(_("connected to database, checking for BDR")); - - if (!is_bdr_db(local_conn, NULL)) - { - log_error(_("database is not BDR-enabled")); - PQfinish(local_conn); - exit(ERR_BAD_CONFIG); - } - - /* - * Check this is a supported BDR version (basically BDR 2.x) - */ - if (get_bdr_version_num() > 2) - { - log_error(_("\"bdr\" mode is for BDR 2.x only")); - log_hint(_("for BDR 3 and later, use \"replication_type=physical\"")); - log_error(_("database is not BDR-enabled")); - exit(ERR_DB_CONN); - } - - if (is_table_in_bdr_replication_set(local_conn, "nodes", "repmgr") == false) - { - log_error(_("repmgr metadata table 'repmgr.%s' is not in the 'repmgr' replication set"), - "nodes"); - - /* - * TODO: add `repmgr bdr sync` or similar for this situation, and hint - * here - */ - - exit(ERR_BAD_CONFIG); - } - - record_status = get_bdr_node_record_by_name(local_conn, local_node_info.node_name, &bdr_node_info); - - if (record_status != RECORD_FOUND) - { - log_error(_("unable to retrieve BDR record for node \"%s\", terminating"), - local_node_info.node_name); - PQfinish(local_conn); - exit(ERR_BAD_CONFIG); - } - - if (local_node_info.active == false) - { - log_error(_("local node (ID: %i) is marked as inactive in repmgr"), - local_node_info.node_id); - log_hint(_("if the node has been reactivated, run \"repmgr bdr register --force\" and restart repmgrd")); - PQfinish(local_conn); - exit(ERR_BAD_CONFIG); - } - - if (is_active_bdr_node(local_conn, local_node_info.node_name) == false) - { - log_error(_("BDR node \"%s\" is not active, terminating"), - local_node_info.node_name); - PQfinish(local_conn); - exit(ERR_BAD_CONFIG); - } - - /* Log startup event */ - create_event_record(local_conn, - &config_file_options, - config_file_options.node_id, - "repmgrd_start", - true, - NULL); - - /* - * retrieve list of all nodes - we'll need these if the DB connection goes - * away - */ - if (get_all_node_records(local_conn, &nodes) == false) - { - /* get_all_node_records() will 
display the error */ - PQfinish(local_conn); - exit(ERR_BAD_CONFIG); - } - - - /* we're expecting all (both) nodes to be up */ - for (cell = nodes.head; cell; cell = cell->next) - { - cell->node_info->node_status = NODE_STATUS_UP; - } - - log_info(_("starting continuous BDR node monitoring on node %i"), - config_file_options.node_id); - - INSTR_TIME_SET_CURRENT(log_status_interval_start); - - while (true) - { - - /* monitoring loop */ - log_verbose(LOG_DEBUG, "BDR check loop - checking %i nodes", nodes.node_count); - - for (cell = nodes.head; cell; cell = cell->next) - { - if (config_file_options.bdr_local_monitoring_only == true - && cell->node_info->node_id != local_node_info.node_id) - { - continue; - } - - if (cell->node_info->node_id == local_node_info.node_id) - { - log_debug("checking local node %i in %s state", - local_node_info.node_id, - print_monitoring_state(cell->node_info->monitoring_state)); - } - else - { - log_debug("checking other node %i in %s state", - cell->node_info->node_id, - print_monitoring_state(cell->node_info->monitoring_state)); - } - - - switch (cell->node_info->monitoring_state) - { - case MS_NORMAL: - { - if (is_server_available(cell->node_info->conninfo) == false) - { - /* node is down, we were expecting it to be up */ - if (cell->node_info->node_status == NODE_STATUS_UP) - { - instr_time node_unreachable_start; - - INSTR_TIME_SET_CURRENT(node_unreachable_start); - - cell->node_info->node_status = NODE_STATUS_DOWN; - - if (cell->node_info->conn != NULL) - { - PQfinish(cell->node_info->conn); - cell->node_info->conn = NULL; - } - - log_warning(_("unable to connect to node \"%s\" (ID %i)"), - cell->node_info->node_name, cell->node_info->node_id); - try_reconnect(&cell->node_info->conn, cell->node_info); - - /* node has recovered - log and continue */ - if (cell->node_info->node_status == NODE_STATUS_UP) - { - int node_unreachable_elapsed = calculate_elapsed(node_unreachable_start); - PQExpBufferData event_details; - - 
initPQExpBuffer(&event_details); - - appendPQExpBuffer(&event_details, - _("reconnected to node %i after %i seconds"), - cell->node_info->node_id, - node_unreachable_elapsed); - log_notice("%s", event_details.data); - - create_event_notification(cell->node_info->conn, - &config_file_options, - config_file_options.node_id, - "bdr_reconnect", - true, - event_details.data); - termPQExpBuffer(&event_details); - - goto loop; - } - - /* still down after reconnect attempt(s) */ - if (cell->node_info->node_status == NODE_STATUS_DOWN) - { - do_bdr_failover(&nodes, cell->node_info); - goto loop; - } - } - } - } - break; - case MS_DEGRADED: - { - /* degraded monitoring */ - if (is_server_available(cell->node_info->conninfo) == true) - { - do_bdr_recovery(&nodes, cell->node_info); - } - - } - break; - } - } - -loop: - - /* emit "still alive" log message at regular intervals, if requested */ - if (config_file_options.log_status_interval > 0) - { - int log_status_interval_elapsed = calculate_elapsed(log_status_interval_start); - if (log_status_interval_elapsed >= config_file_options.log_status_interval) - { - log_info(_("monitoring BDR replication status on node \"%s\" (ID: %i)"), - local_node_info.node_name, - local_node_info.node_id); - - for (cell = nodes.head; cell; cell = cell->next) - { - if (cell->node_info->monitoring_state == MS_DEGRADED) - { - log_detail(_("monitoring node \"%s\" (ID: %i) in degraded mode"), - cell->node_info->node_name, - cell->node_info->node_id); - } - } - INSTR_TIME_SET_CURRENT(log_status_interval_start); - } - } - - if (got_SIGHUP) - { - /* - * if we can reload, then could need to change local_conn - */ - if (reload_config(&config_file_options, BDR)) - { - PQfinish(local_conn); - local_conn = establish_db_connection(config_file_options.conninfo, true); - update_registration(local_conn); - } - - got_SIGHUP = false; - } - - /* XXX this looks like it will never be called */ - if (got_SIGHUP) - { - log_debug("SIGHUP received"); - - if 
(reload_config(&config_file_options, BDR)) - { - PQfinish(local_conn); - local_conn = establish_db_connection(config_file_options.conninfo, true); - - if (*config_file_options.log_file) - { - FILE *fd; - - fd = freopen(config_file_options.log_file, "a", stderr); - if (fd == NULL) - { - fprintf(stderr, "error reopening stderr to \"%s\": %s", - config_file_options.log_file, strerror(errno)); - } - } - } - got_SIGHUP = false; - } - - log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")", - config_file_options.monitor_interval_secs); - sleep(config_file_options.monitor_interval_secs); - } - - return; -} - -/* - * do_bdr_failover() - * - * Here we attempt to perform a BDR "failover". - * - * As there's no equivalent of a physical replication failover, - * we'll do the following: - * - * - connect to active node - * - generate an event log record on that node - * - optionally execute `bdr_failover_command`, passing the conninfo string - * of that node to the command; this can be used for e.g. reconfiguring - * pgbouncer. 
- * - */ - -void -do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node) -{ - PGconn *next_node_conn = NULL; - NodeInfoListCell *cell; - t_event_info event_info = T_EVENT_INFO_INITIALIZER; - t_node_info target_node = T_NODE_INFO_INITIALIZER; - t_node_info failed_node = T_NODE_INFO_INITIALIZER; - RecordStatus record_status; - - /* if one of the two nodes is down, cluster will be in a degraded state */ - monitored_node->monitoring_state = MS_DEGRADED; - INSTR_TIME_SET_CURRENT(degraded_monitoring_start); - - /* terminate local connection if this is the failed node */ - if (monitored_node->node_id == local_node_info.node_id) - { - PQfinish(local_conn); - local_conn = NULL; - } - - - /* get other node */ - - for (cell = nodes->head; cell; cell = cell->next) - { - log_debug("do_bdr_failover() %s", cell->node_info->node_name); - - /* - * don't attempt to connect to the current monitored node, as that's - * the one which has failed - */ - if (cell->node_info->node_id == monitored_node->node_id) - continue; - - /* TODO: reuse local conn if local node is up */ - next_node_conn = establish_db_connection(cell->node_info->conninfo, false); - - if (PQstatus(next_node_conn) == CONNECTION_OK) - { - record_status = get_node_record(next_node_conn, - cell->node_info->node_id, - &target_node); - - if (record_status == RECORD_FOUND) - { - break; - } - } - - next_node_conn = NULL; - } - - /* shouldn't happen, and if it does, it means everything is down */ - if (next_node_conn == NULL) - { - log_error(_("no other available node found")); - - /* no other nodes found - continue degraded monitoring */ - return; - } - - /* - * check if the node record for the failed node is still marked as active, - * if not it means the other node has done the "failover" already - */ - - record_status = get_node_record(next_node_conn, - monitored_node->node_id, - &failed_node); - - if (record_status == RECORD_FOUND && failed_node.active == false) - { - PQfinish(next_node_conn); - 
log_notice(_("record for node %i has already been set inactive"), - failed_node.node_id); - return; - } - - if (am_bdr_failover_handler(next_node_conn, local_node_info.node_id) == false) - { - PQfinish(next_node_conn); - log_notice(_("other node's repmgrd is handling failover")); - return; - } - - - /* check here that the node hasn't come back up */ - if (is_server_available(monitored_node->conninfo) == true) - { - log_notice(_("node %i has reappeared, aborting failover"), - monitored_node->node_id); - monitored_node->monitoring_state = MS_NORMAL; - PQfinish(next_node_conn); - } - - log_debug("this node is the failover handler"); - - { - PQExpBufferData event_details; - - initPQExpBuffer(&event_details); - - event_info.conninfo_str = target_node.conninfo; - event_info.node_name = target_node.node_name; - - /* update node record on the active node */ - update_node_record_set_active(next_node_conn, monitored_node->node_id, false); - - log_notice(_("setting node record for node %i to inactive"), monitored_node->node_id); - - appendPQExpBuffer(&event_details, - _("node \"%s\" (ID: %i) detected as failed; next available node is \"%s\" (ID: %i)"), - monitored_node->node_name, - monitored_node->node_id, - target_node.node_name, - target_node.node_id); - - /* - * Create an event record - * - * If we were able to connect to another node, we'll update the event log - * there. 
- * - * In any case the event notification command will be triggered with the - * event "bdr_failover" - */ - - - create_event_notification_extended(next_node_conn, - &config_file_options, - monitored_node->node_id, - "bdr_failover", - true, - event_details.data, - &event_info); - - log_info("%s", event_details.data); - - termPQExpBuffer(&event_details); - } - - unset_bdr_failover_handler(next_node_conn); - - PQfinish(next_node_conn); - - - return; -} - -static void -do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node) -{ - PGconn *recovered_node_conn; - - t_event_info event_info = T_EVENT_INFO_INITIALIZER; - int i; - bool slot_reactivated = false; - int node_recovery_elapsed; - - char node_name[MAXLEN] = ""; - - log_debug("handling recovery for monitored node %i", monitored_node->node_id); - - recovered_node_conn = establish_db_connection(monitored_node->conninfo, false); - - if (PQstatus(recovered_node_conn) != CONNECTION_OK) - { - PQfinish(recovered_node_conn); - return; - } - - if (PQstatus(local_conn) != CONNECTION_OK) - { - log_debug("no local connection - attempting to reconnect "); - local_conn = establish_db_connection(config_file_options.conninfo, false); - } - - /* - * still unable to connect - the local node is probably down, so we can't - * check for reconnection - */ - if (PQstatus(local_conn) != CONNECTION_OK) - { - PQExpBufferData event_details; - - local_conn = NULL; - log_warning(_("unable to reconnect to local node")); - - initPQExpBuffer(&event_details); - - node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start); - monitored_node->monitoring_state = MS_NORMAL; - monitored_node->node_status = NODE_STATUS_UP; - - appendPQExpBuffer( - &event_details, - _("node \"%s\" (ID: %i) has become available after %i seconds"), - monitored_node->node_name, - monitored_node->node_id, - node_recovery_elapsed); - - log_notice("%s", event_details.data); - - termPQExpBuffer(&event_details); - - PQfinish(recovered_node_conn); - - return; - 
} - - get_bdr_other_node_name(local_conn, local_node_info.node_id, node_name); - - log_info(_("detected recovery on node \"%s\" (ID: %i), checking status"), - monitored_node->node_name, - monitored_node->node_id); - - for (i = 0; i < config_file_options.bdr_recovery_timeout; i++) - { - ReplSlotStatus slot_status; - - log_debug("checking for state of replication slot for node \"%s\"", node_name); - - slot_status = get_bdr_node_replication_slot_status( - local_conn, - node_name); - - if (slot_status == SLOT_ACTIVE) - { - slot_reactivated = true; - break; - } - - sleep(1); - } - - /* mark node as up */ - monitored_node->node_status = NODE_STATUS_UP; - - if (slot_reactivated == false) - { - log_warning(_("no active replication slot for node \"%s\" found after %i seconds"), - node_name, - config_file_options.bdr_recovery_timeout); - log_detail(_("this probably means inter-node BDR connections have not been re-established")); - PQfinish(recovered_node_conn); - return; - } - - log_info(_("active replication slot for node \"%s\" found after %i seconds"), - node_name, - i); - - node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start); - monitored_node->monitoring_state = MS_NORMAL; - - { - PQExpBufferData event_details; - - initPQExpBuffer(&event_details); - - appendPQExpBuffer(&event_details, - _("node \"%s\" (ID: %i) has recovered after %i seconds"), - monitored_node->node_name, - monitored_node->node_id, - node_recovery_elapsed); - - log_notice("%s", event_details.data); - - - /* other node will generate the event */ - if (monitored_node->node_id == local_node_info.node_id) - { - termPQExpBuffer(&event_details); - PQfinish(recovered_node_conn); - - return; - } - - - /* generate the event on the currently active node only */ - if (monitored_node->node_id != local_node_info.node_id) - { - event_info.conninfo_str = monitored_node->conninfo; - event_info.node_name = monitored_node->node_name; - - create_event_notification_extended(local_conn, - 
&config_file_options, - config_file_options.node_id, - "bdr_recovery", - true, - event_details.data, - &event_info); - } - - termPQExpBuffer(&event_details); - } - - update_node_record_set_active(local_conn, monitored_node->node_id, true); - - PQfinish(recovered_node_conn); - - return; -} diff --git a/repmgrd-bdr.h b/repmgrd-bdr.h deleted file mode 100644 index 208dad81..00000000 --- a/repmgrd-bdr.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * repmgrd-bdr.h - * Copyright (c) 2ndQuadrant, 2010-2020 - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef _REPMGRD_BDR_H_ -#define _REPMGRD_BDR_H_ - -extern void do_bdr_node_check(void); -extern void monitor_bdr(void); - -extern void handle_sigint_bdr(SIGNAL_ARGS); -#endif /* _REPMGRD_BDR_H_ */ diff --git a/repmgrd.c b/repmgrd.c index ff78b1f5..383a6aa9 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -26,7 +26,6 @@ #include "repmgr.h" #include "repmgrd.h" #include "repmgrd-physical.h" -#include "repmgrd-bdr.h" #include "configfile.h" #include "voting.h" @@ -484,9 +483,6 @@ main(int argc, char **argv) case REPLICATION_TYPE_PHYSICAL: log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node")); break; - case REPLICATION_TYPE_BDR: - log_hint(_("check that 'repmgr bdr register' was executed for this node")); - break; } close_connection(&local_conn); @@ -513,12 +509,7 @@ main(int argc, char **argv) } } - if (config_file_options.replication_type == REPLICATION_TYPE_BDR) - { - log_debug("node id is %i", local_node_info.node_id); - do_bdr_node_check(); - } - else + if (config_file_options.replication_type == REPLICATION_TYPE_PHYSICAL) { log_debug("node id is %i, upstream node id is %i", local_node_info.node_id, @@ -526,8 +517,6 @@ main(int argc, char **argv) do_physical_node_check(); } - - if (daemonize == true) { daemonize_process(); @@ -576,9 +565,6 @@ start_monitoring(void) case WITNESS: monitor_streaming_witness(); break; - case BDR: - monitor_bdr(); - return; case UNKNOWN: /* should never happen */ break; @@ -771,10 +757,6 @@ setup_event_handlers(void) */ switch (config_file_options.replication_type) { - case REPLICATION_TYPE_BDR: - pqsignal(SIGINT, handle_sigint_bdr); - pqsignal(SIGTERM, handle_sigint_bdr); - break; case REPLICATION_TYPE_PHYSICAL: pqsignal(SIGINT, handle_sigint_physical); pqsignal(SIGTERM, handle_sigint_physical); diff --git a/scripts/bdr-pgbouncer.sh b/scripts/bdr-pgbouncer.sh deleted file mode 100644 index fa244cc9..00000000 --- a/scripts/bdr-pgbouncer.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -set -u 
-set -e - -# Process parameters passed to script -# ----------------------------------- -# -# This assumes the repmgr "event_notification_command" is defined like this: -# -# event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1' -# -# Adjust as appropriate. - -NODE_ID=$1 -EVENT_TYPE=$2 -SUCCESS=$3 -NEXT_CONNINFO=$4 -NEXT_NODE_NAME=$5 - -if [ "$EVENT_TYPE" != "bdr_failover" ]; then - echo "unable to handle event type '$EVENT_TYPE'" - exit -fi - -# Define database name here -# ------------------------- -# -# Note: this assumes the BDR-enabled database has the same name on -# both hosts - -BDR_DBNAME=bdr_db - -# Define PgBouncer hosts here -# --------------------------- - -PGBOUNCER_HOSTS="host1 host2" -PGBOUNCER_PORTS=(6432 6432) -PGBOUNCER_DATABASE_INI=(/path/to/pgbouncer.database.ini /path/to/pgbouncer.database.ini) - - -# Define local host info here -# --------------------------- - -THIS_HOST="host1" -THIS_PGBOUNCER_PORT="6432" -THIS_DB_PORT="5432" - -# Pause all pgbouncer nodes to minimize impact on clients -# ------------------------------------------------------- - -i=0 -for HOST in $PGBOUNCER_HOSTS -do - PORT="${PGBOUNCER_PORTS[$i]}" - - psql -tc "pause" -h $HOST -p $PORT -U postgres pgbouncer - - i=$((i+1)) -done - -# Copy pgbouncer database ini file to all nodes and restart pgbouncer -# ------------------------------------------------------------------- - -i=0 -THIS_HOSTPORT="$THIS_HOST$THIS_PGBOUNCER_PORT" -PGBOUNCER_DATABASE_INI_NEW="/tmp/pgbouncer.database.ini.new" - -for HOST in $PGBOUNCER_HOSTS -do - PORT="${PGBOUNCER_PORTS[$i]}" - - # Recreate the pgbouncer config file - # ---------------------------------- - echo -e "[databases]\n" > $PGBOUNCER_DATABASE_INI_NEW - - echo -e "$BDR_DBNAME= $NEXT_CONNINFO application_name=pgbouncer_$PORT" >> $PGBOUNCER_DATABASE_INI_NEW - - # Copy file to host - # ----------------- - CONFIG="${PGBOUNCER_DATABASE_INI[$i]}" - - if [ "$HOST$PORT" != "$THIS_HOSTPORT" ]; then - 
rsync $PGBOUNCER_DATABASE_INI_NEW $HOST:$CONFIG - else - cp $PGBOUNCER_DATABASE_INI_NEW $CONFIG - fi - - # Reload and resume PgBouncer - # --------------------------- - - psql -tc "reload" -h $HOST -p $PORT -U postgres pgbouncer - psql -tc "resume" -h $HOST -p $PORT -U postgres pgbouncer - - i=$((i+1)) -done - - -# Clean up generated file -rm $PGBOUNCER_DATABASE_INI_NEW - -echo "Reconfiguration of pgbouncer complete" diff --git a/sql/repmgr_extension.sql b/sql/repmgr_extension.sql index dbc8cb57..e7997256 100644 --- a/sql/repmgr_extension.sql +++ b/sql/repmgr_extension.sql @@ -17,8 +17,6 @@ SELECT * FROM repmgr.replication_status; SELECT * FROM repmgr.show_nodes; -- functions -SELECT repmgr.am_bdr_failover_handler(-1); -SELECT repmgr.am_bdr_failover_handler(NULL); SELECT repmgr.get_new_primary(); SELECT repmgr.notify_follow_primary(-1); SELECT repmgr.notify_follow_primary(NULL); @@ -27,4 +25,3 @@ SELECT repmgr.set_local_node_id(-1); SELECT repmgr.set_local_node_id(NULL); SELECT repmgr.standby_get_last_updated(); SELECT repmgr.standby_set_last_updated(); -SELECT repmgr.unset_bdr_failover_handler();