diff --git a/configfile.c b/configfile.c index a30a7f8d..917a38be 100644 --- a/configfile.c +++ b/configfile.c @@ -364,6 +364,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->primary_visibility_consensus = false; memset(options->failover_validation_command, 0, sizeof(options->failover_validation_command)); options->election_rerun_interval = DEFAULT_ELECTION_RERUN_INTERVAL; + options->child_nodes_check_interval = DEFAULT_CHILD_NODES_CHECK_INTERVAL; + memset(options->child_nodes_disconnect_command, 0, sizeof(options->child_nodes_disconnect_command)); + options->child_nodes_disconnect_min_count = DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT; + options->child_nodes_connected_min_count = DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT; + options->child_nodes_disconnect_timeout = DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT; /*------------- * witness settings @@ -662,6 +667,16 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * strncpy(options->failover_validation_command, value, sizeof(options->failover_validation_command)); else if (strcmp(name, "election_rerun_interval") == 0) options->election_rerun_interval = repmgr_atoi(value, name, error_list, 0); + else if (strcmp(name, "child_nodes_check_interval") == 0) + options->child_nodes_check_interval = repmgr_atoi(value, name, error_list, 1); + else if (strcmp(name, "child_nodes_disconnect_command") == 0) + snprintf(options->child_nodes_disconnect_command, sizeof(options->child_nodes_disconnect_command), "%s", value); + else if (strcmp(name, "child_nodes_disconnect_min_count") == 0) + options->child_nodes_disconnect_min_count = repmgr_atoi(value, name, error_list, -1); + else if (strcmp(name, "child_nodes_connected_min_count") == 0) + options->child_nodes_connected_min_count = repmgr_atoi(value, name, error_list, -1); + else if (strcmp(name, "child_nodes_disconnect_timeout") == 0) + options->child_nodes_disconnect_timeout = repmgr_atoi(value, name, error_list, 0); /* witness settings */ else if (strcmp(name, "witness_sync_interval") == 0) @@ -1100,6 +1115,11 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL * - async_query_timeout * - bdr_local_monitoring_only * - bdr_recovery_timeout + * - child_nodes_check_interval + * - child_nodes_connected_min_count + * - child_nodes_disconnect_command + * - child_nodes_disconnect_min_count + * - child_nodes_disconnect_timeout * - connection_check_type * - conninfo * - degraded_monitoring_timeout @@ -1247,6 +1267,84 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type) config_changed = true; } + /* child_nodes_check_interval */ + if (orig_options->child_nodes_check_interval != new_options.child_nodes_check_interval) + { + if (new_options.child_nodes_check_interval < 0) + { + log_error(_("\"child_nodes_check_interval\" must be \"0\" or greater; provided: \"%i\""), + new_options.child_nodes_check_interval); + } + else + { + orig_options->child_nodes_check_interval = new_options.child_nodes_check_interval; + log_info(_("\"child_nodes_check_interval\" is now \"%i\""), new_options.child_nodes_check_interval); + + config_changed = true; + } + } + + /* child_nodes_disconnect_command */ + if (strncmp(orig_options->child_nodes_disconnect_command, new_options.child_nodes_disconnect_command, sizeof(orig_options->child_nodes_disconnect_command)) != 0) + { + snprintf(orig_options->child_nodes_disconnect_command, sizeof(orig_options->child_nodes_disconnect_command), + "%s", new_options.child_nodes_disconnect_command); + log_info(_("\"child_nodes_disconnect_command\" is now \"%s\""), new_options.child_nodes_disconnect_command); + + config_changed = true; + } + + /* child_nodes_disconnect_min_count */ + if (orig_options->child_nodes_disconnect_min_count != new_options.child_nodes_disconnect_min_count) + { + if (new_options.child_nodes_disconnect_min_count < 0) + { + log_error(_("\"child_nodes_disconnect_min_count\" must be \"0\" or greater; provided: \"%i\""), + new_options.child_nodes_disconnect_min_count); + } + else + { + orig_options->child_nodes_disconnect_min_count = new_options.child_nodes_disconnect_min_count; + log_info(_("\"child_nodes_disconnect_min_count\" is now \"%i\""), new_options.child_nodes_disconnect_min_count); + + config_changed = true; + } + } + + /* child_nodes_connected_min_count */ + if (orig_options->child_nodes_connected_min_count != new_options.child_nodes_connected_min_count) + { + if (new_options.child_nodes_connected_min_count < 0) + { + log_error(_("\"child_nodes_connected_min_count\" must be \"0\" or greater; provided: \"%i\""), + new_options.child_nodes_connected_min_count); + } + else + { + orig_options->child_nodes_connected_min_count = new_options.child_nodes_connected_min_count; + log_info(_("\"child_nodes_connected_min_count\" is now \"%i\""), new_options.child_nodes_connected_min_count); + + config_changed = true; + } + } + + /* child_nodes_disconnect_timeout */ + if (orig_options->child_nodes_disconnect_timeout != new_options.child_nodes_disconnect_timeout) + { + if (new_options.child_nodes_disconnect_timeout < 0) + { + log_error(_("\"child_nodes_disconnect_timeout\" must be \"0\" or greater; provided: \"%i\""), + new_options.child_nodes_disconnect_timeout); + } + else + { + orig_options->child_nodes_disconnect_timeout = new_options.child_nodes_disconnect_timeout; + log_info(_("\"child_nodes_disconnect_timeout\" is now \"%i\""), new_options.child_nodes_disconnect_timeout); + + config_changed = true; + } + } + /* conninfo */ if (strncmp(orig_options->conninfo, new_options.conninfo, sizeof(orig_options->conninfo)) != 0) { diff --git a/configfile.h b/configfile.h index 6fabcd1f..1e4e8572 100644 --- a/configfile.h +++ b/configfile.h @@ -148,6 +148,11 @@ typedef struct bool primary_visibility_consensus; char failover_validation_command[MAXPGPATH]; int election_rerun_interval; + int child_nodes_check_interval; + int child_nodes_disconnect_min_count; + int child_nodes_connected_min_count; + int child_nodes_disconnect_timeout; + char child_nodes_disconnect_command[MAXPGPATH]; /* BDR settings */ bool bdr_local_monitoring_only; @@ -221,6 +226,10 @@ typedef struct DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \ -1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, \ CHECK_PING, true, "", DEFAULT_ELECTION_RERUN_INTERVAL, \ + DEFAULT_CHILD_NODES_CHECK_INTERVAL, \ + DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT, \ + DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT, \ + DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT, "", \ /* BDR settings */ \ false, DEFAULT_BDR_RECOVERY_TIMEOUT, \ /* service settings */ \ diff --git a/dbutils.c b/dbutils.c index 04ae684b..30dac885 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1575,6 +1575,7 @@ identify_system(PGconn *repl_conn, t_system_identification *identification) return true; } + TimeLineHistoryEntry * get_timeline_history(PGconn *repl_conn, TimeLineID tli) { @@ -1672,6 +1673,46 @@ get_timeline_history(PGconn *repl_conn, TimeLineID tli) } +bool +get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list) +{ + PQExpBufferData query; + PGresult *res = NULL; + bool success = true; + + initPQExpBuffer(&query); + + appendPQExpBuffer(&query, + " SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, " + " n.slot_name, n.location, n.priority, n.active, n.config_file, " + " '' AS upstream_node_name, " + " CASE WHEN sr.application_name IS NULL THEN FALSE ELSE TRUE END AS attached " + " FROM repmgr.nodes n " + " LEFT JOIN pg_catalog.pg_stat_replication sr " + " ON sr.application_name = n.node_name " + " WHERE n.upstream_node_id = %i ", + node_id); + + log_verbose(LOG_DEBUG, "get_active_sibling_node_records():\n%s", query.data); + + res = PQexec(conn, query.data); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_db_error(conn, query.data, _("get_active_sibling_records(): unable to execute query")); + success = false; + } + + termPQExpBuffer(&query); + + /* this will return an empty list if there was an error executing the query */ + _populate_node_records(res, node_list); + + PQclear(res); + + return success; +} + /* =============================== */ /* repmgrd shared memory functions */ /* =============================== */ @@ -2273,9 +2314,18 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row, bool init_ node_info->active = atobool(PQgetvalue(res, row, 9)); snprintf(node_info->config_file, sizeof(node_info->config_file), "%s", PQgetvalue(res, row, 10)); - /* This won't normally be set */ + /* These are only set by certain queries */ snprintf(node_info->upstream_node_name, sizeof(node_info->upstream_node_name), "%s", PQgetvalue(res, row, 11)); + if (PQgetisnull(res, row, 12)) + { + node_info->attached = NODE_ATTACHED_UNKNOWN; + } + else + { + node_info->attached = atobool(PQgetvalue(res, row, 12)) ? NODE_ATTACHED : NODE_DETACHED; + } + /* Set remaining struct fields with default values */ if (init_defaults == true) @@ -2398,7 +2448,7 @@ get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info) initPQExpBuffer(&query); appendPQExpBuffer(&query, " SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, " - " n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name " + " n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached " " FROM repmgr.nodes n " " LEFT JOIN repmgr.nodes un " " ON un.node_id = n.upstream_node_id" @@ -2698,7 +2748,7 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list) appendPQExpBufferStr(&query, " SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, " - " n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name " + " n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached " " FROM repmgr.nodes n " " LEFT JOIN repmgr.nodes un " " ON un.node_id = n.upstream_node_id" diff --git a/dbutils.h b/dbutils.h index e9bbd475..6776e769 100644 --- a/dbutils.h +++ b/dbutils.h @@ -29,7 +29,7 @@ #include "strutil.h" #include "voting.h" -#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name " +#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name, NULL AS attached " #define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''" #define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name" @@ -92,6 +92,13 @@ typedef enum CONN_ERROR } ConnectionStatus; +typedef enum +{ + NODE_ATTACHED_UNKNOWN = -1, + NODE_DETACHED, + NODE_ATTACHED +} NodeAttached; + typedef enum { SLOT_UNKNOWN = -1, @@ -152,7 +159,7 @@ typedef struct s_node_info /* for ad-hoc use e.g. when working with a list of nodes */ char details[MAXLEN]; bool reachable; - bool attached; + NodeAttached attached; /* various statistics */ int max_wal_senders; int attached_wal_receivers; @@ -429,6 +436,7 @@ int get_primary_node_id(PGconn *conn); int get_ready_archive_files(PGconn *conn, const char *data_directory); bool identify_system(PGconn *repl_conn, t_system_identification *identification); TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli); +bool get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list); /* repmgrd shared memory functions */ bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id); diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index 0a378f5a..a08c40b0 100644 --- a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -15,6 +15,11 @@ See also: + + Release 4.4 + ???, 2019 + + Release 4.3.1 ???, 2019 diff --git a/doc/repmgrd-automatic-failover.sgml b/doc/repmgrd-automatic-failover.sgml index ba162744..522a690b 100644 --- a/doc/repmgrd-automatic-failover.sgml +++ b/doc/repmgrd-automatic-failover.sgml @@ -165,6 +165,111 @@ + + + repmgrd + standby disconnection + + + + repmgrd + child node disconnection + + + Monitoring standby disconnections on the primary node + + + + This functionality is available in &repmgr 4.4 and later. + + + + When running on the primary node, repmgrd can + monitor connections and in particular disconnections by its attached + child nodes (standbys), and optionally execute a custom command + if certain criteria are met (such as the number of attached nodes falling to + zero following a failover to a new primary); this command can be used for + example to "fence" the node and ensure it is isolated from any + applications attempting to access the replication cluster. + + + + + + + Every few seconds (defined by the configuration parameter child_nodes_check_interval + (a value of 0 disables this altogether), repmgrd queries + the pg_stat_replication system view and compares + the nodes present there against the list of nodes registered with &repmgr; which + should be attached to the primary. + + + + + + If a child node (standby) is no longer present in pg_stat_replication, + repmgrd notes the time it detected the node's absence, and additionally generates a + child_node_disconnect event. + + + + + + If a chile node (standby) which was absent from pg_stat_replication reappears, + repmgrd clears the time it detected the node's absence, and additionally generates a + child_node_reconnect event. + + + + + + If an entirely new child node (standby) is detected, repmgrd adds it to its internal list + and additionally generates a child_node_new_connect event. + + + + + + If the child_nodes_disconnect_command parameter is set in + repmgr.conf, repmgrd will then loop through all child nodes. + If it determines that insufficient child nodes are connected, and a + minimum of child_nodes_disconnect_timeout seconds (default: 30 + has elapsed since the last node became disconnected, repmgrd will then execute the + child_nodes_disconnect_command script. + + + By default, the child_nodes_disconnect_command will only be executed + if all child nodes are disconnected. If child_nodes_connected_min_count + is set, the child_nodes_disconnect_command script will be triggered + if the number of connected child nodes falls below the specified value (e.g. + if set to 2, the script will be triggered if only one child node + is connected). Alternatively, if child_nodes_disconnect_min_count + and more than that number of child nodes disconnects, the script will be triggered. + + + The child_nodes_disconnect_command script will only be executed once + while the criteria for its execution are met. If the criteria for its execution are no longer + met (i.e. some child nodes have reconnected), it will be executed again if + the criteria for its execution are met again. + + + The child_nodes_disconnect_command script will not be executed if repmgrd is paused. + + + + + + Note that child nodes which are not attached when repmgrd + starts will not be considered as missing, as repmgrd + cannot know why they are not attached. + + + + + + + + repmgrd diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 72c2d3bd..7f0b5554 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -346,6 +346,16 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" # value: %n (node_id), %a (node_name). *Must* be the same on all nodes. #election_rerun_interval=15 # if "failover_validation_command" is set, and the command returns # an error, pause the specified amount of seconds before rerunning the election. + # + # The following items are relevant for repmgrd running on the primary, + # and will be ignored on non-primary nodes +#child_nodes_check_interval=5 # Interval (in seconds) to check for attached child nodes (standbys) +#child_nodes_connected_min_count=-1 # Minimum number of child nodes which must remain connected, otherwise + # disconnection command will be triggered +#child_nodes_disconnect_min_count=-1 # Minimum number of disconnected child nodes required to execute disconnection command + # (ignored if "child_nodes_connected_min_count" set) +#child_nodes_disconnect_timeout=30 # Interval between child node disconnection and disconnection command execution +#child_nodes_disconnect_command= # Command to execute if child node disconnection detected #------------------------------------------------------------------------------ # service control commands diff --git a/repmgr.h b/repmgr.h index 1f88c79d..fc386f02 100644 --- a/repmgr.h +++ b/repmgr.h @@ -96,6 +96,11 @@ #define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */ #define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */ #define DEFAULT_ELECTION_RERUN_INTERVAL 15 /* seconds */ +#define DEFAULT_CHILD_NODES_CHECK_INTERVAL 5 /* seconds */ +#define DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT -1 +#define DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT -1 +#define DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT 30 /* seconds */ + #define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */ diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 71716f5a..a822405a 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -53,6 +53,28 @@ typedef enum } ElectionResult; +typedef struct t_child_node_info +{ + int node_id; + char node_name[NAMEDATALEN]; + NodeAttached attached; + instr_time detached_time; + struct t_child_node_info *next; +} t_child_node_info; + +typedef struct t_child_node_info_list +{ + t_child_node_info *head; + t_child_node_info *tail; + int node_count; +} t_child_node_info_list; + +#define T_CHILD_NODE_INFO_LIST_INITIALIZER { \ + NULL, \ + NULL, \ + 0 \ +} + static PGconn *upstream_conn = NULL; static PGconn *primary_conn = NULL; @@ -63,6 +85,7 @@ static t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER; static instr_time last_monitoring_update; +static bool child_nodes_disconnect_command_executed = false; static ElectionResult do_election(NodeInfoList *sibling_nodes, int *new_primary_id); static const char *_print_election_result(ElectionResult result); @@ -73,6 +96,7 @@ static void notify_followers(NodeInfoList *standby_nodes, int follow_node_id); static void check_connection(t_node_info *node_info, PGconn **conn); static bool check_primary_status(int degraded_monitoring_elapsed); +static void check_primary_child_nodes(t_child_node_info_list *local_child_nodes); static bool wait_primary_notification(int *new_primary_id); static FailoverState follow_new_primary(int new_primary_id); @@ -93,6 +117,10 @@ static ElectionResult execute_failover_validation_command(t_node_info *node_info static void parse_failover_validation_command(const char *template, t_node_info *node_info, PQExpBufferData *out); static bool check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_info); +static t_child_node_info *append_child_node_record(t_child_node_info_list *nodes, int node_id, const char *node_name, NodeAttached attached); +static void remove_child_node_record(t_child_node_info_list *nodes, int node_id); +static void clear_child_node_info_list(t_child_node_info_list *nodes); +static void parse_child_nodes_disconnect_command(char *parsed_command, char *template, int reporting_node_id); void handle_sigint_physical(SIGNAL_ARGS) @@ -228,6 +256,8 @@ void monitor_streaming_primary(void) { instr_time log_status_interval_start; + instr_time child_nodes_check_interval_start; + t_child_node_info_list local_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER; reset_node_voting_status(); repmgrd_set_upstream_node_id(local_conn, NO_UPSTREAM_NODE); @@ -269,8 +299,55 @@ monitor_streaming_primary(void) } INSTR_TIME_SET_CURRENT(log_status_interval_start); + INSTR_TIME_SET_CURRENT(child_nodes_check_interval_start); local_node_info.node_status = NODE_STATUS_UP; + /* + * get list of expected and attached nodes + */ + + { + NodeInfoList db_child_node_records = T_NODE_INFO_LIST_INITIALIZER; + bool success = get_child_nodes(local_conn, config_file_options.node_id, &db_child_node_records); + + if (!success) + { + log_error(_("unable to retrieve list of child nodes")); + } + else + { + NodeInfoListCell *cell; + + for (cell = db_child_node_records.head; cell; cell = cell->next) + { + /* + * At startup, if a node for which a repmgr record exists, is not found + * in pg_stat_replication, we can't know whether it has become detached, or + * (e.g. during a provisioning operation) is a new node which has not yet + * attached. We set the status to "NODE_ATTACHED_UNKNOWN" to stop repmgrd + * emitting bogus "node has become detached" alerts. + */ + (void) append_child_node_record(&local_child_nodes, + cell->node_info->node_id, + cell->node_info->node_name, + cell->node_info->attached == NODE_ATTACHED ? NODE_ATTACHED : NODE_ATTACHED_UNKNOWN); + + if (cell->node_info->attached == NODE_ATTACHED) + { + log_info(_("child node \"%s\" (node ID: %i) is attached"), + cell->node_info->node_name, + cell->node_info->node_id); + } + else + { + log_info(_("child node \"%s\" (node ID: %i) is not yet attached"), + cell->node_info->node_name, + cell->node_info->node_id); + } + } + } + } + while (true) { /* @@ -431,6 +508,20 @@ monitor_streaming_primary(void) * starts up check status, switch monitoring mode */ } + else + { + if (config_file_options.child_nodes_check_interval > 0) + { + int child_nodes_check_interval_elapsed = calculate_elapsed(child_nodes_check_interval_start); + + if (child_nodes_check_interval_elapsed >= config_file_options.child_nodes_check_interval) + { + INSTR_TIME_SET_CURRENT(child_nodes_check_interval_start); + check_primary_child_nodes(&local_child_nodes); + } + } + } + loop: /* check node is still primary, if not restart monitoring */ @@ -678,6 +769,406 @@ check_primary_status(int degraded_monitoring_elapsed) } +static void +check_primary_child_nodes(t_child_node_info_list *local_child_nodes) +{ + NodeInfoList db_child_node_records = T_NODE_INFO_LIST_INITIALIZER; + NodeInfoListCell *cell; + /* lists for newly attached and missing nodes */ + t_child_node_info_list disconnected_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER; + t_child_node_info_list reconnected_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER; + t_child_node_info_list new_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER; + + bool success = get_child_nodes(local_conn, config_file_options.node_id, &db_child_node_records); + + if (!success) + { + /* unlikely this will happen, but if it does, we'll try again next time round */ + log_error(_("unable to retrieve list of child nodes")); + return; + } + + if (db_child_node_records.node_count == 0) + { + /* no registered child nodes - nothing to do */ + return; + } + + /* + * compare DB records with our internal list; + * this will tell us about: + * - previously known nodes and their current status + * - newly registered nodes we didn't know about + * + * We'll need to compare the opposite way to check for nodes + * which are in the internal list, but which have now vanished + */ + for (cell = db_child_node_records.head; cell; cell = cell->next) + { + t_child_node_info *local_child_node_rec; + bool local_child_node_rec_found = false; + + log_debug("child node: %i; attached: %s", + cell->node_info->node_id, + cell->node_info->attached == NODE_ATTACHED ? "yes" : "no"); + + for (local_child_node_rec = local_child_nodes->head; local_child_node_rec; local_child_node_rec = local_child_node_rec->next) + { + if (local_child_node_rec->node_id == cell->node_info->node_id) + { + local_child_node_rec_found = true; + break; + } + } + + if (local_child_node_rec_found == true) + { + /* our node record shows node attached, DB record indicates detached */ + if (local_child_node_rec->attached == NODE_ATTACHED && cell->node_info->attached == NODE_DETACHED) + { + t_child_node_info *detached_child_node; + + local_child_node_rec->attached = NODE_DETACHED; + INSTR_TIME_SET_CURRENT(local_child_node_rec->detached_time); + + detached_child_node = append_child_node_record(&disconnected_child_nodes, + local_child_node_rec->node_id, + local_child_node_rec->node_name, + NODE_DETACHED); + detached_child_node->detached_time = local_child_node_rec->detached_time; + } + /* our node record shows node detached, DB record indicates attached */ + else if (local_child_node_rec->attached == NODE_DETACHED && cell->node_info->attached == NODE_ATTACHED) + { + t_child_node_info *attached_child_node; + + local_child_node_rec->attached = NODE_ATTACHED; + + attached_child_node = append_child_node_record(&reconnected_child_nodes, + local_child_node_rec->node_id, + local_child_node_rec->node_name, + NODE_ATTACHED); + attached_child_node->detached_time = local_child_node_rec->detached_time; + INSTR_TIME_SET_ZERO(local_child_node_rec->detached_time); + } + else if (local_child_node_rec->attached == NODE_ATTACHED_UNKNOWN && cell->node_info->attached == NODE_ATTACHED) + { + local_child_node_rec->attached = NODE_ATTACHED; + + append_child_node_record(&new_child_nodes, + local_child_node_rec->node_id, + local_child_node_rec->node_name, + NODE_ATTACHED); + } + } + else + { + /* node we didn't know about before */ + + NodeAttached attached = cell->node_info->attached; + + /* + * node registered but not attached - set state to "UNKNOWN" + * to prevent a bogus "reattach" event being generated + */ + if (attached == NODE_DETACHED) + attached = NODE_ATTACHED_UNKNOWN; + + (void) append_child_node_record(local_child_nodes, + cell->node_info->node_id, + cell->node_info->node_name, + attached); + (void) append_child_node_record(&new_child_nodes, + cell->node_info->node_id, + cell->node_info->node_name, + attached); + } + } + + /* + * Check if any nodes in local list are no longer in list returned + * from database. + */ + { + t_child_node_info *local_child_node_rec; + bool db_node_rec_found = false; + + for (local_child_node_rec = local_child_nodes->head; local_child_node_rec; local_child_node_rec = local_child_node_rec->next) + { + for (cell = db_child_node_records.head; cell; cell = cell->next) + { + if (cell->node_info->node_id == local_child_node_rec->node_id) + { + db_node_rec_found = true; + break; + } + } + + if (db_node_rec_found == false) + { + log_notice(_("child node \"%s\" (node id %i) is no longer connected or registered"), + local_child_node_rec->node_name, + local_child_node_rec->node_id); + remove_child_node_record(local_child_nodes, local_child_node_rec->node_id); + } + } + } + + /* generate "child_node_disconnect" events */ + if (disconnected_child_nodes.node_count > 0) + { + t_child_node_info *child_node_rec; + for (child_node_rec = disconnected_child_nodes.head; child_node_rec; child_node_rec = child_node_rec->next) + { + PQExpBufferData event_details; + initPQExpBuffer(&event_details); + appendPQExpBuffer(&event_details, + _("node \"%s\" (node ID: %i) has disconnected"), + child_node_rec->node_name, + child_node_rec->node_id); + log_notice("%s", event_details.data); + + create_event_notification(local_conn, + &config_file_options, + local_node_info.node_id, + "child_node_disconnect", + true, + event_details.data); + + termPQExpBuffer(&event_details); + } + } + + /* generate "child_node_reconnect" events */ + if (reconnected_child_nodes.node_count > 0) + { + t_child_node_info *child_node_rec; + for (child_node_rec = reconnected_child_nodes.head; child_node_rec; child_node_rec = child_node_rec->next) + { + PQExpBufferData event_details; + initPQExpBuffer(&event_details); + appendPQExpBuffer(&event_details, + _("node \"%s\" (node ID: %i) has reconnected after %i seconds"), + child_node_rec->node_name, + child_node_rec->node_id, + calculate_elapsed( child_node_rec->detached_time )); + log_notice("%s", event_details.data); + + create_event_notification(local_conn, + &config_file_options, + local_node_info.node_id, + "child_node_reconnect", + true, + event_details.data); + + termPQExpBuffer(&event_details); + } + } + + /* generate "child_node_new_connect" events */ + if (new_child_nodes.node_count > 0) + { + t_child_node_info *child_node_rec; + for (child_node_rec = new_child_nodes.head; child_node_rec; child_node_rec = child_node_rec->next) + { + PQExpBufferData event_details; + initPQExpBuffer(&event_details); + appendPQExpBuffer(&event_details, + _("new node \"%s\" (node ID: %i) has connected"), + child_node_rec->node_name, + child_node_rec->node_id); + log_notice("%s", event_details.data); + + create_event_notification(local_conn, + &config_file_options, + local_node_info.node_id, + "child_node_new_connect", + true, + event_details.data); + + termPQExpBuffer(&event_details); + } + } + + + if (config_file_options.child_nodes_disconnect_command[0] != '\0') + { + /* + * script will only be executed if the number of attached + * standbys is lower than this number + */ + int min_required_connected_count = 1; + int connected_count = 0; + + /* + * Calculate hi + */ + + if (config_file_options.child_nodes_connected_min_count > 0) + { + min_required_connected_count = config_file_options.child_nodes_connected_min_count; + } + else if (config_file_options.child_nodes_disconnect_min_count > 0) + { + min_required_connected_count = + (db_child_node_records.node_count - config_file_options.child_nodes_disconnect_min_count) + + 1; + } + + /* calculate number of connected child nodes */ + for (cell = db_child_node_records.head; cell; cell = cell->next) + { + if (cell->node_info->attached == NODE_ATTACHED) + connected_count ++; + } + + log_debug("connected: %i; min required: %i", + connected_count, + min_required_connected_count); + + if (connected_count < min_required_connected_count) + { + log_notice(_("%i (of %i) child nodes are connected, but at least %i child nodes required"), + connected_count, + db_child_node_records.node_count, + min_required_connected_count); + + if (child_nodes_disconnect_command_executed == false) + { + t_child_node_info *child_node_rec; + + /* set these for informative purposes */ + int most_recently_disconnected_node_id = UNKNOWN_NODE_ID; + int most_recently_disconnected_elapsed = -1; + + bool most_recent_disconnect_below_threshold = false; + instr_time current_time_base; + + INSTR_TIME_SET_CURRENT(current_time_base); + + for (child_node_rec = local_child_nodes->head; child_node_rec; child_node_rec = child_node_rec->next) + { + instr_time current_time = current_time_base; + int seconds_since_detached; + + if (child_node_rec->attached != NODE_DETACHED) + continue; + + INSTR_TIME_SUBTRACT(current_time, child_node_rec->detached_time); + seconds_since_detached = (int) INSTR_TIME_GET_DOUBLE(current_time); + + if (seconds_since_detached < config_file_options.child_nodes_disconnect_timeout) + { + most_recent_disconnect_below_threshold = true; + } + + if (most_recently_disconnected_node_id == UNKNOWN_NODE_ID) + { + most_recently_disconnected_node_id = child_node_rec->node_id; + most_recently_disconnected_elapsed = seconds_since_detached; + } + else if (seconds_since_detached < most_recently_disconnected_elapsed) + { + most_recently_disconnected_node_id = child_node_rec->node_id; + most_recently_disconnected_elapsed = seconds_since_detached; + } + } + + + if (most_recent_disconnect_below_threshold == false && most_recently_disconnected_node_id != UNKNOWN_NODE_ID) + { + char parsed_child_nodes_disconnect_command[MAXPGPATH]; + int child_nodes_disconnect_command_result; + PQExpBufferData event_details; + bool success = true; + + parse_child_nodes_disconnect_command(parsed_child_nodes_disconnect_command, + config_file_options.child_nodes_disconnect_command, + local_node_info.node_id); + + log_info(_("most recently detached child node was %i (ca. %i seconds ago), triggering \"child_nodes_disconnect_command\""), + most_recently_disconnected_node_id, + most_recently_disconnected_elapsed); + + log_info(_("\"child_nodes_disconnect_command\" is:\n \"%s\""), + parsed_child_nodes_disconnect_command); + + child_nodes_disconnect_command_result = system(parsed_child_nodes_disconnect_command); + + initPQExpBuffer(&event_details); + + if (child_nodes_disconnect_command_result != 0) + { + success = false; + + appendPQExpBufferStr(&event_details, + _("unable to execute \"child_nodes_disconnect_command\"")); + + log_error("%s", event_details.data); + } + else + { + appendPQExpBufferStr(&event_details, + _("\"child_nodes_disconnect_command\" successfully executed")); + + log_info("%s", event_details.data); + } + + create_event_notification(local_conn, + &config_file_options, + local_node_info.node_id, + "child_nodes_disconnect_command", + success, + event_details.data); + + termPQExpBuffer(&event_details); + + child_nodes_disconnect_command_executed = true; + } + else if (most_recently_disconnected_node_id != UNKNOWN_NODE_ID) + { + log_info(_("most recently detached child node was %i (ca. %i seconds ago), not triggering \"child_nodes_disconnect_command\""), + most_recently_disconnected_node_id, + most_recently_disconnected_elapsed); + log_detail(_("\"child_nodes_disconnect_timeout\" set to %i seconds"), + config_file_options.child_nodes_disconnect_timeout); + } + else + { + log_info(_("no child nodes have detached since repmgrd startup")); + } + } + else + { + log_info(_("\"child_nodes_disconnect_command\" was previously executed, taking no action")); + } + } + else + { + /* + * "child_nodes_disconnect_command" was executed, but for whatever reason + * enough child nodes have returned to clear the threshold; in that case reset + * the executed flag so we can execute the command again, if necessary + */ + if (child_nodes_disconnect_command_executed == true) + { + log_notice(_("%i (of %i) child nodes are now connected, meeting minimum requirement of %i child nodes"), + connected_count, + db_child_node_records.node_count, + min_required_connected_count); + child_nodes_disconnect_command_executed = false; + } + } + } + + clear_child_node_info_list(&disconnected_child_nodes); + clear_child_node_info_list(&reconnected_child_nodes); + clear_child_node_info_list(&new_child_nodes); + + clear_node_info_list(&db_child_node_records); +} + + /* * repmgrd running on a standby server */ @@ -4357,3 +4848,134 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll return can_follow; } + + +static t_child_node_info * +append_child_node_record(t_child_node_info_list *nodes, int node_id, const char *node_name, NodeAttached attached) +{ + t_child_node_info *child_node = pg_malloc0(sizeof(t_child_node_info)); + + child_node->node_id = node_id; + snprintf(child_node->node_name, sizeof(child_node->node_name), "%s", node_name); + child_node->attached = attached; + + if (nodes->tail) + nodes->tail->next = child_node; + else + nodes->head = child_node; + + nodes->tail = child_node; + nodes->node_count++; + + return child_node; +} + + +static void +remove_child_node_record(t_child_node_info_list *nodes, int node_id) +{ + t_child_node_info *node; + t_child_node_info *prev_node = NULL; + t_child_node_info *next_node = NULL; + + node = nodes->head; + + while (node != NULL) + { + next_node = node->next; + + log_debug("ZZZ node: %i", node->node_id); + + if (node->node_id == node_id) + { + /* first node */ + if (node == nodes->head) + { + nodes->head = next_node; + } + /* last node */ + else if (next_node == NULL) + { + prev_node->next = NULL; + } + else + { + prev_node->next = next_node; + } + pfree(node); + nodes->node_count--; + return; + } + else + { + prev_node = node; + } + node = next_node; + } +} + +static void +clear_child_node_info_list(t_child_node_info_list *nodes) +{ + t_child_node_info *node; + t_child_node_info *next_node; + + node = nodes->head; + + while (node != NULL) + { + next_node = node->next; + pfree(node); + node = next_node; + } + + nodes->head = NULL; + nodes->tail = NULL; + nodes->node_count = 0; +} + + +static void +parse_child_nodes_disconnect_command(char *parsed_command, char *template, int reporting_node_id) +{ + const char *src_ptr = NULL; + char *dst_ptr = NULL; + char *end_ptr = NULL; + + dst_ptr = parsed_command; + end_ptr = (parsed_command + MAXPGPATH) - 1; + *end_ptr = '\0'; + + for (src_ptr = template; *src_ptr; src_ptr++) + { + if (*src_ptr == '%') + { + switch (src_ptr[1]) + { + case '%': + /* %%: replace with % */ + if (dst_ptr < end_ptr) + { + src_ptr++; + *dst_ptr++ = *src_ptr; + } + break; + case 'p': + /* %p: node id of the reporting primary */ + src_ptr++; + snprintf(dst_ptr, end_ptr - dst_ptr, "%i", reporting_node_id); + dst_ptr += strlen(dst_ptr); + break; + } + } + else + { + if (dst_ptr < end_ptr) + *dst_ptr++ = *src_ptr; + } + } + + *dst_ptr = '\0'; + + return; +}