mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgrd: monitor standbys attached to primary
This functionality enables repmgrd (when running on the primary) to monitor connected child nodes. It will log connections and disconnections and generate events. Additionally, repmgrd can execute a custom script if the number of connected child nodes falls below a configurable threshold. This script can be used e.g. to "fence" the primary following a failover situation where a new primary has been promoted and all standbys are now child nodes of that primary.
This commit is contained in:
98
configfile.c
98
configfile.c
@@ -364,6 +364,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->primary_visibility_consensus = false;
|
||||
memset(options->failover_validation_command, 0, sizeof(options->failover_validation_command));
|
||||
options->election_rerun_interval = DEFAULT_ELECTION_RERUN_INTERVAL;
|
||||
options->child_nodes_check_interval = DEFAULT_CHILD_NODES_CHECK_INTERVAL;
|
||||
memset(options->child_nodes_disconnect_command, 0, sizeof(options->child_nodes_disconnect_command));
|
||||
options->child_nodes_disconnect_min_count = DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT;
|
||||
options->child_nodes_connected_min_count = DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT;
|
||||
options->child_nodes_disconnect_timeout = DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT;
|
||||
|
||||
/*-------------
|
||||
* witness settings
|
||||
@@ -662,6 +667,16 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
strncpy(options->failover_validation_command, value, sizeof(options->failover_validation_command));
|
||||
else if (strcmp(name, "election_rerun_interval") == 0)
|
||||
options->election_rerun_interval = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "child_nodes_check_interval") == 0)
|
||||
options->child_nodes_check_interval = repmgr_atoi(value, name, error_list, 1);
|
||||
else if (strcmp(name, "child_nodes_disconnect_command") == 0)
|
||||
snprintf(options->child_nodes_disconnect_command, sizeof(options->child_nodes_disconnect_command), "%s", value);
|
||||
else if (strcmp(name, "child_nodes_disconnect_min_count") == 0)
|
||||
options->child_nodes_disconnect_min_count = repmgr_atoi(value, name, error_list, -1);
|
||||
else if (strcmp(name, "child_nodes_connected_min_count") == 0)
|
||||
options->child_nodes_connected_min_count = repmgr_atoi(value, name, error_list, -1);
|
||||
else if (strcmp(name, "child_nodes_disconnect_timeout") == 0)
|
||||
options->child_nodes_disconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* witness settings */
|
||||
else if (strcmp(name, "witness_sync_interval") == 0)
|
||||
@@ -1100,6 +1115,11 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
||||
* - async_query_timeout
|
||||
* - bdr_local_monitoring_only
|
||||
* - bdr_recovery_timeout
|
||||
* - child_nodes_check_interval
|
||||
* - child_nodes_connected_min_count
|
||||
* - child_nodes_disconnect_command
|
||||
* - child_nodes_disconnect_min_count
|
||||
* - child_nodes_disconnect_timeout
|
||||
* - connection_check_type
|
||||
* - conninfo
|
||||
* - degraded_monitoring_timeout
|
||||
@@ -1247,6 +1267,84 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* child_nodes_check_interval */
|
||||
if (orig_options->child_nodes_check_interval != new_options.child_nodes_check_interval)
|
||||
{
|
||||
if (new_options.child_nodes_check_interval < 0)
|
||||
{
|
||||
log_error(_("\"child_nodes_check_interval\" must be \"0\" or greater; provided: \"%i\""),
|
||||
new_options.child_nodes_check_interval);
|
||||
}
|
||||
else
|
||||
{
|
||||
orig_options->child_nodes_check_interval = new_options.child_nodes_check_interval;
|
||||
log_info(_("\"child_nodes_check_interval\" is now \"%i\""), new_options.child_nodes_check_interval);
|
||||
|
||||
config_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* child_nodes_disconnect_command */
|
||||
if (strncmp(orig_options->child_nodes_disconnect_command, new_options.child_nodes_disconnect_command, sizeof(orig_options->child_nodes_disconnect_command)) != 0)
|
||||
{
|
||||
snprintf(orig_options->child_nodes_disconnect_command, sizeof(orig_options->child_nodes_disconnect_command),
|
||||
"%s", new_options.child_nodes_disconnect_command);
|
||||
log_info(_("\"child_nodes_disconnect_command\" is now \"%s\""), new_options.child_nodes_disconnect_command);
|
||||
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* child_nodes_disconnect_min_count */
|
||||
if (orig_options->child_nodes_disconnect_min_count != new_options.child_nodes_disconnect_min_count)
|
||||
{
|
||||
if (new_options.child_nodes_disconnect_min_count < 0)
|
||||
{
|
||||
log_error(_("\"child_nodes_disconnect_min_count\" must be \"0\" or greater; provided: \"%i\""),
|
||||
new_options.child_nodes_disconnect_min_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
orig_options->child_nodes_disconnect_min_count = new_options.child_nodes_disconnect_min_count;
|
||||
log_info(_("\"child_nodes_disconnect_min_count\" is now \"%i\""), new_options.child_nodes_disconnect_min_count);
|
||||
|
||||
config_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* child_nodes_connected_min_count */
|
||||
if (orig_options->child_nodes_connected_min_count != new_options.child_nodes_connected_min_count)
|
||||
{
|
||||
if (new_options.child_nodes_connected_min_count < 0)
|
||||
{
|
||||
log_error(_("\"child_nodes_connected_min_count\" must be \"0\" or greater; provided: \"%i\""),
|
||||
new_options.child_nodes_connected_min_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
orig_options->child_nodes_connected_min_count = new_options.child_nodes_connected_min_count;
|
||||
log_info(_("\"child_nodes_connected_min_count\" is now \"%i\""), new_options.child_nodes_connected_min_count);
|
||||
|
||||
config_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* child_nodes_disconnect_timeout */
|
||||
if (orig_options->child_nodes_disconnect_timeout != new_options.child_nodes_disconnect_timeout)
|
||||
{
|
||||
if (new_options.child_nodes_disconnect_timeout < 0)
|
||||
{
|
||||
log_error(_("\"child_nodes_disconnect_timeout\" must be \"0\" or greater; provided: \"%i\""),
|
||||
new_options.child_nodes_disconnect_timeout);
|
||||
}
|
||||
else
|
||||
{
|
||||
orig_options->child_nodes_disconnect_timeout = new_options.child_nodes_disconnect_timeout;
|
||||
log_info(_("\"child_nodes_disconnect_timeout\" is now \"%i\""), new_options.child_nodes_disconnect_timeout);
|
||||
|
||||
config_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* conninfo */
|
||||
if (strncmp(orig_options->conninfo, new_options.conninfo, sizeof(orig_options->conninfo)) != 0)
|
||||
{
|
||||
|
||||
@@ -148,6 +148,11 @@ typedef struct
|
||||
bool primary_visibility_consensus;
|
||||
char failover_validation_command[MAXPGPATH];
|
||||
int election_rerun_interval;
|
||||
int child_nodes_check_interval;
|
||||
int child_nodes_disconnect_min_count;
|
||||
int child_nodes_connected_min_count;
|
||||
int child_nodes_disconnect_timeout;
|
||||
char child_nodes_disconnect_command[MAXPGPATH];
|
||||
|
||||
/* BDR settings */
|
||||
bool bdr_local_monitoring_only;
|
||||
@@ -221,6 +226,10 @@ typedef struct
|
||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, \
|
||||
CHECK_PING, true, "", DEFAULT_ELECTION_RERUN_INTERVAL, \
|
||||
DEFAULT_CHILD_NODES_CHECK_INTERVAL, \
|
||||
DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT, \
|
||||
DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT, \
|
||||
DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT, "", \
|
||||
/* BDR settings */ \
|
||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
||||
/* service settings */ \
|
||||
|
||||
56
dbutils.c
56
dbutils.c
@@ -1575,6 +1575,7 @@ identify_system(PGconn *repl_conn, t_system_identification *identification)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
TimeLineHistoryEntry *
|
||||
get_timeline_history(PGconn *repl_conn, TimeLineID tli)
|
||||
{
|
||||
@@ -1672,6 +1673,46 @@ get_timeline_history(PGconn *repl_conn, TimeLineID tli)
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
bool success = true;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, "
|
||||
" n.slot_name, n.location, n.priority, n.active, n.config_file, "
|
||||
" '' AS upstream_node_name, "
|
||||
" CASE WHEN sr.application_name IS NULL THEN FALSE ELSE TRUE END AS attached "
|
||||
" FROM repmgr.nodes n "
|
||||
" LEFT JOIN pg_catalog.pg_stat_replication sr "
|
||||
" ON sr.application_name = n.node_name "
|
||||
" WHERE n.upstream_node_id = %i ",
|
||||
node_id);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_active_sibling_node_records():\n%s", query.data);
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_db_error(conn, query.data, _("get_active_sibling_records(): unable to execute query"));
|
||||
success = false;
|
||||
}
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
/* this will return an empty list if there was an error executing the query */
|
||||
_populate_node_records(res, node_list);
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
/* =============================== */
|
||||
/* repmgrd shared memory functions */
|
||||
/* =============================== */
|
||||
@@ -2273,9 +2314,18 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row, bool init_
|
||||
node_info->active = atobool(PQgetvalue(res, row, 9));
|
||||
snprintf(node_info->config_file, sizeof(node_info->config_file), "%s", PQgetvalue(res, row, 10));
|
||||
|
||||
/* This won't normally be set */
|
||||
/* These are only set by certain queries */
|
||||
snprintf(node_info->upstream_node_name, sizeof(node_info->upstream_node_name), "%s", PQgetvalue(res, row, 11));
|
||||
|
||||
if (PQgetisnull(res, row, 12))
|
||||
{
|
||||
node_info->attached = NODE_ATTACHED_UNKNOWN;
|
||||
}
|
||||
else
|
||||
{
|
||||
node_info->attached = atobool(PQgetvalue(res, row, 12)) ? NODE_ATTACHED : NODE_DETACHED;
|
||||
}
|
||||
|
||||
/* Set remaining struct fields with default values */
|
||||
|
||||
if (init_defaults == true)
|
||||
@@ -2398,7 +2448,7 @@ get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info)
|
||||
initPQExpBuffer(&query);
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, "
|
||||
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name "
|
||||
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached "
|
||||
" FROM repmgr.nodes n "
|
||||
" LEFT JOIN repmgr.nodes un "
|
||||
" ON un.node_id = n.upstream_node_id"
|
||||
@@ -2698,7 +2748,7 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list)
|
||||
|
||||
appendPQExpBufferStr(&query,
|
||||
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, "
|
||||
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name "
|
||||
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name, NULL AS attached "
|
||||
" FROM repmgr.nodes n "
|
||||
" LEFT JOIN repmgr.nodes un "
|
||||
" ON un.node_id = n.upstream_node_id"
|
||||
|
||||
12
dbutils.h
12
dbutils.h
@@ -29,7 +29,7 @@
|
||||
#include "strutil.h"
|
||||
#include "voting.h"
|
||||
|
||||
#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name "
|
||||
#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name, NULL AS attached "
|
||||
#define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''"
|
||||
#define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name"
|
||||
|
||||
@@ -92,6 +92,13 @@ typedef enum
|
||||
CONN_ERROR
|
||||
} ConnectionStatus;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NODE_ATTACHED_UNKNOWN = -1,
|
||||
NODE_DETACHED,
|
||||
NODE_ATTACHED
|
||||
} NodeAttached;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SLOT_UNKNOWN = -1,
|
||||
@@ -152,7 +159,7 @@ typedef struct s_node_info
|
||||
/* for ad-hoc use e.g. when working with a list of nodes */
|
||||
char details[MAXLEN];
|
||||
bool reachable;
|
||||
bool attached;
|
||||
NodeAttached attached;
|
||||
/* various statistics */
|
||||
int max_wal_senders;
|
||||
int attached_wal_receivers;
|
||||
@@ -429,6 +436,7 @@ int get_primary_node_id(PGconn *conn);
|
||||
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
||||
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
||||
TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
|
||||
bool get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list);
|
||||
|
||||
/* repmgrd shared memory functions */
|
||||
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
||||
|
||||
@@ -15,6 +15,11 @@
|
||||
See also: <xref linkend="upgrading-repmgr">
|
||||
</para>
|
||||
|
||||
<sect1 id="release-4.4">
|
||||
<title>Release 4.4</title>
|
||||
<para><emphasis>???, 2019</emphasis></para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="release-4.3.1">
|
||||
<title>Release 4.3.1</title>
|
||||
<para><emphasis>???, 2019</emphasis></para>
|
||||
|
||||
@@ -165,6 +165,111 @@
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-primary-standby-disconnection" xreflabel="Monitoring standby disconnections on the primary">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>standby disconnection</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>child node disconnection</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Monitoring standby disconnections on the primary node</title>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
This functionality is available in <link linkend="release-4.4">&repmgr 4.4</link> and later.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
When running on the primary node, <application>repmgrd</application> can
|
||||
monitor connections and in particular disconnections by its attached
|
||||
child nodes (standbys), and optionally execute a custom command
|
||||
if certain criteria are met (such as the number of attached nodes falling to
|
||||
zero following a failover to a new primary); this command can be used for
|
||||
example to "fence" the node and ensure it is isolated from any
|
||||
applications attempting to access the replication cluster.
|
||||
</para>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Every few seconds (defined by the configuration parameter <varname>child_nodes_check_interval</varname>
|
||||
(a value of <literal>0</literal> disables this altogether), <application>repmgrd</application> queries
|
||||
the <literal>pg_stat_replication</literal> system view and compares
|
||||
the nodes present there against the list of nodes registered with &repmgr; which
|
||||
should be attached to the primary.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If a child node (standby) is no longer present in <literal>pg_stat_replication</literal>,
|
||||
<application>repmgrd</application> notes the time it detected the node's absence, and additionally generates a
|
||||
<literal>child_node_disconnect</literal> event.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If a chile node (standby) which was absent from <literal>pg_stat_replication</literal> reappears,
|
||||
<application>repmgrd</application> clears the time it detected the node's absence, and additionally generates a
|
||||
<literal>child_node_reconnect</literal> event.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If an entirely new child node (standby) is detected, <application>repmgrd</application> adds it to its internal list
|
||||
and additionally generates a <literal>child_node_new_connect</literal> event.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If the <varname>child_nodes_disconnect_command</varname> parameter is set in
|
||||
<filename>repmgr.conf</filename>, <application>repmgrd</application> will then loop through all child nodes.
|
||||
If it determines that insufficient child nodes are connected, and a
|
||||
minimum of <varname>child_nodes_disconnect_timeout</varname> seconds (default: <literal>30</literal>
|
||||
has elapsed since the last node became disconnected, <application>repmgrd</application> will then execute the
|
||||
<varname>child_nodes_disconnect_command</varname> script.
|
||||
</para>
|
||||
<para>
|
||||
By default, the <varname>child_nodes_disconnect_command</varname> will only be executed
|
||||
if all child nodes are disconnected. If <varname>child_nodes_connected_min_count</varname>
|
||||
is set, the <varname>child_nodes_disconnect_command</varname> script will be triggered
|
||||
if the number of connected child nodes falls below the specified value (e.g.
|
||||
if set to <literal>2</literal>, the script will be triggered if only one child node
|
||||
is connected). Alternatively, if <varname>child_nodes_disconnect_min_count</varname>
|
||||
and more than that number of child nodes disconnects, the script will be triggered.
|
||||
</para>
|
||||
<para>
|
||||
The <varname>child_nodes_disconnect_command</varname> script will only be executed once
|
||||
while the criteria for its execution are met. If the criteria for its execution are no longer
|
||||
met (i.e. some child nodes have reconnected), it will be executed again if
|
||||
the criteria for its execution are met again.
|
||||
</para>
|
||||
<para>
|
||||
The <varname>child_nodes_disconnect_command</varname> script will not be executed if <application>repmgrd</application> is paused.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Note that child nodes which are not attached when <application>repmgrd</application>
|
||||
starts will <emphasis>not</emphasis> be considered as missing, as <application>repmgrd</application>
|
||||
cannot know why they are not attached.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-standby-disconnection-on-failover" xreflabel="Standby disconnection on failover">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
|
||||
@@ -346,6 +346,16 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# value: %n (node_id), %a (node_name). *Must* be the same on all nodes.
|
||||
#election_rerun_interval=15 # if "failover_validation_command" is set, and the command returns
|
||||
# an error, pause the specified amount of seconds before rerunning the election.
|
||||
#
|
||||
# The following items are relevant for repmgrd running on the primary,
|
||||
# and will be ignored on non-primary nodes
|
||||
#child_nodes_check_interval=5 # Interval (in seconds) to check for attached child nodes (standbys)
|
||||
#child_nodes_connected_min_count=-1 # Minimum number of child nodes which must remain connected, otherwise
|
||||
# disconnection command will be triggered
|
||||
#child_nodes_disconnect_min_count=-1 # Minimum number of disconnected child nodes required to execute disconnection command
|
||||
# (ignored if "child_nodes_connected_min_count" set)
|
||||
#child_nodes_disconnect_timeout=30 # Interval between child node disconnection and disconnection command execution
|
||||
#child_nodes_disconnect_command= # Command to execute if child node disconnection detected
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# service control commands
|
||||
|
||||
5
repmgr.h
5
repmgr.h
@@ -96,6 +96,11 @@
|
||||
#define DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_ELECTION_RERUN_INTERVAL 15 /* seconds */
|
||||
#define DEFAULT_CHILD_NODES_CHECK_INTERVAL 5 /* seconds */
|
||||
#define DEFAULT_CHILD_NODES_DISCONNECT_MIN_COUNT -1
|
||||
#define DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT -1
|
||||
#define DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT 30 /* seconds */
|
||||
|
||||
|
||||
#define WALRECEIVER_DISABLE_TIMEOUT_VALUE 86400000 /* milliseconds */
|
||||
|
||||
|
||||
@@ -53,6 +53,28 @@ typedef enum
|
||||
} ElectionResult;
|
||||
|
||||
|
||||
typedef struct t_child_node_info
|
||||
{
|
||||
int node_id;
|
||||
char node_name[NAMEDATALEN];
|
||||
NodeAttached attached;
|
||||
instr_time detached_time;
|
||||
struct t_child_node_info *next;
|
||||
} t_child_node_info;
|
||||
|
||||
typedef struct t_child_node_info_list
|
||||
{
|
||||
t_child_node_info *head;
|
||||
t_child_node_info *tail;
|
||||
int node_count;
|
||||
} t_child_node_info_list;
|
||||
|
||||
#define T_CHILD_NODE_INFO_LIST_INITIALIZER { \
|
||||
NULL, \
|
||||
NULL, \
|
||||
0 \
|
||||
}
|
||||
|
||||
static PGconn *upstream_conn = NULL;
|
||||
static PGconn *primary_conn = NULL;
|
||||
|
||||
@@ -63,6 +85,7 @@ static t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
static instr_time last_monitoring_update;
|
||||
|
||||
static bool child_nodes_disconnect_command_executed = false;
|
||||
|
||||
static ElectionResult do_election(NodeInfoList *sibling_nodes, int *new_primary_id);
|
||||
static const char *_print_election_result(ElectionResult result);
|
||||
@@ -73,6 +96,7 @@ static void notify_followers(NodeInfoList *standby_nodes, int follow_node_id);
|
||||
static void check_connection(t_node_info *node_info, PGconn **conn);
|
||||
|
||||
static bool check_primary_status(int degraded_monitoring_elapsed);
|
||||
static void check_primary_child_nodes(t_child_node_info_list *local_child_nodes);
|
||||
|
||||
static bool wait_primary_notification(int *new_primary_id);
|
||||
static FailoverState follow_new_primary(int new_primary_id);
|
||||
@@ -93,6 +117,10 @@ static ElectionResult execute_failover_validation_command(t_node_info *node_info
|
||||
static void parse_failover_validation_command(const char *template, t_node_info *node_info, PQExpBufferData *out);
|
||||
static bool check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_info);
|
||||
|
||||
static t_child_node_info *append_child_node_record(t_child_node_info_list *nodes, int node_id, const char *node_name, NodeAttached attached);
|
||||
static void remove_child_node_record(t_child_node_info_list *nodes, int node_id);
|
||||
static void clear_child_node_info_list(t_child_node_info_list *nodes);
|
||||
static void parse_child_nodes_disconnect_command(char *parsed_command, char *template, int reporting_node_id);
|
||||
|
||||
void
|
||||
handle_sigint_physical(SIGNAL_ARGS)
|
||||
@@ -228,6 +256,8 @@ void
|
||||
monitor_streaming_primary(void)
|
||||
{
|
||||
instr_time log_status_interval_start;
|
||||
instr_time child_nodes_check_interval_start;
|
||||
t_child_node_info_list local_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER;
|
||||
|
||||
reset_node_voting_status();
|
||||
repmgrd_set_upstream_node_id(local_conn, NO_UPSTREAM_NODE);
|
||||
@@ -269,8 +299,55 @@ monitor_streaming_primary(void)
|
||||
}
|
||||
|
||||
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
||||
INSTR_TIME_SET_CURRENT(child_nodes_check_interval_start);
|
||||
local_node_info.node_status = NODE_STATUS_UP;
|
||||
|
||||
/*
|
||||
* get list of expected and attached nodes
|
||||
*/
|
||||
|
||||
{
|
||||
NodeInfoList db_child_node_records = T_NODE_INFO_LIST_INITIALIZER;
|
||||
bool success = get_child_nodes(local_conn, config_file_options.node_id, &db_child_node_records);
|
||||
|
||||
if (!success)
|
||||
{
|
||||
log_error(_("unable to retrieve list of child nodes"));
|
||||
}
|
||||
else
|
||||
{
|
||||
NodeInfoListCell *cell;
|
||||
|
||||
for (cell = db_child_node_records.head; cell; cell = cell->next)
|
||||
{
|
||||
/*
|
||||
* At startup, if a node for which a repmgr record exists, is not found
|
||||
* in pg_stat_replication, we can't know whether it has become detached, or
|
||||
* (e.g. during a provisioning operation) is a new node which has not yet
|
||||
* attached. We set the status to "NODE_ATTACHED_UNKNOWN" to stop repmgrd
|
||||
* emitting bogus "node has become detached" alerts.
|
||||
*/
|
||||
(void) append_child_node_record(&local_child_nodes,
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->attached == NODE_ATTACHED ? NODE_ATTACHED : NODE_ATTACHED_UNKNOWN);
|
||||
|
||||
if (cell->node_info->attached == NODE_ATTACHED)
|
||||
{
|
||||
log_info(_("child node \"%s\" (node ID: %i) is attached"),
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("child node \"%s\" (node ID: %i) is not yet attached"),
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->node_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
/*
|
||||
@@ -431,6 +508,20 @@ monitor_streaming_primary(void)
|
||||
* starts up check status, switch monitoring mode
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
if (config_file_options.child_nodes_check_interval > 0)
|
||||
{
|
||||
int child_nodes_check_interval_elapsed = calculate_elapsed(child_nodes_check_interval_start);
|
||||
|
||||
if (child_nodes_check_interval_elapsed >= config_file_options.child_nodes_check_interval)
|
||||
{
|
||||
INSTR_TIME_SET_CURRENT(child_nodes_check_interval_start);
|
||||
check_primary_child_nodes(&local_child_nodes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
loop:
|
||||
|
||||
/* check node is still primary, if not restart monitoring */
|
||||
@@ -678,6 +769,406 @@ check_primary_status(int degraded_monitoring_elapsed)
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
check_primary_child_nodes(t_child_node_info_list *local_child_nodes)
|
||||
{
|
||||
NodeInfoList db_child_node_records = T_NODE_INFO_LIST_INITIALIZER;
|
||||
NodeInfoListCell *cell;
|
||||
/* lists for newly attached and missing nodes */
|
||||
t_child_node_info_list disconnected_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER;
|
||||
t_child_node_info_list reconnected_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER;
|
||||
t_child_node_info_list new_child_nodes = T_CHILD_NODE_INFO_LIST_INITIALIZER;
|
||||
|
||||
bool success = get_child_nodes(local_conn, config_file_options.node_id, &db_child_node_records);
|
||||
|
||||
if (!success)
|
||||
{
|
||||
/* unlikely this will happen, but if it does, we'll try again next time round */
|
||||
log_error(_("unable to retrieve list of child nodes"));
|
||||
return;
|
||||
}
|
||||
|
||||
if (db_child_node_records.node_count == 0)
|
||||
{
|
||||
/* no registered child nodes - nothing to do */
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* compare DB records with our internal list;
|
||||
* this will tell us about:
|
||||
* - previously known nodes and their current status
|
||||
* - newly registered nodes we didn't know about
|
||||
*
|
||||
* We'll need to compare the opposite way to check for nodes
|
||||
* which are in the internal list, but which have now vanished
|
||||
*/
|
||||
for (cell = db_child_node_records.head; cell; cell = cell->next)
|
||||
{
|
||||
t_child_node_info *local_child_node_rec;
|
||||
bool local_child_node_rec_found = false;
|
||||
|
||||
log_debug("child node: %i; attached: %s",
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->attached == NODE_ATTACHED ? "yes" : "no");
|
||||
|
||||
for (local_child_node_rec = local_child_nodes->head; local_child_node_rec; local_child_node_rec = local_child_node_rec->next)
|
||||
{
|
||||
if (local_child_node_rec->node_id == cell->node_info->node_id)
|
||||
{
|
||||
local_child_node_rec_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (local_child_node_rec_found == true)
|
||||
{
|
||||
/* our node record shows node attached, DB record indicates detached */
|
||||
if (local_child_node_rec->attached == NODE_ATTACHED && cell->node_info->attached == NODE_DETACHED)
|
||||
{
|
||||
t_child_node_info *detached_child_node;
|
||||
|
||||
local_child_node_rec->attached = NODE_DETACHED;
|
||||
INSTR_TIME_SET_CURRENT(local_child_node_rec->detached_time);
|
||||
|
||||
detached_child_node = append_child_node_record(&disconnected_child_nodes,
|
||||
local_child_node_rec->node_id,
|
||||
local_child_node_rec->node_name,
|
||||
NODE_DETACHED);
|
||||
detached_child_node->detached_time = local_child_node_rec->detached_time;
|
||||
}
|
||||
/* our node record shows node detached, DB record indicates attached */
|
||||
else if (local_child_node_rec->attached == NODE_DETACHED && cell->node_info->attached == NODE_ATTACHED)
|
||||
{
|
||||
t_child_node_info *attached_child_node;
|
||||
|
||||
local_child_node_rec->attached = NODE_ATTACHED;
|
||||
|
||||
attached_child_node = append_child_node_record(&reconnected_child_nodes,
|
||||
local_child_node_rec->node_id,
|
||||
local_child_node_rec->node_name,
|
||||
NODE_ATTACHED);
|
||||
attached_child_node->detached_time = local_child_node_rec->detached_time;
|
||||
INSTR_TIME_SET_ZERO(local_child_node_rec->detached_time);
|
||||
}
|
||||
else if (local_child_node_rec->attached == NODE_ATTACHED_UNKNOWN && cell->node_info->attached == NODE_ATTACHED)
|
||||
{
|
||||
local_child_node_rec->attached = NODE_ATTACHED;
|
||||
|
||||
append_child_node_record(&new_child_nodes,
|
||||
local_child_node_rec->node_id,
|
||||
local_child_node_rec->node_name,
|
||||
NODE_ATTACHED);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* node we didn't know about before */
|
||||
|
||||
NodeAttached attached = cell->node_info->attached;
|
||||
|
||||
/*
|
||||
* node registered but not attached - set state to "UNKNOWN"
|
||||
* to prevent a bogus "reattach" event being generated
|
||||
*/
|
||||
if (attached == NODE_DETACHED)
|
||||
attached = NODE_ATTACHED_UNKNOWN;
|
||||
|
||||
(void) append_child_node_record(local_child_nodes,
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name,
|
||||
attached);
|
||||
(void) append_child_node_record(&new_child_nodes,
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name,
|
||||
attached);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if any nodes in local list are no longer in list returned
|
||||
* from database.
|
||||
*/
|
||||
{
|
||||
t_child_node_info *local_child_node_rec;
|
||||
bool db_node_rec_found = false;
|
||||
|
||||
for (local_child_node_rec = local_child_nodes->head; local_child_node_rec; local_child_node_rec = local_child_node_rec->next)
|
||||
{
|
||||
for (cell = db_child_node_records.head; cell; cell = cell->next)
|
||||
{
|
||||
if (cell->node_info->node_id == local_child_node_rec->node_id)
|
||||
{
|
||||
db_node_rec_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (db_node_rec_found == false)
|
||||
{
|
||||
log_notice(_("child node \"%s\" (node id %i) is no longer connected or registered"),
|
||||
local_child_node_rec->node_name,
|
||||
local_child_node_rec->node_id);
|
||||
remove_child_node_record(local_child_nodes, local_child_node_rec->node_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* generate "child_node_disconnect" events */
|
||||
if (disconnected_child_nodes.node_count > 0)
|
||||
{
|
||||
t_child_node_info *child_node_rec;
|
||||
for (child_node_rec = disconnected_child_nodes.head; child_node_rec; child_node_rec = child_node_rec->next)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
initPQExpBuffer(&event_details);
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("node \"%s\" (node ID: %i) has disconnected"),
|
||||
child_node_rec->node_name,
|
||||
child_node_rec->node_id);
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"child_node_disconnect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
|
||||
/* generate "child_node_reconnect" events */
|
||||
if (reconnected_child_nodes.node_count > 0)
|
||||
{
|
||||
t_child_node_info *child_node_rec;
|
||||
for (child_node_rec = reconnected_child_nodes.head; child_node_rec; child_node_rec = child_node_rec->next)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
initPQExpBuffer(&event_details);
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("node \"%s\" (node ID: %i) has reconnected after %i seconds"),
|
||||
child_node_rec->node_name,
|
||||
child_node_rec->node_id,
|
||||
calculate_elapsed( child_node_rec->detached_time ));
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"child_node_reconnect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
|
||||
/* generate "child_node_new_connect" events */
|
||||
if (new_child_nodes.node_count > 0)
|
||||
{
|
||||
t_child_node_info *child_node_rec;
|
||||
for (child_node_rec = new_child_nodes.head; child_node_rec; child_node_rec = child_node_rec->next)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
initPQExpBuffer(&event_details);
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("new node \"%s\" (node ID: %i) has connected"),
|
||||
child_node_rec->node_name,
|
||||
child_node_rec->node_id);
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"child_node_new_connect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (config_file_options.child_nodes_disconnect_command[0] != '\0')
|
||||
{
|
||||
/*
|
||||
* script will only be executed if the number of attached
|
||||
* standbys is lower than this number
|
||||
*/
|
||||
int min_required_connected_count = 1;
|
||||
int connected_count = 0;
|
||||
|
||||
/*
|
||||
* Calculate hi
|
||||
*/
|
||||
|
||||
if (config_file_options.child_nodes_connected_min_count > 0)
|
||||
{
|
||||
min_required_connected_count = config_file_options.child_nodes_connected_min_count;
|
||||
}
|
||||
else if (config_file_options.child_nodes_disconnect_min_count > 0)
|
||||
{
|
||||
min_required_connected_count =
|
||||
(db_child_node_records.node_count - config_file_options.child_nodes_disconnect_min_count)
|
||||
+ 1;
|
||||
}
|
||||
|
||||
/* calculate number of connected child nodes */
|
||||
for (cell = db_child_node_records.head; cell; cell = cell->next)
|
||||
{
|
||||
if (cell->node_info->attached == NODE_ATTACHED)
|
||||
connected_count ++;
|
||||
}
|
||||
|
||||
log_debug("connected: %i; min required: %i",
|
||||
connected_count,
|
||||
min_required_connected_count);
|
||||
|
||||
if (connected_count < min_required_connected_count)
|
||||
{
|
||||
log_notice(_("%i (of %i) child nodes are connected, but at least %i child nodes required"),
|
||||
connected_count,
|
||||
db_child_node_records.node_count,
|
||||
min_required_connected_count);
|
||||
|
||||
if (child_nodes_disconnect_command_executed == false)
|
||||
{
|
||||
t_child_node_info *child_node_rec;
|
||||
|
||||
/* set these for informative purposes */
|
||||
int most_recently_disconnected_node_id = UNKNOWN_NODE_ID;
|
||||
int most_recently_disconnected_elapsed = -1;
|
||||
|
||||
bool most_recent_disconnect_below_threshold = false;
|
||||
instr_time current_time_base;
|
||||
|
||||
INSTR_TIME_SET_CURRENT(current_time_base);
|
||||
|
||||
for (child_node_rec = local_child_nodes->head; child_node_rec; child_node_rec = child_node_rec->next)
|
||||
{
|
||||
instr_time current_time = current_time_base;
|
||||
int seconds_since_detached;
|
||||
|
||||
if (child_node_rec->attached != NODE_DETACHED)
|
||||
continue;
|
||||
|
||||
INSTR_TIME_SUBTRACT(current_time, child_node_rec->detached_time);
|
||||
seconds_since_detached = (int) INSTR_TIME_GET_DOUBLE(current_time);
|
||||
|
||||
if (seconds_since_detached < config_file_options.child_nodes_disconnect_timeout)
|
||||
{
|
||||
most_recent_disconnect_below_threshold = true;
|
||||
}
|
||||
|
||||
if (most_recently_disconnected_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
most_recently_disconnected_node_id = child_node_rec->node_id;
|
||||
most_recently_disconnected_elapsed = seconds_since_detached;
|
||||
}
|
||||
else if (seconds_since_detached < most_recently_disconnected_elapsed)
|
||||
{
|
||||
most_recently_disconnected_node_id = child_node_rec->node_id;
|
||||
most_recently_disconnected_elapsed = seconds_since_detached;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (most_recent_disconnect_below_threshold == false && most_recently_disconnected_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
char parsed_child_nodes_disconnect_command[MAXPGPATH];
|
||||
int child_nodes_disconnect_command_result;
|
||||
PQExpBufferData event_details;
|
||||
bool success = true;
|
||||
|
||||
parse_child_nodes_disconnect_command(parsed_child_nodes_disconnect_command,
|
||||
config_file_options.child_nodes_disconnect_command,
|
||||
local_node_info.node_id);
|
||||
|
||||
log_info(_("most recently detached child node was %i (ca. %i seconds ago), triggering \"child_nodes_disconnect_command\""),
|
||||
most_recently_disconnected_node_id,
|
||||
most_recently_disconnected_elapsed);
|
||||
|
||||
log_info(_("\"child_nodes_disconnect_command\" is:\n \"%s\""),
|
||||
parsed_child_nodes_disconnect_command);
|
||||
|
||||
child_nodes_disconnect_command_result = system(parsed_child_nodes_disconnect_command);
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
if (child_nodes_disconnect_command_result != 0)
|
||||
{
|
||||
success = false;
|
||||
|
||||
appendPQExpBufferStr(&event_details,
|
||||
_("unable to execute \"child_nodes_disconnect_command\""));
|
||||
|
||||
log_error("%s", event_details.data);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBufferStr(&event_details,
|
||||
_("\"child_nodes_disconnect_command\" successfully executed"));
|
||||
|
||||
log_info("%s", event_details.data);
|
||||
}
|
||||
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"child_nodes_disconnect_command",
|
||||
success,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
child_nodes_disconnect_command_executed = true;
|
||||
}
|
||||
else if (most_recently_disconnected_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
log_info(_("most recently detached child node was %i (ca. %i seconds ago), not triggering \"child_nodes_disconnect_command\""),
|
||||
most_recently_disconnected_node_id,
|
||||
most_recently_disconnected_elapsed);
|
||||
log_detail(_("\"child_nodes_disconnect_timeout\" set to %i seconds"),
|
||||
config_file_options.child_nodes_disconnect_timeout);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("no child nodes have detached since repmgrd startup"));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("\"child_nodes_disconnect_command\" was previously executed, taking no action"));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* "child_nodes_disconnect_command" was executed, but for whatever reason
|
||||
* enough child nodes have returned to clear the threshold; in that case reset
|
||||
* the executed flag so we can execute the command again, if necessary
|
||||
*/
|
||||
if (child_nodes_disconnect_command_executed == true)
|
||||
{
|
||||
log_notice(_("%i (of %i) child nodes are now connected, meeting minimum requirement of %i child nodes"),
|
||||
connected_count,
|
||||
db_child_node_records.node_count,
|
||||
min_required_connected_count);
|
||||
child_nodes_disconnect_command_executed = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clear_child_node_info_list(&disconnected_child_nodes);
|
||||
clear_child_node_info_list(&reconnected_child_nodes);
|
||||
clear_child_node_info_list(&new_child_nodes);
|
||||
|
||||
clear_node_info_list(&db_child_node_records);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* repmgrd running on a standby server
|
||||
*/
|
||||
@@ -4357,3 +4848,134 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll
|
||||
|
||||
return can_follow;
|
||||
}
|
||||
|
||||
|
||||
static t_child_node_info *
|
||||
append_child_node_record(t_child_node_info_list *nodes, int node_id, const char *node_name, NodeAttached attached)
|
||||
{
|
||||
t_child_node_info *child_node = pg_malloc0(sizeof(t_child_node_info));
|
||||
|
||||
child_node->node_id = node_id;
|
||||
snprintf(child_node->node_name, sizeof(child_node->node_name), "%s", node_name);
|
||||
child_node->attached = attached;
|
||||
|
||||
if (nodes->tail)
|
||||
nodes->tail->next = child_node;
|
||||
else
|
||||
nodes->head = child_node;
|
||||
|
||||
nodes->tail = child_node;
|
||||
nodes->node_count++;
|
||||
|
||||
return child_node;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
remove_child_node_record(t_child_node_info_list *nodes, int node_id)
|
||||
{
|
||||
t_child_node_info *node;
|
||||
t_child_node_info *prev_node = NULL;
|
||||
t_child_node_info *next_node = NULL;
|
||||
|
||||
node = nodes->head;
|
||||
|
||||
while (node != NULL)
|
||||
{
|
||||
next_node = node->next;
|
||||
|
||||
log_debug("ZZZ node: %i", node->node_id);
|
||||
|
||||
if (node->node_id == node_id)
|
||||
{
|
||||
/* first node */
|
||||
if (node == nodes->head)
|
||||
{
|
||||
nodes->head = next_node;
|
||||
}
|
||||
/* last node */
|
||||
else if (next_node == NULL)
|
||||
{
|
||||
prev_node->next = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
prev_node->next = next_node;
|
||||
}
|
||||
pfree(node);
|
||||
nodes->node_count--;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
prev_node = node;
|
||||
}
|
||||
node = next_node;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
clear_child_node_info_list(t_child_node_info_list *nodes)
|
||||
{
|
||||
t_child_node_info *node;
|
||||
t_child_node_info *next_node;
|
||||
|
||||
node = nodes->head;
|
||||
|
||||
while (node != NULL)
|
||||
{
|
||||
next_node = node->next;
|
||||
pfree(node);
|
||||
node = next_node;
|
||||
}
|
||||
|
||||
nodes->head = NULL;
|
||||
nodes->tail = NULL;
|
||||
nodes->node_count = 0;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
parse_child_nodes_disconnect_command(char *parsed_command, char *template, int reporting_node_id)
|
||||
{
|
||||
const char *src_ptr = NULL;
|
||||
char *dst_ptr = NULL;
|
||||
char *end_ptr = NULL;
|
||||
|
||||
dst_ptr = parsed_command;
|
||||
end_ptr = (parsed_command + MAXPGPATH) - 1;
|
||||
*end_ptr = '\0';
|
||||
|
||||
for (src_ptr = template; *src_ptr; src_ptr++)
|
||||
{
|
||||
if (*src_ptr == '%')
|
||||
{
|
||||
switch (src_ptr[1])
|
||||
{
|
||||
case '%':
|
||||
/* %%: replace with % */
|
||||
if (dst_ptr < end_ptr)
|
||||
{
|
||||
src_ptr++;
|
||||
*dst_ptr++ = *src_ptr;
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
/* %p: node id of the reporting primary */
|
||||
src_ptr++;
|
||||
snprintf(dst_ptr, end_ptr - dst_ptr, "%i", reporting_node_id);
|
||||
dst_ptr += strlen(dst_ptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dst_ptr < end_ptr)
|
||||
*dst_ptr++ = *src_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
*dst_ptr = '\0';
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user