From fa66e72c2fa43595c833b89f6f4f00f786002832 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 21 May 2019 15:19:41 +0900 Subject: [PATCH] repmgrd: count witness server as child node for connection monitoring purposes As the witness server does not, by definition, ever have an entry in pg_stat_replication, we need to check its "attached" status by connecting to the witness server itself and querying the reported upstream node ID (which should be set by the witness server repmgrd). If this matches the current primary node ID, we count it as attached. --- repmgrd-physical.c | 47 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/repmgrd-physical.c b/repmgrd-physical.c index bd4496e7..d860265e 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -116,6 +116,7 @@ static const char *format_failover_state(FailoverState failover_state); static ElectionResult execute_failover_validation_command(t_node_info *node_info); static void parse_failover_validation_command(const char *template, t_node_info *node_info, PQExpBufferData *out); static bool check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_info); +static void check_witness_attached(t_node_info *node_info); static t_child_node_info *append_child_node_record(t_child_node_info_list *nodes, int node_id, const char *node_name, NodeAttached attached); static void remove_child_node_record(t_child_node_info_list *nodes, int node_id); @@ -333,6 +334,14 @@ monitor_streaming_primary(void) cell->node_info->node_name, cell->node_info->attached == NODE_ATTACHED ? 
NODE_ATTACHED : NODE_ATTACHED_UNKNOWN); + /* + * witness has no pg_stat_replication entry, so check it separately + */ + if (cell->node_info->type == WITNESS) + { + check_witness_attached(cell->node_info); + } + if (cell->node_info->attached == NODE_ATTACHED) { log_info(_("child node \"%s\" (ID: %i) is attached"), @@ -342,7 +351,7 @@ monitor_streaming_primary(void) else { log_info(_("child node \"%s\" (ID: %i) is not yet attached"), - cell->node_info->node_name, + cell->node_info->node_name, cell->node_info->node_id); } } @@ -809,6 +818,15 @@ check_primary_child_nodes(t_child_node_info_list *local_child_nodes) t_child_node_info *local_child_node_rec; bool local_child_node_rec_found = false; + + /* + * witness has no pg_stat_replication entry, so check it separately + */ + if (cell->node_info->type == WITNESS) + { + check_witness_attached(cell->node_info); + } + log_debug("child node: %i; attached: %s", cell->node_info->node_id, cell->node_info->attached == NODE_ATTACHED ? "yes" : "no"); @@ -4884,6 +4902,33 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll } +static void +check_witness_attached(t_node_info *node_info) +{ + /* + * mark the witness as attached if its reported upstream node ID matches + * local_node_info.node_id; otherwise (or if unreachable) leave it as is. + */ + PGconn *witness_conn = establish_db_connection_quiet(node_info->conninfo); + + if (PQstatus(witness_conn) == CONNECTION_OK) + { + int witness_upstream_node_id = repmgrd_get_upstream_node_id(witness_conn); + + log_debug("witness node %i's upstream node ID reported as %i", + node_info->node_id, + witness_upstream_node_id); + + if (witness_upstream_node_id == local_node_info.node_id) + { + node_info->attached = NODE_ATTACHED; + } + } + + PQfinish(witness_conn); +} + + static t_child_node_info * append_child_node_record(t_child_node_info_list *nodes, int node_id, const char *node_name, NodeAttached attached) {