mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
In many cases we'll want to bail out with an error if the node list can't be retrieved for any reason. This saves some repetitive coding.
661 lines
17 KiB
C
661 lines
17 KiB
C
/*
|
|
* repmgrd-bdr.c - BDR functionality for repmgrd
|
|
*
|
|
* Copyright (c) 2ndQuadrant, 2010-2018
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <signal.h>
|
|
|
|
#include "repmgr.h"
|
|
#include "repmgrd.h"
|
|
#include "repmgrd-bdr.h"
|
|
#include "configfile.h"
|
|
|
|
|
|
/*
 * Forward declarations for the failure and recovery handlers, both of which
 * are defined further down in this file and called from monitor_bdr().
 */
static void do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node);
|
|
static void do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node);
|
|
|
|
|
|
/*
 * do_bdr_node_check()
 *
 * Placeholder hook for BDR node checks. It takes no arguments, returns
 * nothing and currently performs no work.
 */
void
do_bdr_node_check(void)
{
	/* intentionally empty - no BDR node checks are implemented yet */
	return;
}
|
|
|
|
void
|
|
handle_sigint_bdr(SIGNAL_ARGS)
|
|
{
|
|
PQExpBufferData event_details;
|
|
|
|
initPQExpBuffer(&event_details);
|
|
|
|
appendPQExpBuffer(&event_details,
|
|
"%s signal received",
|
|
postgres_signal_arg == SIGTERM
|
|
? "TERM" : "INT");
|
|
|
|
create_event_notification(local_conn,
|
|
&config_file_options,
|
|
config_file_options.node_id,
|
|
"repmgrd_shutdown",
|
|
true,
|
|
event_details.data);
|
|
termPQExpBuffer(&event_details);
|
|
|
|
terminate(SUCCESS);
|
|
}
|
|
|
|
|
|
void
|
|
monitor_bdr(void)
|
|
{
|
|
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
|
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
|
|
RecordStatus record_status;
|
|
NodeInfoListCell *cell;
|
|
PQExpBufferData event_details;
|
|
instr_time log_status_interval_start;
|
|
|
|
/* sanity check local database */
|
|
log_info(_("connecting to local database \"%s\""),
|
|
config_file_options.conninfo);
|
|
|
|
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
|
|
|
/*
|
|
* Local node must be running
|
|
*/
|
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
{
|
|
log_error(_("unable connect to local node (ID: %i), terminating"),
|
|
local_node_info.node_id);
|
|
log_hint(_("local node must be running before repmgrd can start"));
|
|
PQfinish(local_conn);
|
|
exit(ERR_DB_CONN);
|
|
}
|
|
|
|
/*
|
|
* Verify that database is a BDR one TODO: check if supported BDR version?
|
|
*/
|
|
log_info(_("connected to database, checking for BDR"));
|
|
|
|
if (!is_bdr_db(local_conn, NULL))
|
|
{
|
|
log_error(_("database is not BDR-enabled"));
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (is_table_in_bdr_replication_set(local_conn, "nodes", "repmgr") == false)
|
|
{
|
|
log_error(_("repmgr metadata table 'repmgr.%s' is not in the 'repmgr' replication set"),
|
|
"nodes");
|
|
|
|
/*
|
|
* TODO: add `repmgr bdr sync` or similar for this situation, and hint
|
|
* here
|
|
*/
|
|
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
record_status = get_bdr_node_record_by_name(local_conn, local_node_info.node_name, &bdr_node_info);
|
|
|
|
if (record_status != RECORD_FOUND)
|
|
{
|
|
log_error(_("unable to retrieve BDR record for node %s, terminating"),
|
|
local_node_info.node_name);
|
|
PQfinish(local_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (local_node_info.active == false)
|
|
{
|
|
log_error(_("local node (ID: %i) is marked as inactive in repmgr"),
|
|
local_node_info.node_id);
|
|
log_hint(_("if the node has been reactivated, run \"repmgr bdr register --force\" and restart repmgrd"));
|
|
PQfinish(local_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (is_active_bdr_node(local_conn, local_node_info.node_name) == false)
|
|
{
|
|
log_error(_("BDR node \"%s\" is not active, terminating"),
|
|
local_node_info.node_name);
|
|
PQfinish(local_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/* Log startup event */
|
|
create_event_record(local_conn,
|
|
&config_file_options,
|
|
config_file_options.node_id,
|
|
"repmgrd_start",
|
|
true,
|
|
NULL);
|
|
|
|
/*
|
|
* retrieve list of all nodes - we'll need these if the DB connection goes
|
|
* away
|
|
*/
|
|
if (get_all_node_records(local_conn, &nodes) == false)
|
|
{
|
|
/* get_all_node_records() will display the error */
|
|
PQfinish(local_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
|
|
/* we're expecting all (both) nodes to be up */
|
|
for (cell = nodes.head; cell; cell = cell->next)
|
|
{
|
|
cell->node_info->node_status = NODE_STATUS_UP;
|
|
}
|
|
|
|
log_info(_("starting continuous BDR node monitoring on node %i"),
|
|
config_file_options.node_id);
|
|
|
|
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
|
|
|
while (true)
|
|
{
|
|
|
|
/* monitoring loop */
|
|
log_verbose(LOG_DEBUG, "BDR check loop - checking %i nodes", nodes.node_count);
|
|
|
|
for (cell = nodes.head; cell; cell = cell->next)
|
|
{
|
|
if (config_file_options.bdr_local_monitoring_only == true
|
|
&& cell->node_info->node_id != local_node_info.node_id)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (cell->node_info->node_id == local_node_info.node_id)
|
|
{
|
|
log_debug("checking local node %i in %s state",
|
|
local_node_info.node_id,
|
|
print_monitoring_state(cell->node_info->monitoring_state));
|
|
}
|
|
else
|
|
{
|
|
log_debug("checking other node %i in %s state",
|
|
cell->node_info->node_id,
|
|
print_monitoring_state(cell->node_info->monitoring_state));
|
|
}
|
|
|
|
|
|
switch (cell->node_info->monitoring_state)
|
|
{
|
|
case MS_NORMAL:
|
|
{
|
|
if (is_server_available(cell->node_info->conninfo) == false)
|
|
{
|
|
/* node is down, we were expecting it to be up */
|
|
if (cell->node_info->node_status == NODE_STATUS_UP)
|
|
{
|
|
instr_time node_unreachable_start;
|
|
|
|
INSTR_TIME_SET_CURRENT(node_unreachable_start);
|
|
|
|
cell->node_info->node_status = NODE_STATUS_DOWN;
|
|
|
|
if (cell->node_info->conn != NULL)
|
|
{
|
|
PQfinish(cell->node_info->conn);
|
|
cell->node_info->conn = NULL;
|
|
}
|
|
|
|
log_warning(_("unable to connect to node %s (ID %i)"),
|
|
cell->node_info->node_name, cell->node_info->node_id);
|
|
//cell->node_info->conn = try_reconnect(cell->node_info);
|
|
try_reconnect(&cell->node_info->conn, cell->node_info);
|
|
|
|
/* node has recovered - log and continue */
|
|
if (cell->node_info->node_status == NODE_STATUS_UP)
|
|
{
|
|
int node_unreachable_elapsed = calculate_elapsed(node_unreachable_start);
|
|
|
|
initPQExpBuffer(&event_details);
|
|
|
|
appendPQExpBuffer(&event_details,
|
|
_("reconnected to node %i after %i seconds"),
|
|
cell->node_info->node_id,
|
|
node_unreachable_elapsed);
|
|
log_notice("%s", event_details.data);
|
|
|
|
create_event_notification(cell->node_info->conn,
|
|
&config_file_options,
|
|
config_file_options.node_id,
|
|
"bdr_reconnect",
|
|
true,
|
|
event_details.data);
|
|
termPQExpBuffer(&event_details);
|
|
|
|
goto loop;
|
|
}
|
|
|
|
/* still down after reconnect attempt(s) */
|
|
if (cell->node_info->node_status == NODE_STATUS_DOWN)
|
|
{
|
|
do_bdr_failover(&nodes, cell->node_info);
|
|
goto loop;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case MS_DEGRADED:
|
|
{
|
|
/* degraded monitoring */
|
|
if (is_server_available(cell->node_info->conninfo) == true)
|
|
{
|
|
do_bdr_recovery(&nodes, cell->node_info);
|
|
}
|
|
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
loop:
|
|
|
|
/* emit "still alive" log message at regular intervals, if requested */
|
|
if (config_file_options.log_status_interval > 0)
|
|
{
|
|
int log_status_interval_elapsed = calculate_elapsed(log_status_interval_start);
|
|
if (log_status_interval_elapsed >= config_file_options.log_status_interval)
|
|
{
|
|
log_info(_("monitoring BDR replication status on node \"%s\" (ID: %i)"),
|
|
local_node_info.node_name,
|
|
local_node_info.node_id);
|
|
|
|
for (cell = nodes.head; cell; cell = cell->next)
|
|
{
|
|
if (cell->node_info->monitoring_state == MS_DEGRADED)
|
|
{
|
|
log_detail(_("monitoring node \"%s\" (ID: %i) in degraded mode"),
|
|
cell->node_info->node_name,
|
|
cell->node_info->node_id);
|
|
}
|
|
}
|
|
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
|
}
|
|
}
|
|
|
|
if (got_SIGHUP)
|
|
{
|
|
/*
|
|
* if we can reload, then could need to change local_conn
|
|
*/
|
|
if (reload_config(&config_file_options, BDR))
|
|
{
|
|
PQfinish(local_conn);
|
|
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
|
update_registration(local_conn);
|
|
}
|
|
|
|
got_SIGHUP = false;
|
|
}
|
|
|
|
/* XXX this looks like it will never be called */
|
|
if (got_SIGHUP)
|
|
{
|
|
log_debug("SIGHUP received");
|
|
|
|
if (reload_config(&config_file_options, BDR))
|
|
{
|
|
PQfinish(local_conn);
|
|
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
|
|
|
if (*config_file_options.log_file)
|
|
{
|
|
FILE *fd;
|
|
|
|
fd = freopen(config_file_options.log_file, "a", stderr);
|
|
if (fd == NULL)
|
|
{
|
|
fprintf(stderr, "error reopening stderr to \"%s\": %s",
|
|
config_file_options.log_file, strerror(errno));
|
|
}
|
|
}
|
|
}
|
|
got_SIGHUP = false;
|
|
}
|
|
|
|
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
|
|
config_file_options.monitor_interval_secs);
|
|
sleep(config_file_options.monitor_interval_secs);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* do_bdr_failover()
|
|
*
|
|
* Here we attempt to perform a BDR "failover".
|
|
*
|
|
* As there's no equivalent of a physical replication failover,
|
|
* we'll do the following:
|
|
*
|
|
* - connect to active node
|
|
* - generate an event log record on that node
|
|
* - optionally execute `bdr_failover_command`, passing the conninfo string
|
|
* of that node to the command; this can be used for e.g. reconfiguring
|
|
* pgbouncer.
|
|
*
|
|
*/
|
|
|
|
void
|
|
do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
|
|
{
|
|
PGconn *next_node_conn = NULL;
|
|
NodeInfoListCell *cell;
|
|
PQExpBufferData event_details;
|
|
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
|
t_node_info target_node = T_NODE_INFO_INITIALIZER;
|
|
t_node_info failed_node = T_NODE_INFO_INITIALIZER;
|
|
RecordStatus record_status;
|
|
|
|
/* if one of the two nodes is down, cluster will be in a degraded state */
|
|
monitored_node->monitoring_state = MS_DEGRADED;
|
|
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
|
|
|
/* terminate local connection if this is the failed node */
|
|
if (monitored_node->node_id == local_node_info.node_id)
|
|
{
|
|
PQfinish(local_conn);
|
|
local_conn = NULL;
|
|
}
|
|
|
|
|
|
/* get other node */
|
|
|
|
for (cell = nodes->head; cell; cell = cell->next)
|
|
{
|
|
log_debug("do_bdr_failover() %s", cell->node_info->node_name);
|
|
|
|
/*
|
|
* don't attempt to connect to the current monitored node, as that's
|
|
* the one which has failed
|
|
*/
|
|
if (cell->node_info->node_id == monitored_node->node_id)
|
|
continue;
|
|
|
|
/* TODO: reuse local conn if local node is up */
|
|
next_node_conn = establish_db_connection(cell->node_info->conninfo, false);
|
|
|
|
if (PQstatus(next_node_conn) == CONNECTION_OK)
|
|
{
|
|
record_status = get_node_record(next_node_conn,
|
|
cell->node_info->node_id,
|
|
&target_node);
|
|
|
|
if (record_status == RECORD_FOUND)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
next_node_conn = NULL;
|
|
}
|
|
|
|
/* shouldn't happen, and if it does, it means everything is down */
|
|
if (next_node_conn == NULL)
|
|
{
|
|
log_error(_("no other available node found"));
|
|
|
|
/* no other nodes found - continue degraded monitoring */
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* check if the node record for the failed node is still marked as active,
|
|
* if not it means the other node has done the "failover" already
|
|
*/
|
|
|
|
record_status = get_node_record(next_node_conn,
|
|
monitored_node->node_id,
|
|
&failed_node);
|
|
|
|
if (record_status == RECORD_FOUND && failed_node.active == false)
|
|
{
|
|
PQfinish(next_node_conn);
|
|
log_notice(_("record for node %i has already been set inactive"),
|
|
failed_node.node_id);
|
|
return;
|
|
}
|
|
|
|
if (am_bdr_failover_handler(next_node_conn, local_node_info.node_id) == false)
|
|
{
|
|
PQfinish(next_node_conn);
|
|
log_notice(_("other node's repmgrd is handling failover"));
|
|
return;
|
|
}
|
|
|
|
|
|
/* check here that the node hasn't come back up */
|
|
if (is_server_available(monitored_node->conninfo) == true)
|
|
{
|
|
log_notice(_("node %i has reappeared, aborting failover"),
|
|
monitored_node->node_id);
|
|
monitored_node->monitoring_state = MS_NORMAL;
|
|
PQfinish(next_node_conn);
|
|
}
|
|
|
|
log_debug("this node is the failover handler");
|
|
|
|
initPQExpBuffer(&event_details);
|
|
|
|
event_info.conninfo_str = target_node.conninfo;
|
|
event_info.node_name = target_node.node_name;
|
|
|
|
/* update node record on the active node */
|
|
update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
|
|
|
|
log_notice(_("setting node record for node %i to inactive"), monitored_node->node_id);
|
|
|
|
appendPQExpBuffer(&event_details,
|
|
_("node \"%s\" (ID: %i) detected as failed; next available node is \"%s\" (ID: %i)"),
|
|
monitored_node->node_name,
|
|
monitored_node->node_id,
|
|
target_node.node_name,
|
|
target_node.node_id);
|
|
|
|
/*
|
|
* Create an event record
|
|
*
|
|
* If we were able to connect to another node, we'll update the event log
|
|
* there.
|
|
*
|
|
* In any case the event notification command will be triggered with the
|
|
* event "bdr_failover"
|
|
*/
|
|
|
|
|
|
create_event_notification_extended(next_node_conn,
|
|
&config_file_options,
|
|
monitored_node->node_id,
|
|
"bdr_failover",
|
|
true,
|
|
event_details.data,
|
|
&event_info);
|
|
|
|
log_info("%s", event_details.data);
|
|
|
|
termPQExpBuffer(&event_details);
|
|
|
|
unset_bdr_failover_handler(next_node_conn);
|
|
|
|
PQfinish(next_node_conn);
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
static void
|
|
do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
|
|
{
|
|
PGconn *recovered_node_conn;
|
|
|
|
PQExpBufferData event_details;
|
|
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
|
int i;
|
|
bool slot_reactivated = false;
|
|
int node_recovery_elapsed;
|
|
|
|
char node_name[MAXLEN] = "";
|
|
|
|
log_debug("handling recovery for monitored node %i", monitored_node->node_id);
|
|
|
|
recovered_node_conn = establish_db_connection(monitored_node->conninfo, false);
|
|
|
|
if (PQstatus(recovered_node_conn) != CONNECTION_OK)
|
|
{
|
|
PQfinish(recovered_node_conn);
|
|
return;
|
|
}
|
|
|
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
{
|
|
log_debug("no local connection - attempting to reconnect ");
|
|
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
|
}
|
|
|
|
/*
|
|
* still unable to connect - the local node is probably down, so we can't
|
|
* check for reconnection
|
|
*/
|
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
{
|
|
local_conn = NULL;
|
|
log_warning(_("unable to reconnect to local node"));
|
|
|
|
initPQExpBuffer(&event_details);
|
|
|
|
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
|
|
monitored_node->monitoring_state = MS_NORMAL;
|
|
monitored_node->node_status = NODE_STATUS_UP;
|
|
|
|
appendPQExpBuffer(
|
|
&event_details,
|
|
_("node \"%s\" (ID: %i) has become available after %i seconds"),
|
|
monitored_node->node_name,
|
|
monitored_node->node_id,
|
|
node_recovery_elapsed);
|
|
|
|
log_notice("%s", event_details.data);
|
|
|
|
termPQExpBuffer(&event_details);
|
|
|
|
PQfinish(recovered_node_conn);
|
|
|
|
return;
|
|
}
|
|
|
|
get_bdr_other_node_name(local_conn, local_node_info.node_id, node_name);
|
|
|
|
log_info(_("detected recovery on node %s (ID: %i), checking status"),
|
|
monitored_node->node_name,
|
|
monitored_node->node_id);
|
|
|
|
for (i = 0; i < config_file_options.bdr_recovery_timeout; i++)
|
|
{
|
|
ReplSlotStatus slot_status;
|
|
|
|
log_debug("checking for state of replication slot for node \"%s\"", node_name);
|
|
|
|
slot_status = get_bdr_node_replication_slot_status(
|
|
local_conn,
|
|
node_name);
|
|
|
|
if (slot_status == SLOT_ACTIVE)
|
|
{
|
|
slot_reactivated = true;
|
|
break;
|
|
}
|
|
|
|
sleep(1);
|
|
}
|
|
|
|
/* mark node as up */
|
|
monitored_node->node_status = NODE_STATUS_UP;
|
|
|
|
if (slot_reactivated == false)
|
|
{
|
|
log_warning(_("no active replication slot for node \"%s\" found after %i seconds"),
|
|
node_name,
|
|
config_file_options.bdr_recovery_timeout);
|
|
log_detail(_("this probably means inter-node BDR connections have not been re-established"));
|
|
PQfinish(recovered_node_conn);
|
|
return;
|
|
}
|
|
|
|
log_info(_("active replication slot for node \"%s\" found after %i seconds"),
|
|
node_name,
|
|
i);
|
|
|
|
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
|
|
monitored_node->monitoring_state = MS_NORMAL;
|
|
|
|
|
|
initPQExpBuffer(&event_details);
|
|
|
|
appendPQExpBuffer(&event_details,
|
|
_("node \"%s\" (ID: %i) has recovered after %i seconds"),
|
|
monitored_node->node_name,
|
|
monitored_node->node_id,
|
|
node_recovery_elapsed);
|
|
|
|
log_notice("%s", event_details.data);
|
|
|
|
|
|
/* other node will generate the event */
|
|
if (monitored_node->node_id == local_node_info.node_id)
|
|
{
|
|
termPQExpBuffer(&event_details);
|
|
PQfinish(recovered_node_conn);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/* generate the event on the currently active node only */
|
|
if (monitored_node->node_id != local_node_info.node_id)
|
|
{
|
|
event_info.conninfo_str = monitored_node->conninfo;
|
|
event_info.node_name = monitored_node->node_name;
|
|
|
|
create_event_notification_extended(
|
|
local_conn,
|
|
&config_file_options,
|
|
config_file_options.node_id,
|
|
"bdr_recovery",
|
|
true,
|
|
event_details.data,
|
|
&event_info);
|
|
}
|
|
|
|
|
|
update_node_record_set_active(local_conn, monitored_node->node_id, true);
|
|
|
|
termPQExpBuffer(&event_details);
|
|
|
|
PQfinish(recovered_node_conn);
|
|
|
|
return;
|
|
}
|