Add "witness register" functionality

This commit is contained in:
Ian Barwick
2017-11-13 14:53:12 +09:00
parent 7fffe3ed96
commit a6cc4d80f0
13 changed files with 397 additions and 23 deletions

View File

@@ -338,6 +338,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
/*-------------
* witness settings
*-------------
*/
options->witness_sync_interval = DEFAULT_WITNESS_SYNC_INTERVAL;
/*-------------
* BDR settings
*-------------
@@ -551,6 +557,10 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
else if (strcmp(name, "primary_follow_timeout") == 0)
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
/* witness settings */
else if (strcmp(name, "witness_sync_interval") == 0)
options->witness_sync_interval = repmgr_atoi(value, name, error_list, 1);
/* BDR settings */
else if (strcmp(name, "bdr_local_monitoring_only") == 0)
options->bdr_local_monitoring_only = parse_bool(value, name, error_list);

View File

@@ -98,6 +98,9 @@ typedef struct
int replication_lag_warning;
int replication_lag_critical;
/* witness settings */
int witness_sync_interval;
/* repmgrd settings */
failover_mode_opt failover;
char location[MAXLEN];
@@ -158,6 +161,8 @@ typedef struct
/* node check settings */ \
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
/* witness settings */ \
DEFAULT_WITNESS_SYNC_INTERVAL, \
/* repmgrd settings */ \
FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \
DEFAULT_MONITORING_INTERVAL, \

View File

@@ -50,6 +50,7 @@ static void _populate_node_records(PGresult *res, NodeInfoList *node_list);
static bool _create_update_node_record(PGconn *conn, char *action, t_node_info *node_info);
static bool _create_event(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details, t_event_info *event_info, bool send_notification);
static bool _is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet);
static void _populate_bdr_node_record(PGresult *res, t_bdr_node_info *node_info, int row);
static void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list);
@@ -1816,6 +1817,10 @@ parse_node_type(const char *type)
{
return STANDBY;
}
else if (strcmp(type, "witness") == 0)
{
return WITNESS;
}
else if (strcmp(type, "bdr") == 0)
{
return BDR;
@@ -1834,6 +1839,8 @@ get_node_type_string(t_server_type type)
return "primary";
case STANDBY:
return "standby";
case WITNESS:
return "witness";
case BDR:
return "bdr";
/* this should never happen */
@@ -2484,6 +2491,57 @@ update_node_record_conn_priority(PGconn *conn, t_configuration_options *options)
}
/*
* Copy node records from primary to witness servers.
*
* This is used when initially registering a witness server, and
* by repmgrd to update the node records when required.
*/
/*
 * Replace the witness server's node records with a copy of the primary's.
 *
 * Runs inside a single transaction on witness_conn: constraints are
 * deferred, the existing records are truncated, and every record fetched
 * from primary_conn is inserted. If any step fails the transaction is
 * rolled back, so the witness keeps its previous set of records.
 *
 * Returns true if all records were copied and the transaction was
 * committed, false otherwise.
 */
bool
witness_copy_node_records(PGconn *primary_conn, PGconn *witness_conn)
{
	PGresult   *res = NULL;
	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
	NodeInfoListCell *cell = NULL;

	begin_transaction(witness_conn);

	/* Defer constraints */
	res = PQexec(witness_conn, "SET CONSTRAINTS ALL DEFERRED");

	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
	{
		log_error(_("unable to defer constraints:\n %s"),
				  PQerrorMessage(witness_conn));

		/* PQclear() accepts NULL, so no separate guard is needed */
		PQclear(res);
		rollback_transaction(witness_conn);

		return false;
	}

	/* the result itself is not needed, release it straight away */
	PQclear(res);

	/* truncate existing records */
	if (truncate_node_records(witness_conn) == false)
	{
		rollback_transaction(witness_conn);

		return false;
	}

	get_all_node_records(primary_conn, &nodes);

	for (cell = nodes.head; cell; cell = cell->next)
	{
		/*
		 * Abort on the first failed insert; committing a partial copy
		 * would leave the witness with an incomplete view of the cluster.
		 */
		if (create_node_record(witness_conn, NULL, cell->node_info) == false)
		{
			rollback_transaction(witness_conn);
			clear_node_info_list(&nodes);

			return false;
		}
	}

	/* and done */
	commit_transaction(witness_conn);

	/* this function is called repeatedly by repmgrd, so don't leak the list */
	clear_node_info_list(&nodes);

	return true;
}
bool
delete_node_record(PGconn *conn, int node)
{
@@ -2515,6 +2573,24 @@ delete_node_record(PGconn *conn, int node)
return true;
}
/*
 * Remove all rows from the repmgr.nodes table on the given connection.
 *
 * Returns true if the TRUNCATE command succeeded; otherwise logs the
 * connection's error message and returns false.
 */
bool
truncate_node_records(PGconn *conn)
{
	PGresult   *truncate_res = PQexec(conn, "TRUNCATE TABLE repmgr.nodes");
	bool		success = (PQresultStatus(truncate_res) == PGRES_COMMAND_OK);

	if (!success)
	{
		log_error(_("unable to truncate node record table:\n %s"),
				  PQerrorMessage(conn));
	}

	/* single release point for both outcomes */
	PQclear(truncate_res);

	return success;
}
void
get_node_replication_stats(PGconn *conn, t_node_info *node_info)
@@ -3906,8 +3982,8 @@ get_last_wal_receive_location(PGconn *conn)
/* BDR functions */
/* ============= */
bool
is_bdr_db(PGconn *conn, PQExpBufferData *output)
static bool
_is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet)
{
PQExpBufferData query;
PGresult *res = NULL;
@@ -3938,7 +4014,7 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
if (output != NULL)
appendPQExpBuffer(output, "%s", warning);
else
else if (quiet == false)
log_warning("%s", warning);
return is_bdr_db;
@@ -3959,7 +4035,7 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
if (output != NULL)
appendPQExpBuffer(output, "%s", warning);
else
else if (quiet == false)
log_warning("%s", warning);
}
@@ -3968,6 +4044,19 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
return is_bdr_db;
}
/*
 * Check whether the connected database is a BDR database.
 *
 * Non-quiet variant: any warning generated by the check is appended to
 * "output" when that is not NULL, and emitted via log_warning() otherwise.
 */
bool
is_bdr_db(PGconn *conn, PQExpBufferData *output)
{
	const bool	quiet = false;

	return _is_bdr_db(conn, output, quiet);
}
/*
 * Check whether the connected database is a BDR database without
 * reporting anything: no output buffer is supplied and warnings are
 * not logged.
 */
bool
is_bdr_db_quiet(PGconn *conn)
{
	const bool	quiet = true;

	return _is_bdr_db(conn, NULL, quiet);
}
bool
is_active_bdr_node(PGconn *conn, const char *node_name)

View File

@@ -38,6 +38,7 @@ typedef enum
UNKNOWN = 0,
PRIMARY,
STANDBY,
WITNESS,
BDR
} t_server_type;
@@ -413,6 +414,7 @@ void get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
bool create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
bool delete_node_record(PGconn *conn, int node);
bool truncate_node_records(PGconn *conn);
bool update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
bool update_node_record_set_primary(PGconn *conn, int this_node_id);
@@ -420,6 +422,9 @@ bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_up
bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
bool update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
bool witness_copy_node_records(PGconn *primary_conn, PGconn *witness_conn);
void clear_node_info_list(NodeInfoList *nodes);
/* PostgreSQL configuration file location functions */
@@ -489,6 +494,7 @@ bool is_downstream_node_attached(PGconn *conn, char *node_name);
void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list);
RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info);
bool is_bdr_db(PGconn *conn, PQExpBufferData *output);
bool is_bdr_db_quiet(PGconn *conn);
bool is_active_bdr_node(PGconn *conn, const char *node_name);
bool is_bdr_repmgr(PGconn *conn);
bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);

View File

@@ -279,6 +279,7 @@ do_cluster_show(void)
}
}
break;
case WITNESS:
case BDR:
{
/* node is reachable */

View File

@@ -806,7 +806,6 @@ do_standby_register(void)
}
}
if (PQstatus(conn) == CONNECTION_OK)
{
check_recovery_type(conn);
@@ -838,7 +837,7 @@ do_standby_register(void)
if (PQstatus(primary_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to the primary database"));
log_hint(_("a primary must be configured before registering a standby"));
log_hint(_("a primary node must be configured before registering a standby node"));
exit(ERR_BAD_CONFIG);
}

View File

@@ -31,6 +31,240 @@
/*
 * Implements "repmgr witness register".
 *
 * Connects to the local (witness) node using the conninfo string from
 * repmgr.conf and to the primary using the connection parameters provided
 * on the command line, verifies that neither is a standby or a BDR node,
 * creates the repmgr extension on the witness if required, creates (or,
 * with -F/--force, updates) the witness node record on the primary, then
 * copies all node records from the primary to the witness.
 *
 * Any failure terminates the process with ERR_BAD_CONFIG.
 */
void
do_witness_register(void)
{
	PGconn	   *witness_conn = NULL;
	PGconn	   *primary_conn = NULL;
	RecoveryType recovery_type = RECTYPE_UNKNOWN;
	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
	t_node_info node_record = T_NODE_INFO_INITIALIZER;
	RecordStatus record_status = RECORD_NOT_FOUND;
	bool		record_created = false;

	/*
	 * Zero-initialised so that a keyword missing from the conninfo string
	 * yields an empty value rather than indeterminate bytes.
	 * NOTE(review): presumes get_conninfo_value() may leave the buffer
	 * untouched when the keyword is absent - confirm.
	 */
	char		repmgr_user[MAXLEN] = "";
	char		repmgr_db[MAXLEN] = "";

	log_info(_("connecting to witness node \"%s\" (ID: %i)"),
			 config_file_options.node_name,
			 config_file_options.node_id);

	witness_conn = establish_db_connection_quiet(config_file_options.conninfo);

	if (PQstatus(witness_conn) != CONNECTION_OK)
	{
		log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
				  config_file_options.node_name,
				  config_file_options.node_id);
		log_detail("%s",
				   PQerrorMessage(witness_conn));

		/* a failed connection object must still be released */
		PQfinish(witness_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* check witness node's recovery type */
	recovery_type = get_recovery_type(witness_conn);

	if (recovery_type == RECTYPE_STANDBY)
	{
		log_error(_("provided node is a standby"));
		log_hint(_("a witness node must run on an independent primary server"));
		PQfinish(witness_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* check that witness node is not a BDR node */
	if (is_bdr_db_quiet(witness_conn) == true)
	{
		log_error(_("witness node is a BDR node"));
		log_hint(_("a witness node cannot be configured for a BDR cluster"));
		PQfinish(witness_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* connect to primary with provided parameters */
	log_info(_("connecting to primary node"));

	/*
	 * Extract the repmgr user and database names from the conninfo string
	 * provided in repmgr.conf
	 */
	get_conninfo_value(config_file_options.conninfo, "user", repmgr_user);
	get_conninfo_value(config_file_options.conninfo, "dbname", repmgr_db);

	param_set_ine(&source_conninfo, "user", repmgr_user);
	param_set_ine(&source_conninfo, "dbname", repmgr_db);

	/* We need to connect to check configuration and copy it */
	primary_conn = establish_db_connection_by_params(&source_conninfo, false);

	if (PQstatus(primary_conn) != CONNECTION_OK)
	{
		log_error(_("unable to connect to the primary node"));
		log_hint(_("a primary node must be configured before registering a witness node"));

		PQfinish(primary_conn);
		PQfinish(witness_conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * check primary node's recovery type - this must be queried on the
	 * primary connection, not the witness connection
	 */
	recovery_type = get_recovery_type(primary_conn);

	if (recovery_type == RECTYPE_STANDBY)
	{
		log_error(_("provided primary node is a standby"));
		log_hint(_("provide the connection details of the cluster's primary server"));

		PQfinish(witness_conn);
		PQfinish(primary_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* check that primary node is not a BDR node */
	if (is_bdr_db_quiet(primary_conn) == true)
	{
		log_error(_("primary node is a BDR node"));
		log_hint(_("a witness node cannot be configured for a BDR cluster"));

		PQfinish(witness_conn);
		PQfinish(primary_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* XXX sanity check witness node is not part of main cluster */

	/* create repmgr extension, if does not exist */
	if (!create_repmgr_extension(witness_conn))
	{
		PQfinish(witness_conn);
		PQfinish(primary_conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * check if node record exists on primary, overwrite if -F/--force
	 * provided, otherwise exit with error
	 */
	record_status = get_node_record(primary_conn,
									config_file_options.node_id,
									&node_record);

	if (record_status == RECORD_FOUND)
	{
		/*
		 * If node is not a witness, cowardly refuse to do anything, let the
		 * user work out what's the correct thing to do.
		 */
		if (node_record.type != WITNESS)
		{
			log_error(_("node \"%s\" (ID: %i) is already registered as a %s node"),
					  config_file_options.node_name,
					  config_file_options.node_id,
					  get_node_type_string(node_record.type));
			log_hint(_("use \"repmgr %s unregister\" to remove a non-witness node record"),
					 get_node_type_string(node_record.type));

			PQfinish(witness_conn);
			PQfinish(primary_conn);
			exit(ERR_BAD_CONFIG);
		}

		if (!runtime_options.force)
		{
			log_error(_("witness node is already registered"));
			log_hint(_("use option -F/--force to reregister the node"));

			PQfinish(witness_conn);
			PQfinish(primary_conn);
			exit(ERR_BAD_CONFIG);
		}
	}

	/* XXX check other node with same name does not exist */

	/*
	 * if repmgr.nodes on the witness contains entries, delete them if
	 * -F/--force provided, otherwise exit with error.
	 *
	 * The witness' own table must be inspected here: the primary always
	 * holds at least one node record, so counting records there would
	 * reject every registration attempted without --force.
	 */
	get_all_node_records(witness_conn, &nodes);

	log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);

	if (nodes.node_count > 0)
	{
		if (!runtime_options.force)
		{
			log_error(_("witness node is already initialised"));
			log_hint(_("use option -F/--force to reinitialise the node"));

			PQfinish(primary_conn);
			PQfinish(witness_conn);
			exit(ERR_BAD_CONFIG);
		}

		if (truncate_node_records(witness_conn) == false)
		{
			PQfinish(primary_conn);
			PQfinish(witness_conn);
			exit(ERR_BAD_CONFIG);
		}
	}

	/*
	 * Create the witness node record on the primary, or update it if one
	 * already exists (in which case we have already established that
	 * -F/--force is in use).
	 */
	init_node_record(&node_record);

	/* override the generic settings applied by init_node_record() */
	node_record.type = WITNESS;
	node_record.priority = 0;
	node_record.upstream_node_id = NO_UPSTREAM_NODE;

	if (record_status == RECORD_FOUND)
	{
		record_created = update_node_record(primary_conn,
											"witness register",
											&node_record);
	}
	else
	{
		record_created = create_node_record(primary_conn,
											"witness register",
											&node_record);
	}

	/* don't report success if the record never reached the primary */
	if (record_created == false)
	{
		log_error(_("unable to create or update witness node record on primary"));

		PQfinish(primary_conn);
		PQfinish(witness_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* sync records */
	if (witness_copy_node_records(primary_conn, witness_conn) == false)
	{
		log_error(_("unable to copy repmgr node records from primary"));

		PQfinish(primary_conn);
		PQfinish(witness_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* create event */

	PQfinish(primary_conn);
	PQfinish(witness_conn);

	log_info(_("witness registration complete"));
	log_notice(_("witness node \"%s\" (ID: %i) successfully registered"),
			   config_file_options.node_name, config_file_options.node_id);

	return;
}

View File

@@ -1346,6 +1346,16 @@ check_cli_parameters(const int action)
*/
}
break;
case WITNESS_REGISTER:
{
if (!runtime_options.host_param_provided)
{
item_list_append_format(&cli_errors,
_("host name for the source node must be provided when executing %s"),
action_name(action));
}
}
break;
case NODE_STATUS:
if (runtime_options.node_id != UNKNOWN_NODE_ID)
{
@@ -1689,6 +1699,11 @@ action_name(const int action)
case STANDBY_FOLLOW:
return "STANDBY FOLLOW";
case WITNESS_REGISTER:
return "WITNESS REGISTER";
case WITNESS_UNREGISTER:
return "WITNESS UNREGISTER";
case BDR_REGISTER:
return "BDR REGISTER";
case BDR_UNREGISTER:
@@ -2670,7 +2685,6 @@ init_node_record(t_node_info *node_record)
node_record->priority = config_file_options.priority;
node_record->active = true;
if (config_file_options.location[0] != '\0')
strncpy(node_record->location, config_file_options.location, MAXLEN);
else
@@ -2699,6 +2713,4 @@ init_node_record(t_node_info *node_record)
{
maxlen_snprintf(node_record->slot_name, "repmgr_slot_%i", config_file_options.node_id);
}
}

View File

@@ -68,7 +68,12 @@
#recovery_min_apply_delay= # If provided, "recovery_min_apply_delay" in recovery.conf
# will be set to this value.
#------------------------------------------------------------------------------
# Witness server settings
#------------------------------------------------------------------------------
#witness_sync_interval=15 # interval (in seconds) to synchronise node records
# to the witness server
#------------------------------------------------------------------------------
# Logging settings

View File

@@ -61,20 +61,20 @@
* various default values - ensure repmgr.conf.sample is updated
* if any of these are changed
*/
#define DEFAULT_LOCATION "default"
#define DEFAULT_PRIORITY 100
#define DEFAULT_RECONNECTION_ATTEMPTS 6 /* seconds */
#define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */
#define DEFAULT_MONITORING_INTERVAL 2 /* seconds */
#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
#define DEFAULT_LOCATION "default"
#define DEFAULT_PRIORITY 100
#define DEFAULT_RECONNECTION_ATTEMPTS 6 /* seconds */
#define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */
#define DEFAULT_MONITORING_INTERVAL 2 /* seconds */
#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
#define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */
#ifndef RECOVERY_COMMAND_FILE
#define RECOVERY_COMMAND_FILE "recovery.conf"

View File

@@ -1012,6 +1012,15 @@ loop:
#endif
}
/*
 * Monitoring entry point for witness nodes.
 *
 * Placeholder only: logs that the functionality is not yet implemented
 * and returns to the caller.
 */
void
monitor_streaming_witness(void)
{
	log_error("not yet implemented");
}
#ifndef BDR_ONLY
static bool
do_primary_failover(void)

View File

@@ -23,6 +23,7 @@ void do_physical_node_check(void);
void monitor_streaming_primary(void);
void monitor_streaming_standby(void);
void monitor_streaming_witness(void);
void close_connections_physical(void);
#endif /* _REPMGRD_PHYSICAL_H_ */

View File

@@ -407,6 +407,9 @@ start_monitoring(void)
case STANDBY:
monitor_streaming_standby();
break;
case WITNESS:
monitor_streaming_witness();
break;
#else
case PRIMARY:
case STANDBY: