mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Regularly sync witness server repl_nodes table.
Although the witness server will resync the repl_nodes table following a failover, other operations (e.g. removing or cloning a standby) were previously not reflected in the witness server's copy of this table. As a short-term workaround, automatically resync the table at regular intervals (defined by the configuration file parameter "witness_repl_nodes_sync_interval_secs", default 30 seconds).
This commit is contained in:
5
config.c
5
config.c
@@ -235,6 +235,9 @@ parse_config(t_configuration_options *options)
|
||||
options->monitor_interval_secs = 2;
|
||||
options->retry_promote_interval_secs = 300;
|
||||
|
||||
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
|
||||
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||
|
||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||
|
||||
options->tablespace_mapping.head = NULL;
|
||||
@@ -358,6 +361,8 @@ parse_config(t_configuration_options *options)
|
||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
|
||||
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
|
||||
else if (strcmp(name, "use_replication_slots") == 0)
|
||||
/* XXX we should have a dedicated boolean argument format */
|
||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
||||
|
||||
3
config.h
3
config.h
@@ -75,13 +75,14 @@ typedef struct
|
||||
char logfile[MAXLEN];
|
||||
int monitor_interval_secs;
|
||||
int retry_promote_interval_secs;
|
||||
int witness_repl_nodes_sync_interval_secs;
|
||||
int use_replication_slots;
|
||||
char event_notification_command[MAXLEN];
|
||||
EventNotificationList event_notifications;
|
||||
TablespaceList tablespace_mapping;
|
||||
} t_configuration_options;
|
||||
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||
|
||||
typedef struct ErrorListCell
|
||||
{
|
||||
|
||||
20
dbutils.c
20
dbutils.c
@@ -1138,7 +1138,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
}
|
||||
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
|
||||
get_repmgr_schema_quoted(masterconn));
|
||||
|
||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||
@@ -1158,7 +1158,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
|
||||
log_verbose(LOG_DEBUG,
|
||||
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
||||
PQgetvalue(res, i, 4),
|
||||
PQgetvalue(res, i, 3),
|
||||
PQgetvalue(res, i, 0));
|
||||
|
||||
node_record_created = create_node_record(witnessconn,
|
||||
@@ -1174,7 +1174,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
atoi(PQgetvalue(res, i, 5)),
|
||||
strlen(PQgetvalue(res, i, 6))
|
||||
? PQgetvalue(res, i, 6)
|
||||
: NULL
|
||||
: NULL,
|
||||
(strcmp(PQgetvalue(res, i, 7), "t") == 0)
|
||||
? true
|
||||
: false
|
||||
);
|
||||
|
||||
if (node_record_created == false)
|
||||
@@ -1200,7 +1203,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
||||
* XXX we should pass the record parameters as a struct.
|
||||
*/
|
||||
bool
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
|
||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
||||
{
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
char upstream_node_id[MAXLEN];
|
||||
@@ -1241,8 +1244,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes "
|
||||
" (id, type, upstream_node_id, cluster, "
|
||||
" name, conninfo, slot_name, priority) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
|
||||
" name, conninfo, slot_name, priority, active) "
|
||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
|
||||
get_repmgr_schema_quoted(conn),
|
||||
node,
|
||||
type,
|
||||
@@ -1251,7 +1254,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
||||
node_name,
|
||||
conninfo,
|
||||
slot_name_buf,
|
||||
priority);
|
||||
priority,
|
||||
active == true ? "TRUE" : "FALSE");
|
||||
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
||||
|
||||
@@ -1291,7 +1295,7 @@ delete_node_record(PGconn *conn, int node, char *action)
|
||||
|
||||
if (action != NULL)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
||||
log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
|
||||
}
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
|
||||
@@ -122,7 +122,7 @@ bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||
|
||||
11
repmgr.c
11
repmgr.c
@@ -1071,7 +1071,8 @@ do_master_register(void)
|
||||
options.node_name,
|
||||
options.conninfo,
|
||||
options.priority,
|
||||
repmgr_slot_name_ptr);
|
||||
repmgr_slot_name_ptr,
|
||||
true);
|
||||
|
||||
if (record_created == false)
|
||||
{
|
||||
@@ -1172,9 +1173,8 @@ do_standby_register(void)
|
||||
options.node_name,
|
||||
options.conninfo,
|
||||
options.priority,
|
||||
repmgr_slot_name_ptr);
|
||||
|
||||
|
||||
repmgr_slot_name_ptr,
|
||||
true);
|
||||
|
||||
if (record_created == false)
|
||||
{
|
||||
@@ -3784,7 +3784,8 @@ do_witness_create(void)
|
||||
options.node_name,
|
||||
options.conninfo,
|
||||
options.priority,
|
||||
NULL);
|
||||
NULL,
|
||||
true);
|
||||
|
||||
if (record_created == false)
|
||||
{
|
||||
|
||||
@@ -146,3 +146,6 @@ monitor_interval_secs=2
|
||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||
# default value is 300)
|
||||
retry_promote_interval_secs=300
|
||||
|
||||
# Interval, in seconds, at which the witness server resyncs the repl_nodes table (default: 30)
|
||||
witness_repl_nodes_sync_interval_secs=15
|
||||
|
||||
26
repmgrd.c
26
repmgrd.c
@@ -298,9 +298,12 @@ main(int argc, char **argv)
|
||||
*/
|
||||
do
|
||||
{
|
||||
/* Timer for repl_nodes synchronisation interval */
|
||||
int sync_repl_nodes_elapsed = 0;
|
||||
|
||||
/*
|
||||
* Set my server mode, establish a connection to master and start
|
||||
* monitor
|
||||
* monitoring
|
||||
*/
|
||||
|
||||
switch (node_info.type)
|
||||
@@ -472,6 +475,24 @@ main(int argc, char **argv)
|
||||
|
||||
sleep(local_options.monitor_interval_secs);
|
||||
|
||||
/*
|
||||
* On a witness node, regularly resync the repl_nodes table
|
||||
* to keep up with any changes on the primary
|
||||
*
|
||||
* TODO: only resync the table if changes actually detected
|
||||
*/
|
||||
if (node_info.type == WITNESS)
|
||||
{
|
||||
sync_repl_nodes_elapsed += local_options.monitor_interval_secs;
|
||||
log_debug(_("%i - %i \n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs);
|
||||
if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs)
|
||||
{
|
||||
log_debug(_("Resyncing repl_nodes table\n"));
|
||||
copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
|
||||
sync_repl_nodes_elapsed = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (got_SIGHUP)
|
||||
{
|
||||
/*
|
||||
@@ -486,6 +507,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
got_SIGHUP = false;
|
||||
}
|
||||
|
||||
if (failover_done)
|
||||
{
|
||||
log_debug(_("standby check loop will terminate\n"));
|
||||
@@ -1954,6 +1976,8 @@ check_node_configuration(void)
|
||||
/* Adding the node */
|
||||
log_info(_("adding node %d to cluster '%s'\n"),
|
||||
local_options.node, local_options.cluster_name);
|
||||
|
||||
/* XXX use create_node_record() */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"INSERT INTO %s.repl_nodes"
|
||||
" (id, cluster, name, conninfo, priority, witness) "
|
||||
|
||||
Reference in New Issue
Block a user