mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
Regularly sync witness server repl_nodes table.
Although the witness server will resync the repl_nodes table following a failover, other operations (e.g. removing or cloning a standby) were previously not reflected in the witness server's copy of this table. As a short-term workaround, automatically resync the table at regular intervals (defined by the configuration file parameter "witness_repl_nodes_sync_interval_secs", default 30 seconds).
This commit is contained in:
5
config.c
5
config.c
@@ -235,6 +235,9 @@ parse_config(t_configuration_options *options)
|
|||||||
options->monitor_interval_secs = 2;
|
options->monitor_interval_secs = 2;
|
||||||
options->retry_promote_interval_secs = 300;
|
options->retry_promote_interval_secs = 300;
|
||||||
|
|
||||||
|
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
|
||||||
|
options->witness_repl_nodes_sync_interval_secs = 30;
|
||||||
|
|
||||||
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
|
||||||
|
|
||||||
options->tablespace_mapping.head = NULL;
|
options->tablespace_mapping.head = NULL;
|
||||||
@@ -358,6 +361,8 @@ parse_config(t_configuration_options *options)
|
|||||||
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
|
||||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||||
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
|
||||||
|
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
|
||||||
|
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
|
||||||
else if (strcmp(name, "use_replication_slots") == 0)
|
else if (strcmp(name, "use_replication_slots") == 0)
|
||||||
/* XXX we should have a dedicated boolean argument format */
|
/* XXX we should have a dedicated boolean argument format */
|
||||||
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
|
||||||
|
|||||||
3
config.h
3
config.h
@@ -75,13 +75,14 @@ typedef struct
|
|||||||
char logfile[MAXLEN];
|
char logfile[MAXLEN];
|
||||||
int monitor_interval_secs;
|
int monitor_interval_secs;
|
||||||
int retry_promote_interval_secs;
|
int retry_promote_interval_secs;
|
||||||
|
int witness_repl_nodes_sync_interval_secs;
|
||||||
int use_replication_slots;
|
int use_replication_slots;
|
||||||
char event_notification_command[MAXLEN];
|
char event_notification_command[MAXLEN];
|
||||||
EventNotificationList event_notifications;
|
EventNotificationList event_notifications;
|
||||||
TablespaceList tablespace_mapping;
|
TablespaceList tablespace_mapping;
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
|
||||||
|
|
||||||
typedef struct ErrorListCell
|
typedef struct ErrorListCell
|
||||||
{
|
{
|
||||||
|
|||||||
20
dbutils.c
20
dbutils.c
@@ -1138,7 +1138,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
|
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
|
||||||
get_repmgr_schema_quoted(masterconn));
|
get_repmgr_schema_quoted(masterconn));
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
|
||||||
@@ -1158,7 +1158,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
|
|
||||||
log_verbose(LOG_DEBUG,
|
log_verbose(LOG_DEBUG,
|
||||||
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
"copy_configuration(): writing node record for node %s (id: %s)\n",
|
||||||
PQgetvalue(res, i, 4),
|
PQgetvalue(res, i, 3),
|
||||||
PQgetvalue(res, i, 0));
|
PQgetvalue(res, i, 0));
|
||||||
|
|
||||||
node_record_created = create_node_record(witnessconn,
|
node_record_created = create_node_record(witnessconn,
|
||||||
@@ -1174,7 +1174,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
atoi(PQgetvalue(res, i, 5)),
|
atoi(PQgetvalue(res, i, 5)),
|
||||||
strlen(PQgetvalue(res, i, 6))
|
strlen(PQgetvalue(res, i, 6))
|
||||||
? PQgetvalue(res, i, 6)
|
? PQgetvalue(res, i, 6)
|
||||||
: NULL
|
: NULL,
|
||||||
|
(strcmp(PQgetvalue(res, i, 7), "t") == 0)
|
||||||
|
? true
|
||||||
|
: false
|
||||||
);
|
);
|
||||||
|
|
||||||
if (node_record_created == false)
|
if (node_record_created == false)
|
||||||
@@ -1200,7 +1203,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
|
|||||||
* XXX we should pass the record parameters as a struct.
|
* XXX we should pass the record parameters as a struct.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
|
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
|
||||||
{
|
{
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
char upstream_node_id[MAXLEN];
|
char upstream_node_id[MAXLEN];
|
||||||
@@ -1241,8 +1244,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
|||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_nodes "
|
"INSERT INTO %s.repl_nodes "
|
||||||
" (id, type, upstream_node_id, cluster, "
|
" (id, type, upstream_node_id, cluster, "
|
||||||
" name, conninfo, slot_name, priority) "
|
" name, conninfo, slot_name, priority, active) "
|
||||||
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
|
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
|
||||||
get_repmgr_schema_quoted(conn),
|
get_repmgr_schema_quoted(conn),
|
||||||
node,
|
node,
|
||||||
type,
|
type,
|
||||||
@@ -1251,7 +1254,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
|
|||||||
node_name,
|
node_name,
|
||||||
conninfo,
|
conninfo,
|
||||||
slot_name_buf,
|
slot_name_buf,
|
||||||
priority);
|
priority,
|
||||||
|
active == true ? "TRUE" : "FALSE");
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
|
||||||
|
|
||||||
@@ -1291,7 +1295,7 @@ delete_node_record(PGconn *conn, int node, char *action)
|
|||||||
|
|
||||||
if (action != NULL)
|
if (action != NULL)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
|
log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
|
||||||
}
|
}
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|||||||
@@ -122,7 +122,7 @@ bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
|
|||||||
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
bool stop_backup(PGconn *conn, char *last_wal_segment);
|
||||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
|
||||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||||
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
|
||||||
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||||
|
|||||||
11
repmgr.c
11
repmgr.c
@@ -1071,7 +1071,8 @@ do_master_register(void)
|
|||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
repmgr_slot_name_ptr);
|
repmgr_slot_name_ptr,
|
||||||
|
true);
|
||||||
|
|
||||||
if (record_created == false)
|
if (record_created == false)
|
||||||
{
|
{
|
||||||
@@ -1172,9 +1173,8 @@ do_standby_register(void)
|
|||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
repmgr_slot_name_ptr);
|
repmgr_slot_name_ptr,
|
||||||
|
true);
|
||||||
|
|
||||||
|
|
||||||
if (record_created == false)
|
if (record_created == false)
|
||||||
{
|
{
|
||||||
@@ -3784,7 +3784,8 @@ do_witness_create(void)
|
|||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
NULL);
|
NULL,
|
||||||
|
true);
|
||||||
|
|
||||||
if (record_created == false)
|
if (record_created == false)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -146,3 +146,6 @@ monitor_interval_secs=2
|
|||||||
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
|
||||||
# default value is 300)
|
# default value is 300)
|
||||||
retry_promote_interval_secs=300
|
retry_promote_interval_secs=300
|
||||||
|
|
||||||
|
# Number of seconds after which the witness server resyncs the repl_nodes table (default value is 30)
|
||||||
|
witness_repl_nodes_sync_interval_secs=15
|
||||||
|
|||||||
26
repmgrd.c
26
repmgrd.c
@@ -298,9 +298,12 @@ main(int argc, char **argv)
|
|||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
/* Timer for repl_nodes synchronisation interval */
|
||||||
|
int sync_repl_nodes_elapsed = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set my server mode, establish a connection to master and start
|
* Set my server mode, establish a connection to master and start
|
||||||
* monitor
|
* monitoring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
switch (node_info.type)
|
switch (node_info.type)
|
||||||
@@ -472,6 +475,24 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
sleep(local_options.monitor_interval_secs);
|
sleep(local_options.monitor_interval_secs);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On a witness node, regularly resync the repl_nodes table
|
||||||
|
* to keep up with any changes on the primary
|
||||||
|
*
|
||||||
|
* TODO: only resync the table if changes are actually detected
|
||||||
|
*/
|
||||||
|
if (node_info.type == WITNESS)
|
||||||
|
{
|
||||||
|
sync_repl_nodes_elapsed += local_options.monitor_interval_secs;
|
||||||
|
log_debug(_("%i - %i \n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs);
|
||||||
|
if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs)
|
||||||
|
{
|
||||||
|
log_debug(_("Resyncing repl_nodes table\n"));
|
||||||
|
copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
|
||||||
|
sync_repl_nodes_elapsed = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (got_SIGHUP)
|
if (got_SIGHUP)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -486,6 +507,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
got_SIGHUP = false;
|
got_SIGHUP = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (failover_done)
|
if (failover_done)
|
||||||
{
|
{
|
||||||
log_debug(_("standby check loop will terminate\n"));
|
log_debug(_("standby check loop will terminate\n"));
|
||||||
@@ -1954,6 +1976,8 @@ check_node_configuration(void)
|
|||||||
/* Adding the node */
|
/* Adding the node */
|
||||||
log_info(_("adding node %d to cluster '%s'\n"),
|
log_info(_("adding node %d to cluster '%s'\n"),
|
||||||
local_options.node, local_options.cluster_name);
|
local_options.node, local_options.cluster_name);
|
||||||
|
|
||||||
|
/* XXX use create_node_record() */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_nodes"
|
"INSERT INTO %s.repl_nodes"
|
||||||
" (id, cluster, name, conninfo, priority, witness) "
|
" (id, cluster, name, conninfo, priority, witness) "
|
||||||
|
|||||||
Reference in New Issue
Block a user