From 79d1f005db7cc3c465094e1a2bb4374f069469db Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 12 Jul 2021 17:38:40 +0900 Subject: [PATCH] repmgrd: activate inactive node record at startup If a PostgreSQL instance was shut down while repmgrd was running, and repmgrd was subsequently restarted (this chain of events could occur during e.g. a server reboot), the node record will have been set to "inactive". Previously, in this case repmgrd would refuse to start up. However, as we can determine the node is running, it should normally be no problem to automatically set the node record to "active". The old behaviour can be restored by setting the new parameter "repmgrd_exit_on_inactive_node" to "true". RM19604. --- HISTORY | 2 ++ configdata.c | 10 ++++++++++ configfile.h | 1 + doc/appendix-release-notes.xml | 17 +++++++++++++++++ doc/repmgrd-configuration.xml | 26 ++++++++++++++++++++++++++ repmgr.conf.sample | 1 + repmgr.h | 1 + repmgrd-physical.c | 31 ++++++++++++++++++++++++++++++- repmgrd-physical.h | 2 +- repmgrd.c | 2 +- 10 files changed, 90 insertions(+), 3 deletions(-) diff --git a/HISTORY b/HISTORY index a38f21eb..567e3257 100644 --- a/HISTORY +++ b/HISTORY @@ -1,6 +1,8 @@ 5.3.0 2021-??-?? repmgrd: prefix all shared library functions with "repmgr_" to minimize the risk of clashes with other shared libraries (Ian) + repmgrd: at startup, if node record is marked as "inactive", attempt + to set it to "active" (Ian) 5.2.2. 2021-??-?? 
standby clone: set "slot_name" in node record if required (Ian) diff --git a/configdata.c b/configdata.c index 46439922..d40d158a 100644 --- a/configdata.c +++ b/configdata.c @@ -581,6 +581,16 @@ struct ConfigFileSetting config_file_settings[] = { .strmaxlen = sizeof(config_file_options.repmgrd_pid_file) }, { .postprocess_func = &repmgr_canonicalize_path } }, + /* repmgrd_exit_on_inactive_node */ + { + "repmgrd_exit_on_inactive_node", + CONFIG_BOOL, + { .boolptr = &config_file_options.repmgrd_exit_on_inactive_node}, + { .booldefault = DEFAULT_REPMGRD_EXIT_ON_INACTIVE_NODE }, + {}, + {}, + {} + }, /* standby_disconnect_on_failover */ { "standby_disconnect_on_failover", diff --git a/configfile.h b/configfile.h index 3cdb2939..3bcb9f51 100644 --- a/configfile.h +++ b/configfile.h @@ -206,6 +206,7 @@ typedef struct int primary_notification_timeout; int repmgrd_standby_startup_timeout; char repmgrd_pid_file[MAXPGPATH]; + bool repmgrd_exit_on_inactive_node; bool standby_disconnect_on_failover; int sibling_nodes_disconnect_timeout; ConnectionCheckType connection_check_type; diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index 58228060..c5adb7fd 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -26,6 +26,23 @@ This release provides support for PostgreSQL 14, to be released later in 2021. + + Improvements + + + + + &repmgrd;: at startup, if node record is marked as "inactive", attempt + to set it to "active". + + + This behaviour can be overridden by setting the configuration parameter + repmgrd_exit_on_inactive_node to true. + + + + + Bug fixes diff --git a/doc/repmgrd-configuration.xml b/doc/repmgrd-configuration.xml index f4ebd785..31a4eea8 100644 --- a/doc/repmgrd-configuration.xml +++ b/doc/repmgrd-configuration.xml @@ -485,6 +485,32 @@ + + + + + + repmgrd_exit_on_inactive_node + + + This parameter is available in &repmgr; 5.3 and later. 
+ + + If a node was marked as inactive but is running, and this option is set to + true, &repmgrd; will abort on startup. + + + By default, repmgrd_exit_on_inactive_node is set + to false, in which case &repmgrd; will set the + node record to active on startup. + + + Setting this parameter to true causes &repmgrd; + to behave in the same way it did in &repmgr; 5.2 and earlier. + + + + diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 0f06de0c..4ce39571 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -337,6 +337,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh" # "--no-pid-file" will force PID file creation to be skipped. # Note: there is normally no need to set this, particularly if # repmgr was installed from packages. +#repmgrd_exit_on_inactive_node=false # If "true", and the node record is marked as "inactive", abort repmgrd startup #standby_disconnect_on_failover=false # If "true", in a failover situation wait for all standbys to # disconnect their WAL receivers before electing a new primary # (PostgreSQL 9.5 and later only; repmgr user must be a superuser for this) diff --git a/repmgr.h b/repmgr.h index d59ef124..d9d231cc 100644 --- a/repmgr.h +++ b/repmgr.h @@ -135,6 +135,7 @@ #define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */ #define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */ #define DEFAULT_REPMGRD_STANDBY_STARTUP_TIMEOUT -1 /*seconds */ +#define DEFAULT_REPMGRD_EXIT_ON_INACTIVE_NODE false #define DEFAULT_STANDBY_DISCONNECT_ON_FAILOVER false #define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */ #define DEFAULT_CONNECTION_CHECK_TYPE CHECK_PING diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 5c1d3e03..ddf0f0fe 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -169,7 +169,7 @@ handle_sigint_physical(SIGNAL_ARGS) /* perform some sanity checks on the node's configuration */ void -do_physical_node_check(void) +do_physical_node_check(PGconn *conn) { /* * Check if node record is active - if not, and 
`failover=automatic`, the @@ -186,8 +186,37 @@ do_physical_node_check(void) { char *hint = "Check that \"repmgr (primary|standby) register\" was executed for this node"; + /* + * Attempt to set node record active (unless explicitly configured not to) + */ + if (config_file_options.repmgrd_exit_on_inactive_node == false) + { + PGconn *primary_conn = get_primary_connection_quiet(conn, NULL, NULL); + bool success = true; + + if (PQstatus(primary_conn) != CONNECTION_OK) + { + success = false; + } + else + { + log_notice(_("setting node record for node \"%s\" (ID: %i) to \"active\""), + local_node_info.node_name, + local_node_info.node_id); + success = update_node_record_set_active(primary_conn, local_node_info.node_id, true); + PQfinish(primary_conn); + } + + if (success == true) + { + local_node_info.active = true; + return; + } + } + switch (config_file_options.failover) { + /* "failover" is an enum, all values should be covered here */ case FAILOVER_AUTOMATIC: diff --git a/repmgrd-physical.h b/repmgrd-physical.h index 114303f0..c02e11cf 100644 --- a/repmgrd-physical.h +++ b/repmgrd-physical.h @@ -19,7 +19,7 @@ #ifndef _REPMGRD_PHYSICAL_H_ #define _REPMGRD_PHYSICAL_H_ -void do_physical_node_check(void); +void do_physical_node_check(PGconn *conn); void monitor_streaming_primary(void); void monitor_streaming_standby(void); diff --git a/repmgrd.c b/repmgrd.c index 9d291133..94be8d6a 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -512,7 +512,7 @@ main(int argc, char **argv) log_debug("node id is %i, upstream node id is %i", local_node_info.node_id, local_node_info.upstream_node_id); - do_physical_node_check(); + do_physical_node_check(local_conn); } if (daemonize == true)