mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 14:46:29 +00:00
repmgrd: activate inactive node record at startup
If a PostgreSQL instance was shut down while repmgrd was running, and repmgrd was subsequently restarted (this chain of events could occur during e.g. a server reboot), the node record will have been set to "inactive". Previously, in this case repmgrd would refuse to start up. However, as we can determine the node is running, it should normally be no problem to automatically set the node record to "active". The old behaviour can be restored by setting the new parameter "repmgrd_exit_on_inactive_node" to "true". RM19604.
This commit is contained in:
2
HISTORY
2
HISTORY
@@ -1,6 +1,8 @@
|
||||
5.3.0 2021-??-??
|
||||
repmgrd: prefix all shared library functions with "repmgr_" to
|
||||
minimize the risk of clashes with other shared libraries (Ian)
|
||||
repmgrd: at startup, if node record is marked as "inactive", attempt
|
||||
to set it to "active" (Ian)
|
||||
|
||||
5.2.2 2021-??-??
|
||||
standby clone: set "slot_name" in node record if required (Ian)
|
||||
|
||||
10
configdata.c
10
configdata.c
@@ -581,6 +581,16 @@ struct ConfigFileSetting config_file_settings[] =
|
||||
{ .strmaxlen = sizeof(config_file_options.repmgrd_pid_file) },
|
||||
{ .postprocess_func = &repmgr_canonicalize_path }
|
||||
},
|
||||
/* repmgrd_exit_on_inactive_node */
|
||||
{
|
||||
"repmgrd_exit_on_inactive_node",
|
||||
CONFIG_BOOL,
|
||||
{ .boolptr = &config_file_options.repmgrd_exit_on_inactive_node},
|
||||
{ .booldefault = DEFAULT_REPMGRD_EXIT_ON_INACTIVE_NODE },
|
||||
{},
|
||||
{},
|
||||
{}
|
||||
},
|
||||
/* standby_disconnect_on_failover */
|
||||
{
|
||||
"standby_disconnect_on_failover",
|
||||
|
||||
@@ -206,6 +206,7 @@ typedef struct
|
||||
int primary_notification_timeout;
|
||||
int repmgrd_standby_startup_timeout;
|
||||
char repmgrd_pid_file[MAXPGPATH];
|
||||
bool repmgrd_exit_on_inactive_node;
|
||||
bool standby_disconnect_on_failover;
|
||||
int sibling_nodes_disconnect_timeout;
|
||||
ConnectionCheckType connection_check_type;
|
||||
|
||||
@@ -26,6 +26,23 @@
|
||||
This release provides support for <ulink url="https://www.postgresql.org/docs/14/release-14.html">PostgreSQL 14</ulink>,
|
||||
to be released later in 2021.
|
||||
</para>
|
||||
<sect2>
|
||||
<title>Improvements</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgrd;: at startup, if node record is marked as "inactive", attempt
|
||||
to set it to "active".
|
||||
</para>
|
||||
<para>
|
||||
This behaviour can be overridden by setting the configuration parameter
|
||||
<varname>repmgrd_exit_on_inactive_node</varname> to <literal>true</literal>.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
|
||||
@@ -485,6 +485,32 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<term><option>repmgrd_exit_on_inactive_node</option></term>
|
||||
<listitem>
|
||||
<indexterm>
|
||||
<primary>repmgrd_exit_on_inactive_node</primary>
|
||||
</indexterm>
|
||||
<para>
|
||||
This parameter is available in &repmgr; 5.3 and later.
|
||||
</para>
|
||||
<para>
|
||||
If the node record is marked as inactive but the node is running, and this option is set to
|
||||
<literal>true</literal>, &repmgrd; will abort on startup.
|
||||
</para>
|
||||
<para>
|
||||
By default, <option>repmgrd_exit_on_inactive_node</option> is set
|
||||
to <literal>false</literal>, in which case &repmgrd; will set the
|
||||
node record to active on startup.
|
||||
</para>
|
||||
<para>
|
||||
Setting this parameter to <literal>true</literal> causes &repmgrd;
|
||||
to behave in the same way it did in &repmgr; 5.2 and earlier.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -337,6 +337,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# "--no-pid-file" will force PID file creation to be skipped.
|
||||
# Note: there is normally no need to set this, particularly if
|
||||
# repmgr was installed from packages.
|
||||
#repmgrd_exit_on_inactive_node=false # If "true", and the node record is marked as "inactive", abort repmgrd startup
|
||||
#standby_disconnect_on_failover=false # If "true", in a failover situation wait for all standbys to
|
||||
# disconnect their WAL receivers before electing a new primary
|
||||
# (PostgreSQL 9.5 and later only; repmgr user must be a superuser for this)
|
||||
|
||||
1
repmgr.h
1
repmgr.h
@@ -135,6 +135,7 @@
|
||||
#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_REPMGRD_STANDBY_STARTUP_TIMEOUT -1 /*seconds */
|
||||
#define DEFAULT_REPMGRD_EXIT_ON_INACTIVE_NODE false /* default for "repmgrd_exit_on_inactive_node": do not exit; repmgrd tries to set the node record to active */
|
||||
#define DEFAULT_STANDBY_DISCONNECT_ON_FAILOVER false
|
||||
#define DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_CONNECTION_CHECK_TYPE CHECK_PING
|
||||
|
||||
@@ -169,7 +169,7 @@ handle_sigint_physical(SIGNAL_ARGS)
|
||||
/* perform some sanity checks on the node's configuration */
|
||||
|
||||
void
|
||||
do_physical_node_check(void)
|
||||
do_physical_node_check(PGconn *conn)
|
||||
{
|
||||
/*
|
||||
* Check if node record is active - if not, and `failover=automatic`, the
|
||||
@@ -186,8 +186,37 @@ do_physical_node_check(void)
|
||||
{
|
||||
char *hint = "Check that \"repmgr (primary|standby) register\" was executed for this node";
|
||||
|
||||
/*
|
||||
* Attempt to set node record active (unless explicitly configured not to)
|
||||
*/
|
||||
if (config_file_options.repmgrd_exit_on_inactive_node == false)
|
||||
{
|
||||
PGconn *primary_conn = get_primary_connection_quiet(conn, NULL, NULL);
|
||||
bool success = true;
|
||||
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_notice(_("setting node record for node \"%s\" (ID: %i) to \"active\""),
|
||||
local_node_info.node_name,
|
||||
local_node_info.node_id);
|
||||
success = update_node_record_set_active(primary_conn, local_node_info.node_id, true);
|
||||
PQfinish(primary_conn);
|
||||
}
|
||||
|
||||
if (success == true)
|
||||
{
|
||||
local_node_info.active = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
switch (config_file_options.failover)
|
||||
{
|
||||
|
||||
/* "failover" is an enum, all values should be covered here */
|
||||
|
||||
case FAILOVER_AUTOMATIC:
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
#ifndef _REPMGRD_PHYSICAL_H_
|
||||
#define _REPMGRD_PHYSICAL_H_
|
||||
|
||||
void do_physical_node_check(void);
|
||||
void do_physical_node_check(PGconn *conn);
|
||||
|
||||
void monitor_streaming_primary(void);
|
||||
void monitor_streaming_standby(void);
|
||||
|
||||
Reference in New Issue
Block a user