mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
4 Commits
dev/standb
...
dev/drop_r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4a73bdcfd0 | ||
|
|
e27f213949 | ||
|
|
cce5ca2245 | ||
|
|
6f87d2c61e |
1
HISTORY
1
HISTORY
@@ -2,6 +2,7 @@
|
||||
standby clone: don't error out if unable to determine cluster size (Ian)
|
||||
node check: fix --downstream --nagios output; GitHub #749 (Ian)
|
||||
repmgrd: ensure witness node marked active (hslightdb)
|
||||
repmgrd: improve walsender disable check (Ian)
|
||||
|
||||
5.3.1 2022-02-15
|
||||
repmgrd: fixes for potential connection leaks (hslightdb)
|
||||
|
||||
36
dbutils.c
36
dbutils.c
@@ -1888,6 +1888,42 @@ can_execute_pg_promote(PGconn *conn)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Determine if the user associated with the current connection
|
||||
* has sufficient permissions to disable the walsender
|
||||
*/
|
||||
bool
|
||||
can_disable_walsender(PGconn *conn)
|
||||
{
|
||||
/*
|
||||
* Requires PostgreSQL 9.5 or later, because ALTER SYSTEM
|
||||
*/
|
||||
if (PQserverVersion(conn) < 90500)
|
||||
{
|
||||
log_warning(_("\"standby_disconnect_on_failover\" specified, but not available for this PostgreSQL version"));
|
||||
/* TODO: format server version */
|
||||
log_detail(_("available from PostgreSQL 9.5; this PostgreSQL version is %i"), PQserverVersion(conn));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Superusers can do anything
|
||||
*/
|
||||
if (is_superuser_connection(conn, NULL) == true)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* As of PostgreSQL 14, it is not possible for a non-superuser
|
||||
* to execute ALTER SYSTEM, so further checks are superfluous.
|
||||
* This will need modifying for PostgreSQL 15.
|
||||
*/
|
||||
log_warning(_("\"standby_disconnect_on_failover\" specified, but repmgr user is not a superuser"));
|
||||
log_detail(_("superuser permission required to disable standbys on failover"));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if the user associated with the current connection is
|
||||
* a member of the "pg_monitor" default role, or optionally one
|
||||
|
||||
@@ -454,6 +454,7 @@ pid_t get_wal_receiver_pid(PGconn *conn);
|
||||
|
||||
/* user/role information functions */
|
||||
bool can_execute_pg_promote(PGconn *conn);
|
||||
bool can_disable_walsender(PGconn *conn);
|
||||
bool connection_has_pg_monitor_role(PGconn *conn, const char *subrole);
|
||||
bool is_replication_role(PGconn *conn, char *rolname);
|
||||
bool is_superuser_connection(PGconn *conn, t_connection_user *userinfo);
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
<para>
|
||||
See also: <xref linkend="upgrading-repmgr"/>
|
||||
</para>
|
||||
|
||||
<!-- remember to update the release date in ../repmgr_version.h.in -->
|
||||
<sect1 id="release-5.3.2">
|
||||
<title id="release-current">Release 5.3.2</title>
|
||||
<para><emphasis>??? ??? ???, 2022</emphasis></para>
|
||||
@@ -53,12 +55,17 @@
|
||||
GitHub #754.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgrd;: if <varname>standby_disconnect_on_failover</varname> is set, verify
|
||||
the &repmgr; user is a superuser before attempting to disable the WAL receiver.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<!-- remember to update the release date in ../repmgr_version.h.in -->
|
||||
<sect1 id="release-5.3.1">
|
||||
<title>Release 5.3.1</title>
|
||||
<para><emphasis>Tue 15 February, 2022</emphasis></para>
|
||||
|
||||
@@ -3348,10 +3348,9 @@ do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_n
|
||||
update_node_record_slot_name(primary_conn, config_file_options.node_id, local_node_record.slot_name);
|
||||
}
|
||||
|
||||
|
||||
if (create_replication_slot(follow_target_conn,
|
||||
local_node_record.slot_name,
|
||||
NULL,
|
||||
follow_target_node_record,
|
||||
output) == false)
|
||||
{
|
||||
log_error("%s", output->data);
|
||||
|
||||
190
repmgr-client.c
190
repmgr-client.c
@@ -90,17 +90,22 @@ char pg_bindir[MAXPGPATH] = "";
|
||||
*/
|
||||
t_node_info target_node_info = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
/* used by create_replication_slot() */
|
||||
/* set by the first call to _determine_replication_slot_user() */
|
||||
static t_user_type ReplicationSlotUser = USER_TYPE_UNKNOWN;
|
||||
|
||||
/* Collate command line errors and warnings here for friendlier reporting */
|
||||
static ItemList cli_errors = {NULL, NULL};
|
||||
static ItemList cli_warnings = {NULL, NULL};
|
||||
|
||||
|
||||
static void _determine_replication_slot_user(PGconn *conn,
|
||||
t_node_info *upstream_node_record,
|
||||
char **replication_user);
|
||||
|
||||
static PGconn *_get_replication_slot_connection(PGconn *conn,
|
||||
char *replication_user,
|
||||
bool *use_replication_protocol);
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@@ -3739,6 +3744,7 @@ create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_nod
|
||||
char *replication_user = NULL;
|
||||
|
||||
_determine_replication_slot_user(conn, upstream_node_record, &replication_user);
|
||||
|
||||
/*
|
||||
* If called in --dry-run context, if the replication slot user is not the
|
||||
* repmgr user, attempt to validate the connection.
|
||||
@@ -3750,7 +3756,7 @@ create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_nod
|
||||
case USER_TYPE_UNKNOWN:
|
||||
log_error("unable to determine user for replication slot creation");
|
||||
return false;
|
||||
case REPMGR_USER:
|
||||
case REPMGR_USER:
|
||||
log_info(_("replication slots will be created by user \"%s\""),
|
||||
PQuser(conn));
|
||||
return true;
|
||||
@@ -3796,65 +3802,12 @@ create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_nod
|
||||
PQfinish(superuser_conn);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* If we can't create a replication slot with the connection provided to
|
||||
* the function, create an connection with appropriate permissions.
|
||||
*/
|
||||
switch (ReplicationSlotUser)
|
||||
{
|
||||
case USER_TYPE_UNKNOWN:
|
||||
log_error("unable to determine user for replication slot creation");
|
||||
return false;
|
||||
case REPMGR_USER:
|
||||
slot_conn = conn;
|
||||
log_info(_("creating replication slot as user \"%s\""),
|
||||
PQuser(conn));
|
||||
break;
|
||||
slot_conn = _get_replication_slot_connection(conn, replication_user, &use_replication_protocol);
|
||||
|
||||
case REPLICATION_USER_NODE:
|
||||
case REPLICATION_USER_OPT:
|
||||
{
|
||||
slot_conn = duplicate_connection(conn,
|
||||
replication_user,
|
||||
true);
|
||||
if (slot_conn == NULL || PQstatus(slot_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to create replication connection as user \"%s\""),
|
||||
runtime_options.replication_user);
|
||||
log_detail("%s", PQerrorMessage(slot_conn));
|
||||
|
||||
PQfinish(slot_conn);
|
||||
return false;
|
||||
}
|
||||
use_replication_protocol = true;
|
||||
log_info(_("creating replication slot as replication user \"%s\""),
|
||||
replication_user);
|
||||
}
|
||||
break;
|
||||
|
||||
case SUPERUSER:
|
||||
{
|
||||
slot_conn = duplicate_connection(conn,
|
||||
runtime_options.superuser,
|
||||
false);
|
||||
if (slot_conn == NULL || PQstatus(slot_conn )!= CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to create super connection as user \"%s\""),
|
||||
runtime_options.superuser);
|
||||
log_detail("%s", PQerrorMessage(slot_conn));
|
||||
|
||||
PQfinish(slot_conn);
|
||||
|
||||
return false;
|
||||
}
|
||||
log_info(_("creating replication slot as superuser \"%s\""),
|
||||
runtime_options.superuser);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (slot_conn == NULL)
|
||||
return false;
|
||||
|
||||
if (use_replication_protocol == true)
|
||||
{
|
||||
@@ -3897,34 +3850,55 @@ drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
/* this is not a bad good thing */
|
||||
/* no slot, no problem */
|
||||
log_verbose(LOG_INFO,
|
||||
_("slot \"%s\" does not exist on node %i, nothing to remove"),
|
||||
slot_name, node_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (slot_info.active == false)
|
||||
if (slot_info.active == true)
|
||||
{
|
||||
if (drop_replication_slot_sql(conn, slot_name) == true)
|
||||
/*
|
||||
* If an active replication slot exists, bail out as we have a problem
|
||||
* we can't solve here.
|
||||
*/
|
||||
log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
|
||||
success = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Create the appropriate connection with which to drop the slot
|
||||
*/
|
||||
|
||||
bool use_replication_protocol = false;
|
||||
PGconn *slot_conn = NULL;
|
||||
|
||||
slot_conn = _get_replication_slot_connection(conn,
|
||||
replication_user,
|
||||
&use_replication_protocol);
|
||||
|
||||
if (use_replication_protocol == true)
|
||||
{
|
||||
success = drop_replication_slot_replprot(conn, slot_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
success = drop_replication_slot_sql(conn, slot_name);
|
||||
}
|
||||
|
||||
if (success == true)
|
||||
{
|
||||
log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If an active replication slot exists, call Houston as we have a
|
||||
* problem.
|
||||
*/
|
||||
else
|
||||
{
|
||||
log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
|
||||
success = false;
|
||||
if (slot_conn != conn)
|
||||
PQfinish(slot_conn);
|
||||
}
|
||||
|
||||
return success;
|
||||
@@ -3986,10 +3960,84 @@ _determine_replication_slot_user(PGconn *conn, t_node_info *upstream_node_record
|
||||
ReplicationSlotUser = REPLICATION_USER_NODE;
|
||||
*replication_user = upstream_node_record->repluser;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This should never happen */
|
||||
log_error("unable to determine replication slot user");
|
||||
if (upstream_node_record != NULL)
|
||||
log_debug("%i %s %s", upstream_node_record->node_id, upstream_node_record->repluser, PQuser(conn));
|
||||
else
|
||||
log_debug("upstream_node_record not provided");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * _get_replication_slot_connection()
 *
 * Return a connection with which a replication slot operation can be
 * carried out, chosen according to the file-level "ReplicationSlotUser"
 * setting:
 *
 *  - REPMGR_USER: "conn" itself is returned;
 *  - REPLICATION_USER_NODE / REPLICATION_USER_OPT: a new connection is
 *    opened as "replication_user" via duplicate_connection(), and
 *    "*use_replication_protocol" is set to true;
 *  - SUPERUSER: a new connection is opened as "runtime_options.superuser";
 *  - USER_TYPE_UNKNOWN: an error is logged and NULL is returned.
 *
 * NULL is also returned if a new connection could not be established.
 *
 * "*use_replication_protocol" is only ever set to true here, never reset,
 * so the caller should initialise it to false.
 *
 * When the returned connection is not "conn" it is a separate connection,
 * which the caller is expected to close with PQfinish().
 *
 * NOTE: the log messages emitted here talk about "creating" a replication
 * slot, although this function is also used when dropping one.
 */
static PGconn *
_get_replication_slot_connection(PGconn *conn, char *replication_user, bool *use_replication_protocol)
{
	PGconn	   *slot_conn = NULL;

	/*
	 * If we can't create a replication slot with the connection provided to
	 * the function, create a connection with appropriate permissions.
	 */
	switch (ReplicationSlotUser)
	{
		case USER_TYPE_UNKNOWN:
			log_error("unable to determine user for replication slot creation");
			return NULL;

		case REPMGR_USER:
			slot_conn = conn;
			log_info(_("creating replication slot as user \"%s\""),
					 PQuser(conn));
			break;

		case REPLICATION_USER_NODE:
		case REPLICATION_USER_OPT:
			{
				/*
				 * The third argument is "true" here and "false" for the
				 * superuser case below; presumably it requests a
				 * replication connection - confirm against
				 * duplicate_connection().
				 */
				slot_conn = duplicate_connection(conn,
												 replication_user,
												 true);
				if (slot_conn == NULL || PQstatus(slot_conn) != CONNECTION_OK)
				{
					/*
					 * Report the user we actually tried to connect as; this
					 * may have come from the node record rather than from
					 * "runtime_options.replication_user".
					 */
					log_error(_("unable to create replication connection as user \"%s\""),
							  replication_user);
					log_detail("%s", PQerrorMessage(slot_conn));

					/* libpq accepts a NULL connection pointer here */
					PQfinish(slot_conn);
					return NULL;
				}

				*use_replication_protocol = true;
				log_info(_("creating replication slot as replication user \"%s\""),
						 replication_user);
			}
			break;

		case SUPERUSER:
			{
				slot_conn = duplicate_connection(conn,
												 runtime_options.superuser,
												 false);
				if (slot_conn == NULL || PQstatus(slot_conn) != CONNECTION_OK)
				{
					log_error(_("unable to create super connection as user \"%s\""),
							  runtime_options.superuser);
					log_detail("%s", PQerrorMessage(slot_conn));

					/* libpq accepts a NULL connection pointer here */
					PQfinish(slot_conn);
					return NULL;
				}

				log_info(_("creating replication slot as superuser \"%s\""),
						 runtime_options.superuser);
			}
			break;
	}

	return slot_conn;
}
|
||||
|
||||
bool
|
||||
check_replication_slots_available(int node_id, PGconn* conn)
|
||||
{
|
||||
|
||||
@@ -2862,6 +2862,7 @@ do_primary_failover(void)
|
||||
bool final_result = false;
|
||||
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
int new_primary_id = UNKNOWN_NODE_ID;
|
||||
bool standby_disconnect_on_failover = false;
|
||||
|
||||
/*
|
||||
* Double-check status of the local connection
|
||||
@@ -2874,20 +2875,20 @@ do_primary_failover(void)
|
||||
*/
|
||||
if (config_file_options.standby_disconnect_on_failover == true)
|
||||
{
|
||||
NodeInfoListCell *cell = NULL;
|
||||
NodeInfoList check_sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
int i;
|
||||
/*
|
||||
* Determine whether we can actually disable the walsender; this depends
|
||||
* on PostgreSQL version and user permissions.
|
||||
*/
|
||||
standby_disconnect_on_failover = can_disable_walsender(local_conn);
|
||||
|
||||
bool sibling_node_wal_receiver_connected = false;
|
||||
if (standby_disconnect_on_failover == true)
|
||||
{
|
||||
NodeInfoListCell *cell = NULL;
|
||||
NodeInfoList check_sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
int i;
|
||||
|
||||
bool sibling_node_wal_receiver_connected = false;
|
||||
|
||||
if (PQserverVersion(local_conn) < 90500)
|
||||
{
|
||||
log_warning(_("\"standby_disconnect_on_failover\" specified, but not available for this PostgreSQL version"));
|
||||
/* TODO: format server version */
|
||||
log_detail(_("available from PostgreSQL 9.5, this PostgreSQL version is %i"), PQserverVersion(local_conn));
|
||||
}
|
||||
else
|
||||
{
|
||||
disable_wal_receiver(local_conn);
|
||||
|
||||
/*
|
||||
@@ -2971,7 +2972,7 @@ do_primary_failover(void)
|
||||
log_debug("election result: %s", _print_election_result(election_result));
|
||||
|
||||
/* Reenable WAL receiver, if disabled */
|
||||
if (config_file_options.standby_disconnect_on_failover == true)
|
||||
if (standby_disconnect_on_failover == true)
|
||||
{
|
||||
/* adjust "wal_retrieve_retry_interval" but don't wait for WAL receiver to start */
|
||||
enable_wal_receiver(local_conn, false);
|
||||
|
||||
Reference in New Issue
Block a user