mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
repmgr: prevent a standby being cloned from a witness server
Previously repmgr would happily clone from whatever server it found at the provided source server address. We should ensure that a standby can only be cloned from a node which is part of the main replication cluster. This check fetches a list of nodes from the source server, connects to the first non-witness server it finds, and compares the system identifiers of the source node and the node it has connected to. If there is a mismatch, then the source server is clearly not part of the main replication cluster, and is most likely the witness server.
This commit is contained in:
1
HISTORY
1
HISTORY
@@ -8,6 +8,7 @@
|
|||||||
as reported by each individual node (Ian)
|
as reported by each individual node (Ian)
|
||||||
repmgr: in "cluster show" and "daemon status", check if a node is attached
|
repmgr: in "cluster show" and "daemon status", check if a node is attached
|
||||||
to its advertised upstream node
|
to its advertised upstream node
|
||||||
|
repmgr: prevent a standby being cloned from a witness server (Ian)
|
||||||
repmgrd: monitor standbys attached to primary (Ian)
|
repmgrd: monitor standbys attached to primary (Ian)
|
||||||
general: documentation converted to DocBook XML format (Ian)
|
general: documentation converted to DocBook XML format (Ian)
|
||||||
|
|
||||||
|
|||||||
39
dbutils.c
39
dbutils.c
@@ -1551,12 +1551,12 @@ get_ready_archive_files(PGconn *conn, const char *data_directory)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
identify_system(PGconn *repl_conn, t_system_identification *identification)
|
identify_system(PGconn *repl_conn, t_system_identification *identification)
|
||||||
{
|
{
|
||||||
PGresult *res = NULL;
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
/* semicolon required here */
|
||||||
res = PQexec(repl_conn, "IDENTIFY_SYSTEM;");
|
res = PQexec(repl_conn, "IDENTIFY_SYSTEM;");
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK || !PQntuples(res))
|
if (PQresultStatus(res) != PGRES_TUPLES_OK || !PQntuples(res))
|
||||||
@@ -1576,6 +1576,43 @@ identify_system(PGconn *repl_conn, t_system_identification *identification)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the system identifier by querying pg_control_system().
|
||||||
|
*
|
||||||
|
* Note there is a similar function in controldata.c ("get_system_identifier()")
|
||||||
|
* which reads the control file.
|
||||||
|
*/
|
||||||
|
uint64
|
||||||
|
system_identifier(PGconn *conn)
|
||||||
|
{
|
||||||
|
uint64 system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_control_system() was introduced in PostgreSQL 9.6
|
||||||
|
*/
|
||||||
|
if (PQserverVersion(conn) < 90600)
|
||||||
|
{
|
||||||
|
return UNKNOWN_SYSTEM_IDENTIFIER;
|
||||||
|
}
|
||||||
|
|
||||||
|
res = PQexec(conn, "SELECT system_identifier FROM pg_catalog.pg_control_system()");
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_db_error(conn, NULL, _("get_system_identifier(): unable to query pg_control_system()"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
system_identifier = atol(PQgetvalue(res, 0, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return system_identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
TimeLineHistoryEntry *
|
TimeLineHistoryEntry *
|
||||||
get_timeline_history(PGconn *repl_conn, TimeLineID tli)
|
get_timeline_history(PGconn *repl_conn, TimeLineID tli)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -440,8 +440,8 @@ RecoveryType get_recovery_type(PGconn *conn);
|
|||||||
int get_primary_node_id(PGconn *conn);
|
int get_primary_node_id(PGconn *conn);
|
||||||
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
||||||
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
||||||
|
uint64 system_identifier(PGconn *conn);
|
||||||
TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
|
TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
|
||||||
bool get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list);
|
|
||||||
|
|
||||||
/* repmgrd shared memory functions */
|
/* repmgrd shared memory functions */
|
||||||
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
||||||
@@ -484,6 +484,7 @@ bool get_primary_node_record(PGconn *conn, t_node_info *node_info);
|
|||||||
bool get_all_node_records(PGconn *conn, NodeInfoList *node_list);
|
bool get_all_node_records(PGconn *conn, NodeInfoList *node_list);
|
||||||
void get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
|
void get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
|
||||||
void get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
|
void get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
|
||||||
|
bool get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list);
|
||||||
void get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
|
void get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
|
||||||
bool get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
bool get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
||||||
bool get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);
|
bool get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);
|
||||||
|
|||||||
@@ -88,6 +88,14 @@
|
|||||||
warning if the node is not attached.
|
warning if the node is not attached.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>:
|
||||||
|
prevent a standby from being cloned from a witness server.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|||||||
@@ -4867,7 +4867,82 @@ check_source_server()
|
|||||||
|
|
||||||
log_warning(_("repmgr extension not found on source node"));
|
log_warning(_("repmgr extension not found on source node"));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If upstream is not a standby, retrieve its node records
|
||||||
|
* and attempt to connect to one; we'll then compare
|
||||||
|
* that node's system identifier to that of the source
|
||||||
|
* connection, to ensure we're cloning from a node which is
|
||||||
|
* part of the physical replication cluster. This is mainly
|
||||||
|
* to prevent cloning a standby from a witness server.
|
||||||
|
*
|
||||||
|
* Note that it doesn't matter if the node from the node record
|
||||||
|
* list is the same as the source node; also if the source node
|
||||||
|
* does not have any node records, there's not a lot we can do.
|
||||||
|
*
|
||||||
|
* This check will only be carried out on PostgreSQL 9.6 and
|
||||||
|
* later, as this is a precautionary check and we can retrieve the system
|
||||||
|
* identifier with a normal connection.
|
||||||
|
*/
|
||||||
|
if (get_recovery_type(source_conn) == RECTYPE_PRIMARY && PQserverVersion(source_conn) >= 90600)
|
||||||
|
{
|
||||||
|
uint64 source_system_identifier = system_identifier(source_conn);
|
||||||
|
|
||||||
|
if (source_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
|
||||||
|
{
|
||||||
|
NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
|
NodeInfoListCell *cell = NULL;
|
||||||
|
get_all_node_records(source_conn, &all_nodes);
|
||||||
|
|
||||||
|
log_debug("%i node records returned by source node", all_nodes.node_count);
|
||||||
|
|
||||||
|
/* loop through its nodes table */
|
||||||
|
|
||||||
|
for (cell = all_nodes.head; cell; cell = cell->next)
|
||||||
|
{
|
||||||
|
|
||||||
|
/* exclude the witness node, as its system identifier will be different, of course */
|
||||||
|
if (cell->node_info->type == WITNESS)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||||
|
if (PQstatus(cell->node_info->conn) == CONNECTION_OK)
|
||||||
|
{
|
||||||
|
uint64 test_system_identifier = system_identifier(cell->node_info->conn);
|
||||||
|
PQfinish(cell->node_info->conn);
|
||||||
|
|
||||||
|
if (test_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
|
||||||
|
{
|
||||||
|
if (source_system_identifier != test_system_identifier)
|
||||||
|
{
|
||||||
|
log_error(_("source node's system identifier does not match other nodes in the replication cluster"));
|
||||||
|
log_detail(_("source node's system identifier is %lu, replication cluster member \"%s\"'s system identifier is %lu"),
|
||||||
|
source_system_identifier,
|
||||||
|
cell->node_info->node_name,
|
||||||
|
test_system_identifier);
|
||||||
|
log_hint(_("check that the source node is not a witness server"));
|
||||||
|
PQfinish(source_conn);
|
||||||
|
source_conn = NULL;
|
||||||
|
|
||||||
|
if (superuser_conn != NULL)
|
||||||
|
PQfinish(superuser_conn);
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
/* identifiers match - our work here is done */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PQfinish(cell->node_info->conn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clear_node_info_list(&all_nodes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
/* Fetch the source's data directory */
|
/* Fetch the source's data directory */
|
||||||
get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);
|
get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user