diff --git a/HISTORY b/HISTORY index 320e8ade..412b422d 100644 --- a/HISTORY +++ b/HISTORY @@ -8,6 +8,7 @@ as reported by each individual node (Ian) repmgr: in "cluster show" and "daemon status", check if a node is attached to its advertised upstream node + repmgr: prevent a standby being cloned from a witness server (Ian) repmgrd: monitor standbys attached to primary (Ian) general: documentation converted to DocBook XML format (Ian) diff --git a/dbutils.c b/dbutils.c index 6b1137f4..a29d5d50 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1551,12 +1551,12 @@ get_ready_archive_files(PGconn *conn, const char *data_directory) } - bool identify_system(PGconn *repl_conn, t_system_identification *identification) { PGresult *res = NULL; + /* semicolon required here */ res = PQexec(repl_conn, "IDENTIFY_SYSTEM;"); if (PQresultStatus(res) != PGRES_TUPLES_OK || !PQntuples(res)) @@ -1576,6 +1576,43 @@ identify_system(PGconn *repl_conn, t_system_identification *identification) } +/* + * Return the system identifier by querying pg_control_system(). + * + * Note there is a similar function in controldata.c ("get_system_identifier()") + * which reads the control file. 
+ */
+uint64
+system_identifier(PGconn *conn)
+{
+	uint64		system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
+	PGresult   *res = NULL;
+
+	/* pg_control_system() was introduced in PostgreSQL 9.6 */
+	if (PQserverVersion(conn) < 90600)
+	{
+		return UNKNOWN_SYSTEM_IDENTIFIER;
+	}
+
+	res = PQexec(conn, "SELECT system_identifier FROM pg_catalog.pg_control_system()");
+
+	/* treat an empty result set as a failure too, as identify_system() does */
+	if (PQresultStatus(res) != PGRES_TUPLES_OK || !PQntuples(res))
+	{
+		log_db_error(conn, NULL, _("system_identifier(): unable to query pg_control_system()"));
+	}
+	else
+	{
+		/* unsigned 64-bit value: atol() could overflow, so parse with strtoull() */
+		system_identifier = strtoull(PQgetvalue(res, 0, 0), NULL, 10);
+	}
+
+	PQclear(res);
+
+	return system_identifier;
+}
+
+
 TimeLineHistoryEntry *
 get_timeline_history(PGconn *repl_conn, TimeLineID tli)
 {
diff --git a/dbutils.h b/dbutils.h
index bb7e7d38..020b5fcf 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -440,8 +440,8 @@ RecoveryType get_recovery_type(PGconn *conn);
 int			get_primary_node_id(PGconn *conn);
 int			get_ready_archive_files(PGconn *conn, const char *data_directory);
 bool		identify_system(PGconn *repl_conn, t_system_identification *identification);
+uint64		system_identifier(PGconn *conn);
 TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
-bool		get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list);
 
 /* repmgrd shared memory functions */
 bool		repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
@@ -484,6 +484,7 @@ bool		get_primary_node_record(PGconn *conn, t_node_info *node_info);
 bool		get_all_node_records(PGconn *conn, NodeInfoList *node_list);
 void		get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
 void		get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
+bool		get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list);
 void		get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
 bool		get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
 bool 
get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list); diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index c2e3a39c..d354d656 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -88,6 +88,14 @@ warning if the node is not attached. + + + + repmgr standby clone: + prevent a standby from being cloned from a witness server. + + + diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 8e61454f..f96a2ed0 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -4867,7 +4867,82 @@ check_source_server() log_warning(_("repmgr extension not found on source node")); } + else + { + /* + * If upstream is not a standby, retrieve its node records + * and attempt to connect to one; we'll then compare + * that node's system identifier to that of the source + * connection, to ensure we're cloning from a node which is + * part of the physical replication cluster. This is mainly + * to prevent cloning a standby from a witness server. + * + * Note that it doesn't matter if the node from the node record + * list is the same as the source node; also if the source node + * does not have any node records, there's not a lot we can do. + * + * This check will be only carried out on PostgreSQL 9.6 and + * later, as this is a precautionary check and we can retrieve the system + * identifier with a normal connection. 
+ */ + if (get_recovery_type(source_conn) == RECTYPE_PRIMARY && PQserverVersion(source_conn) >= 90600) + { + uint64 source_system_identifier = system_identifier(source_conn); + if (source_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER) + { + NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER; + NodeInfoListCell *cell = NULL; + get_all_node_records(source_conn, &all_nodes); + + log_debug("%i node records returned by source node", all_nodes.node_count); + + /* loop through its nodes table */ + + for (cell = all_nodes.head; cell; cell = cell->next) + { + + /* exclude the witness node, as its system identifier will be different, of course */ + if (cell->node_info->type == WITNESS) + continue; + + cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo); + if (PQstatus(cell->node_info->conn) == CONNECTION_OK) + { + uint64 test_system_identifier = system_identifier(cell->node_info->conn); + PQfinish(cell->node_info->conn); + + if (test_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER) + { + if (source_system_identifier != test_system_identifier) + { + log_error(_("source node's system identifier does not match other nodes in the replication cluster")); + log_detail(_("source node's system identifier is %lu, replication cluster member \"%s\"'s system identifier is %lu"), + source_system_identifier, + cell->node_info->node_name, + test_system_identifier); + log_hint(_("check that the source node is not a witness server")); + PQfinish(source_conn); + source_conn = NULL; + + if (superuser_conn != NULL) + PQfinish(superuser_conn); + + exit(ERR_BAD_CONFIG); + } + /* identifiers match - our work here is done */ + break; + } + } + else + { + PQfinish(cell->node_info->conn); + } + } + clear_node_info_list(&all_nodes); + } + } + } /* Fetch the source's data directory */ get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);