From 2c9700586cfb6174350b99edd4b92fad10a4d197 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Feb 2019 14:42:20 +0900 Subject: [PATCH] repmgr: "witness register" - check connection is to primary node Previously, if the witness server connection details were provided to "repmgr witness register" rather than those of the primary server, repmgr a) write the node record to the witness server rather than the primary, and b) would loop indefinitely trying to copy the node table to itself. Addresses GitHub #538. --- HISTORY | 4 ++- doc/appendix-release-notes.sgml | 8 ++++- doc/repmgr-witness-register.sgml | 8 +++++ repmgr-action-witness.c | 62 ++++++++++++++++++++++++++++---- 4 files changed, 74 insertions(+), 8 deletions(-) diff --git a/HISTORY b/HISTORY index 876573df..f74f1b7e 100644 --- a/HISTORY +++ b/HISTORY @@ -14,7 +14,9 @@ it will actually be possible to stream from the target node (Ian) repmgr: "standby switchover": improve handling of connection URIs when executing "node rejoin" on the demotion candidate; GitHub #525 (Ian) - repmgr: fix long node ID display in "cluster show"; (Ian) + repmgr: fix long node ID display in "cluster show" (Ian) + repmgr: check for primary server before executing "witness register"; + GitHub #538 (Ian) repmgrd: check binary and extension major versions match; GitHub #515 (Ian) repmgrd: on a cascaded standby, don't fail over if "failover=manual"; GitHub #531 (Ian) diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index ba5547e5..b185c99e 100644 --- a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -137,6 +137,13 @@ REPMGRD_OPTS="--daemonize=false" + + + &repmgr;: when executing repmgr witness register, + chech the node to connected is actually the primary (i.e. not the witness server). (GitHub #528). + + + &repmgr;: when executing repmgr standby switchover, @@ -315,7 +322,6 @@ REPMGRD_OPTS="--daemonize=false" - repmgrd: fix parsing of option. diff --git a/doc/repmgr-witness-register.sgml b/doc/repmgr-witness-register.sgml index 3f5e5b86..de6d1853 100644 --- a/doc/repmgr-witness-register.sgml +++ b/doc/repmgr-witness-register.sgml @@ -34,6 +34,14 @@ witness node's repmgr.conf, unless these are explicitly provided as command line options. + + + + The primary server must be registered with repmgr primary register before the witness + server can be registered. + + + Execute with the option to check what would happen without actually registering the witness server. diff --git a/repmgr-action-witness.c b/repmgr-action-witness.c index fdfb92b9..c313f01f 100644 --- a/repmgr-action-witness.c +++ b/repmgr-action-witness.c @@ -36,10 +36,12 @@ do_witness_register(void) { PGconn *witness_conn = NULL; PGconn *primary_conn = NULL; + int primary_node_id = UNKNOWN_NODE_ID; RecoveryType recovery_type = RECTYPE_UNKNOWN; ExtensionStatus extension_status = REPMGR_UNKNOWN; NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; t_node_info node_record = T_NODE_INFO_INITIALIZER; + t_node_info primary_node_record = T_NODE_INFO_INITIALIZER; RecordStatus record_status = RECORD_NOT_FOUND; bool record_created = false; @@ -125,6 +127,59 @@ do_witness_register(void) exit(ERR_BAD_CONFIG); } + + /* check we can determine the primary node */ + primary_node_id = get_primary_node_id(primary_conn); + + if (primary_node_id == UNKNOWN_NODE_ID) + { + log_error(_("unable to determine the cluster's primary node")); + log_hint(_("ensure the primary node connection details are correct and that it is registered")); + PQfinish(witness_conn); + PQfinish(primary_conn); + + exit(ERR_BAD_CONFIG); + } + + record_status = get_node_record(primary_conn, primary_node_id, &primary_node_record); + PQfinish(primary_conn); + + if (record_status != RECORD_FOUND) + { + log_error(_("unable to retrieve record for primary node %i"), + primary_node_id); + + PQfinish(witness_conn); + + + exit(ERR_BAD_CONFIG); + } + + /* + * Reconnect to the primary node's conninfo - this will + * protect against the situation where the witness connection + * details were provided, and we're actually connected to the + * witness server. + */ + + primary_conn = establish_db_connection_quiet(primary_node_record.conninfo); + + if (PQstatus(primary_conn) != CONNECTION_OK) + { + log_error(_("unable to reconnect to the primary node (node %i)"), primary_node_id); + log_detail(_("primary node's conninfo is \"%s\""), primary_node_record.conninfo); + + PQfinish(witness_conn); + + exit(ERR_BAD_CONFIG); + } + + /* + * TODO: sanity check witness node is not part of main cluster; we could + * add a random application_name to the respective connections, + * and do a simple check of pg_stat_activity + */ + /* check that primary node is not a BDR node */ if (is_bdr_db_quiet(primary_conn) == true) { @@ -137,11 +192,6 @@ do_witness_register(void) exit(ERR_BAD_CONFIG); } - /* - * TODO: sanity check witness node is not part of main cluster; we could - * add a random application_name to the respective connections, - * and do a simple check of pg_stat_activity - */ /* create repmgr extension, if does not exist */ if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn)) @@ -275,7 +325,7 @@ do_witness_register(void) /* these values are mandatory, setting them to anything else has no point */ node_record.type = WITNESS; node_record.priority = 0; - node_record.upstream_node_id = get_primary_node_id(primary_conn); + node_record.upstream_node_id = primary_node_id; if (record_status == RECORD_FOUND) {