From b9ba97a36db3e0110f85f7e63bb244a9dceadf8b Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Feb 2019 15:23:24 +0900 Subject: [PATCH] "standby switchover": check replication connection to upstream Ensure repmgr checks the standby (promotion candidate) is currently attached to the primary (demotion candidate). Addresses issue reported in GitHub #519. --- HISTORY | 2 ++ doc/appendix-release-notes.sgml | 24 ++++++++++++++++-------- repmgr-action-standby.c | 23 +++++++++++++++++------ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/HISTORY b/HISTORY index bf057b9e..429efc17 100644 --- a/HISTORY +++ b/HISTORY @@ -8,6 +8,8 @@ when checking received WAL location; GitHub #518 (Ian) repmgr: ensure "standby switchover" verifies repmgr can read the data directory on the demotion candidate; GitHub #523 (Ian) + repmgr: ensure "standby switchover" verifies replication connection + exists; GitHub #519 (Ian) repmgr: when executing "standby follow" and "node rejoin", check that it will actually be possible to stream from the target node (Ian) repmgr: "standby switchover": improve handling of connection URIs when diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml index b86c960a..56df7c60 100644 --- a/doc/appendix-release-notes.sgml +++ b/doc/appendix-release-notes.sgml @@ -105,14 +105,6 @@ - - - Add check repmgr standby switchover - when comparing received WAL on the standby to the primary's shutdown location to avoid a potential - race condition if the standby's walreceiver has not yet flushed all received WAL to disk. - GitHub #518. - - @@ -131,6 +123,22 @@ + + + &repmgr;: when executing repmgr standby switchover, + avoid a potential race condition when comparing received WAL on the standby to the primary's shutdown location, + as the standby's walreceiver may not have yet flushed all received WAL to disk. GitHub #518. + + + + + + + &repmgr;: when executing repmgr standby switchover, + verify the standby (promotion candidate) is currently attached to the primary (demotion candidate). GitHub #519. + + + repmgrd: on a cascaded standby, don't fail over if diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index dea953fa..48c11a19 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -3124,7 +3124,7 @@ do_standby_switchover(void) if (record_status != RECORD_FOUND) { - log_error(_("unable to retrieve node record for currentr primary (node %i)"), + log_error(_("unable to retrieve node record for current primary (node %i)"), remote_node_id); PQfinish(local_conn); @@ -3154,8 +3154,6 @@ do_standby_switchover(void) /* * Check this standby is attached to the demotion candidate - * TODO: - * - check application_name in pg_stat_replication */ if (local_node_record.upstream_node_id != remote_node_record.node_id) @@ -3170,6 +3168,20 @@ do_standby_switchover(void) exit(ERR_BAD_CONFIG); } + if (is_downstream_node_attached(remote_conn, local_node_record.node_name) == false) + { + log_error(_("local node \"%s\" (ID: %i) is not attached to demotion candidate \"%s\" (ID: %i)"), + local_node_record.node_name, + local_node_record.node_id, + remote_node_record.node_name, + remote_node_record.node_id); + + PQfinish(local_conn); + PQfinish(remote_conn); + + exit(ERR_BAD_CONFIG); + } + log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name); /* this will fill the %p event notification parameter */ @@ -3352,6 +3364,8 @@ do_standby_switchover(void) exit(ERR_BAD_CONFIG); } + /* check remote repmgr has the data directory correctly configured */ + if (parse_data_directory_config(command_output.data) == false) { log_error(_("\"data_directory\" parameter in repmgr.conf on \"%s\" is incorrectly configured"), @@ -3376,9 +3390,6 @@ do_standby_switchover(void) } - /* check remote repmgr has the data directory correctly configured */ - - // - add repmgr node check --data-directory /* * populate local node record with current state of various replication-related