From 09979eaa91dfe457a7f750a363757e0112dc0789 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 11 Jun 2019 15:14:17 +0900 Subject: [PATCH] note that "standby follow" requires a primary to be available While it's technically possible to have a standby follow another standby while the primary is not available, repmgr will not be able to update its metadata, which will cause Confusion and Chaos. Update the documentation to make this clear, and provide a more helpful error message if this situation occurs. The operation previously failed anyway, but with an unhelpful message about not being able to find a node record. --- HISTORY | 1 + doc/appendix-release-notes.xml | 9 ++++- doc/repmgr-standby-follow.xml | 73 ++++++++++++++++++---------------- repmgr-action-standby.c | 24 ++++++++--- 4 files changed, 66 insertions(+), 41 deletions(-) diff --git a/HISTORY b/HISTORY index 9cc1ea56..a327f4b2 100644 --- a/HISTORY +++ b/HISTORY @@ -24,6 +24,7 @@ repmgr: ensure BDR2-specific functionality cannot be used on BDR3 and later (Ian) repmgr: canonicalize the data directory path (Ian) + repmgr: note that "standby follow" requires a primary to be available (Ian) repmgrd: monitor standbys attached to primary (Ian) repmgrd: add "primary visibility consensus" functionality (Ian) repmgrd: fix memory leak which occurs while the monitored PostgreSQL diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index cb08a81e..68f7290e 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -43,6 +43,14 @@ + + + repmgr standby follow: + note that an active, reachable cluster primary is required for this command; + and provide a more helpful error message if no reachable primary could be found. 
+ + + &repmgr;: when executing repmgr standby switchover, @@ -75,7 +83,6 @@ - repmgr standby promote: diff --git a/doc/repmgr-standby-follow.xml b/doc/repmgr-standby-follow.xml index 6ff8e306..b0f4a6b6 100644 --- a/doc/repmgr-standby-follow.xml +++ b/doc/repmgr-standby-follow.xml @@ -20,49 +20,54 @@ ("follow target"). Typically this will be the primary, but this command can also be used to attach the standby to another standby. + - This command requires a valid - repmgr.conf file for the standby, either specified - explicitly with -f/--config-file or located in a + This command requires a valid repmgr.conf file for the standby, + either specified explicitly with -f/--config-file or located in a default location; no additional arguments are required. - - By default &repmgr; will attempt to attach the standby to the current primary. - If --upstream-node-id is provided, &repmgr; will attempt - to attach the standby to the specified node, which can be another standby. - - - - This command will force a restart of the standby server, which must be - running. + The standby node ("follow candidate") must + be running. If the new upstream ("follow target") is not the primary, + the cluster primary must be running and accessible from the + standby node. - + - To re-add an inactive node to the replication cluster, use - repmgr node rejoin. + To re-add an inactive node to the replication cluster, use + repmgr node rejoin. - + - - repmgr standby follow will wait up to - standby_follow_timeout seconds (default: 30) - to verify the standby has actually connected to the new upstream node. - + + By default &repmgr; will attempt to attach the standby to the current primary. + If --upstream-node-id is provided, &repmgr; will attempt + to attach the standby to the specified node, which can be another standby. + - - - If recovery_min_apply_delay is set for the standby, it - will not attach to the new upstream node until it has replayed available - WAL. 
- - - Conversely, if the standby is attached to an upstream standby - which has recovery_min_apply_delay set, the upstream - standby's replay state may actually be behind that of its new downstream node. - - + + This command will force a restart of PostgreSQL on the standby node. + + + + repmgr standby follow will wait up to + standby_follow_timeout seconds (default: 30) + to verify the standby has actually connected to the new upstream node. + + + + + If recovery_min_apply_delay is set for the standby, it + will not attach to the new upstream node until it has replayed available + WAL. + + + Conversely, if the standby is attached to an upstream standby + which has recovery_min_apply_delay set, the upstream + standby's replay state may actually be behind that of its new downstream node. + + @@ -124,7 +129,7 @@ Note that when using &repmgrd;, --upstream-node-id should always be configured; - see Automatic failover configuration + see Automatic failover configuration for details. diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index f13f658f..304750c1 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -2784,12 +2784,6 @@ do_standby_follow(void) PQfinish(local_conn); - if (runtime_options.dry_run == true) - { - log_info(_("prerequisites for executing STANDBY FOLLOW are met")); - exit(SUCCESS); - } - /* * Here we'll need a connection to the primary, if the upstream is not a primary. */ @@ -2802,12 +2796,30 @@ do_standby_follow(void) primary_conn = get_primary_connection_quiet(follow_target_conn, &primary_node_id, NULL); + + /* + * If follow target is not primary and no other primary could be found, + * abort because we won't be able to update the node record. 
+ */ + if (PQstatus(primary_conn) != CONNECTION_OK) + { + log_error(_("unable to determine the cluster primary")); + log_detail(_("an active primary node is required for \"repmgr standby follow\"")); + PQfinish(follow_target_conn); + exit(ERR_FOLLOW_FAIL); + } } else { primary_conn = follow_target_conn; } + if (runtime_options.dry_run == true) + { + log_info(_("prerequisites for executing STANDBY FOLLOW are met")); + exit(SUCCESS); + } + initPQExpBuffer(&follow_output); success = do_standby_follow_internal(