From 16296bb1c338ca47933c50b9130d82f95c6d47a4 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 1 Feb 2016 15:10:03 +0900 Subject: [PATCH 001/113] Bump dev version number --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 5e0acca8..78de4b4f 100644 --- a/version.h +++ b/version.h @@ -1,6 +1,6 @@ #ifndef _VERSION_H_ #define _VERSION_H_ -#define REPMGR_VERSION "3.1" +#define REPMGR_VERSION "3.2dev" #endif From 827ffef5f9a411e4bd1de2cf6125b26de8d8978c Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 9 Feb 2016 15:11:50 +0900 Subject: [PATCH 002/113] Add '-P/--pwprompt' option for "repmgr create witness" Optionally prompt for superuser and repmgr user when creating a witness. This ensures a password can be provided if the primary's pg_hba.conf mandates it. This deprecates '--initdb-no-pwprompt'; and changes the default behaviour of "repmgr create witness", which previously required a superuser password unless '--initdb-no-pwprompt' was supplied. This behaviour is more consistent with other PostgreSQL utilities such as createuser. Partial fix for GitHub issue #145. --- HISTORY | 5 ++++- repmgr.c | 24 +++++++++++++++++++----- repmgr.h | 7 ++++--- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/HISTORY b/HISTORY index 0b4b8747..e9a27ec3 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,7 @@ -3.1.0 2016-01- +3.1.1 2016-02- + Add '-P/--pwprompt' option for "repmgr create witness" (Ian) + +3.1.0 2016-02-01 Add "repmgr standby switchover" command (Ian) Revised README file (Ian) Remove requirement for 'archive_mode' to be enabled (Ian) diff --git a/repmgr.c b/repmgr.c index dcd16a70..32967560 100644 --- a/repmgr.c +++ b/repmgr.c @@ -175,12 +175,14 @@ main(int argc, char **argv) {"terse", required_argument, NULL, 't'}, {"mode", required_argument, NULL, 'm'}, {"remote-config-file", required_argument, NULL, 'C'}, + /* deprecated from 3.2; replaced with -P/----pwprompt */ {"initdb-no-pwprompt", no_argument, NULL, 1}, {"check-upstream-config", no_argument, NULL, 2}, {"recovery-min-apply-delay", required_argument, NULL, 3}, {"ignore-external-config-files", no_argument, NULL, 4}, {"config-archive-dir", required_argument, NULL, 5}, {"pg_rewind", optional_argument, NULL, 6}, + {"pwprompt", optional_argument, NULL, 7}, {"help", no_argument, NULL, '?'}, {"version", no_argument, NULL, 'V'}, {NULL, 0, NULL, 0} @@ -405,6 +407,9 @@ main(int argc, char **argv) } pg_rewind_supplied = true; break; + case 7: + runtime_options.witness_pwprompt = true; + break; default: { @@ -3454,7 +3459,7 @@ do_witness_create(void) maxlen_snprintf(script, "%s %s -D %s init -o \"%s-U %s\"", make_pg_path("pg_ctl"), options.pg_ctl_options, runtime_options.dest_dir, - runtime_options.initdb_no_pwprompt ? "" : "-W ", + runtime_options.witness_pwprompt ? "-W " : "", runtime_options.superuser); log_info(_("initializing cluster for witness: %s.\n"), script); @@ -3555,10 +3560,13 @@ do_witness_create(void) /* check if we need to create a user */ if (runtime_options.username[0] && runtime_options.localport[0] && strcmp(runtime_options.username,"postgres") != 0) { - /* create required user; needs to be superuser to create untrusted language function in c */ - maxlen_snprintf(script, "%s -p %s --superuser --login -U %s %s", + /* create required user; needs to be superuser to create untrusted language function in C */ + maxlen_snprintf(script, "%s -p %s --superuser --login %s-U %s %s", make_pg_path("createuser"), - runtime_options.localport, runtime_options.superuser, runtime_options.username); + runtime_options.localport, + runtime_options.witness_pwprompt ? "-P " : "", + runtime_options.superuser, + runtime_options.username); log_info(_("creating user for witness db: %s.\n"), script); r = system(script); @@ -3829,7 +3837,8 @@ do_help(void) printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \ " optionally providing a path to the binary\n")); printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n")); - printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n")); +/* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/ + printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n")); printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \ " (default: postgres)\n")); printf(_("\n")); @@ -4275,6 +4284,11 @@ check_parameters_for_action(const int action) config_file_required = false; break; case WITNESS_CREATE: + /* Require data directory */ + if (strcmp(runtime_options.dest_dir, "") == 0) + { + error_list_append(&cli_errors, _("-D/--data-dir required when executing WITNESS CREATE")); + } /* allow all parameters to be supplied */ break; case CLUSTER_SHOW: diff --git a/repmgr.h b/repmgr.h index da16a10c..ad8a34d7 100644 --- a/repmgr.h +++ b/repmgr.h @@ -67,7 +67,7 @@ typedef struct bool force; bool wait_for_master; bool ignore_rsync_warn; - bool initdb_no_pwprompt; + bool witness_pwprompt; bool rsync_only; bool fast_checkpoint; bool ignore_external_config_files; @@ -91,11 +91,12 @@ typedef struct char recovery_min_apply_delay[MAXLEN]; - /* deprecated command line option */ + /* deprecated command line options */ char localport[MAXLEN]; + bool initdb_no_pwprompt; } t_runtime_options; -#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "" } +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false } extern char repmgr_schema[MAXLEN]; extern bool config_file_found; From 49420c437f6867b54ef3beaec3272080d1fd6292 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 9 Feb 2016 15:58:51 +0900 Subject: [PATCH 003/113] README.md: update witness server section --- README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4aff5a07..3e4dc4b5 100644 --- a/README.md +++ b/README.md @@ -1042,7 +1042,6 @@ makes sense to create a witness server in conjunction with running `repmgrd`; the witness server will require its own `repmgrd` instance. - repmgrd and cascading replication --------------------------------- @@ -1249,8 +1248,21 @@ which contains connection details for the local database. Note that it only makes sense to create a witness server if `repmgrd` is in use; see section "witness server" above. + This command requires a `repmgr.conf` file containing a valid conninfo + string for the server to be created, as well as the other minimum required + parameters detailed in the section `repmgr configuration file` above. + By default the witness server will use port 5499 to facilitate easier setup - on a server running an existing node. + on a server running an existing node. To use a different port, supply + this explicitly in the `repmgr.conf` conninfo string. + + This command also requires the location of the witness server's data + directory to be provided (`-D/--datadir`) as well as valid connection + parameters for the master server. + + By default this command will create a superuser and a repmgr user. + The `repmgr` user name will be extracted from the `conninfo` string + in `repmgr.conf`. * `cluster show` From c8f449f178d8f6349fda646a31ab9a2aa806c989 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 9 Feb 2016 15:59:28 +0900 Subject: [PATCH 004/113] witness creation: extract database and user names from the local conninfo string 99.9% of the time they'll be the same as the primary connection, but it's more consistent to use the provided local conninfo string (from which the port is already extracted). --- repmgr.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/repmgr.c b/repmgr.c index 32967560..e894a457 100644 --- a/repmgr.c +++ b/repmgr.c @@ -3363,6 +3363,8 @@ do_witness_create(void) char master_hba_file[MAXLEN]; bool success; bool record_created; + char repmgr_user[MAXLEN]; + char repmgr_db[MAXLEN]; /* Connection parameters for master only */ keywords[0] = "host"; @@ -3370,6 +3372,13 @@ do_witness_create(void) keywords[1] = "port"; values[1] = runtime_options.masterport; + /* + * Extract the repmgr user and database names from the conninfo string + * provided in repmgr.conf + */ + get_conninfo_value(options.conninfo, "user", repmgr_user); + get_conninfo_value(options.conninfo, "dbname", repmgr_db); + /* We need to connect to check configuration and copy it */ masterconn = establish_db_connection_by_params(keywords, values, true); if (!masterconn) @@ -3505,8 +3514,8 @@ do_witness_create(void) xsnprintf(buf, sizeof(buf), "\n#Configuration added by %s\n", progname()); fputs(buf, pg_conf); - - /* Attempt to extract a port number from the provided conninfo string + /* + * Attempt to extract a port number from the provided conninfo string. * This will override any value provided with '-l/--local-port', as it's * what we'll later try and connect to anyway. '-l/--local-port' should * be deprecated. @@ -3557,16 +3566,18 @@ do_witness_create(void) exit(ERR_BAD_CONFIG); } + /* check if we need to create a user */ - if (runtime_options.username[0] && runtime_options.localport[0] && strcmp(runtime_options.username,"postgres") != 0) + if (strcmp(repmgr_user, "postgres") != 0) { - /* create required user; needs to be superuser to create untrusted language function in C */ + /* create required user; needs to be superuser to create untrusted + * language function in C */ maxlen_snprintf(script, "%s -p %s --superuser --login %s-U %s %s", make_pg_path("createuser"), runtime_options.localport, runtime_options.witness_pwprompt ? "-P " : "", runtime_options.superuser, - runtime_options.username); + repmgr_user); log_info(_("creating user for witness db: %s.\n"), script); r = system(script); @@ -3592,7 +3603,10 @@ do_witness_create(void) /* create required db */ maxlen_snprintf(script, "%s -p %s -U %s --owner=%s %s", make_pg_path("createdb"), - runtime_options.localport, runtime_options.superuser, runtime_options.username, runtime_options.dbname); + runtime_options.localport, + runtime_options.superuser, + repmgr_user, + repmgr_db); log_info("creating database for witness db: %s.\n", script); r = system(script); @@ -3749,18 +3763,18 @@ do_witness_create(void) } /* drop superuser powers if needed */ - if (runtime_options.username[0] && runtime_options.localport[0] && strcmp(runtime_options.username,"postgres") != 0) + if (strcmp(repmgr_user, "postgres") != 0) { - sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", runtime_options.username); + sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", repmgr_user); log_info(_("revoking superuser status on user %s: %s.\n"), - runtime_options.username, sqlquery); + repmgr_user, sqlquery); log_debug(_("witness create: %s\n"), sqlquery); res = PQexec(witnessconn, sqlquery); if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) { log_err(_("unable to alter user privileges for user %s: %s\n"), - runtime_options.username, + repmgr_user, PQerrorMessage(witnessconn)); PQfinish(masterconn); PQfinish(witnessconn); From 7241391ddc4fb800ac4ba8ea60eb17de2f5d40e7 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 16 Feb 2016 14:26:32 +0900 Subject: [PATCH 005/113] Better handling of errors during witness creation Ensure witness is only registered after all steps for creation have been successfully completed. Also write an event record if connection could not be made to the witness server after initial creation. This addresses GitHub issue #146. --- repmgr.c | 143 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 63 deletions(-) diff --git a/repmgr.c b/repmgr.c index e894a457..edfc0a9e 100644 --- a/repmgr.c +++ b/repmgr.c @@ -751,6 +751,8 @@ do_cluster_show(void) " FROM %s.repl_show_nodes", get_repmgr_schema_quoted(conn)); + log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery ); + res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -3632,7 +3634,7 @@ do_witness_create(void) if (success == false) { - char *errmsg = _("unable to retrieve location of pg_hba.conf"); + char *errmsg = _("Unable to retrieve location of pg_hba.conf"); log_err("%s\n", errmsg); create_event_record(masterconn, @@ -3649,7 +3651,7 @@ do_witness_create(void) master_hba_file, runtime_options.dest_dir, false, -1); if (r != 0) { - char *errmsg = _("unable to copy pg_hba.conf from master"); + char *errmsg = _("Unable to copy pg_hba.conf from master"); log_err("%s\n", errmsg); create_event_record(masterconn, @@ -3663,7 +3665,7 @@ do_witness_create(void) exit(ERR_BAD_CONFIG); } - /* reload to adapt for changed pg_hba.conf */ + /* reload witness server to activate the copied pg_hba.conf */ maxlen_snprintf(script, "%s %s -w -D %s reload", make_pg_path("pg_ctl"), options.pg_ctl_options, runtime_options.dest_dir); @@ -3685,7 +3687,81 @@ do_witness_create(void) exit(ERR_BAD_CONFIG); } - /* register ourselves in the master */ + /* establish a connection to the witness, and create the schema */ + witnessconn = establish_db_connection(options.conninfo, false); + + if (PQstatus(witnessconn) != CONNECTION_OK) + { + create_event_record(masterconn, + &options, + options.node, + "witness_create", + false, + _("Unable to connect to witness servetr")); + PQfinish(masterconn); + exit(ERR_BAD_CONFIG); + } + + log_info(_("starting copy of configuration from master...\n")); + + begin_transaction(witnessconn); + + + if (!create_schema(witnessconn)) + { + rollback_transaction(witnessconn); + create_event_record(masterconn, + &options, + options.node, + "witness_create", + false, + _("Unable to create schema on witness")); + PQfinish(masterconn); + PQfinish(witnessconn); + exit(ERR_BAD_CONFIG); + } + + commit_transaction(witnessconn); + + /* copy configuration from master, only repl_nodes is needed */ + if (!copy_configuration(masterconn, witnessconn, options.cluster_name)) + { + create_event_record(masterconn, + &options, + options.node, + "witness_create", + false, + _("Unable to copy configuration from master")); + PQfinish(masterconn); + PQfinish(witnessconn); + exit(ERR_BAD_CONFIG); + } + + /* drop superuser powers if needed */ + if (strcmp(repmgr_user, "postgres") != 0) + { + sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", repmgr_user); + log_info(_("revoking superuser status on user %s: %s.\n"), + repmgr_user, sqlquery); + + log_debug(_("witness create: %s\n"), sqlquery); + res = PQexec(witnessconn, sqlquery); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + log_err(_("Unable to alter user privileges for user %s: %s\n"), + repmgr_user, + PQerrorMessage(witnessconn)); + PQfinish(masterconn); + PQfinish(witnessconn); + exit(ERR_DB_QUERY); + } + } + + /* Finished with the witness server */ + + PQfinish(witnessconn); + + /* Finally, register ourselves with the master */ if (runtime_options.force) { @@ -3724,64 +3800,6 @@ do_witness_create(void) exit(ERR_DB_QUERY); } - /* establish a connection to the witness, and create the schema */ - witnessconn = establish_db_connection(options.conninfo, true); - - log_info(_("starting copy of configuration from master...\n")); - - begin_transaction(witnessconn); - - - if (!create_schema(witnessconn)) - { - rollback_transaction(witnessconn); - create_event_record(masterconn, - &options, - options.node, - "witness_create", - false, - _("unable to create schema on witness")); - PQfinish(masterconn); - PQfinish(witnessconn); - exit(ERR_BAD_CONFIG); - } - - commit_transaction(witnessconn); - - /* copy configuration from master, only repl_nodes is needed */ - if (!copy_configuration(masterconn, witnessconn, options.cluster_name)) - { - create_event_record(masterconn, - &options, - options.node, - "witness_create", - false, - _("Unable to copy configuration from master")); - PQfinish(masterconn); - PQfinish(witnessconn); - exit(ERR_BAD_CONFIG); - } - - /* drop superuser powers if needed */ - if (strcmp(repmgr_user, "postgres") != 0) - { - sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", repmgr_user); - log_info(_("revoking superuser status on user %s: %s.\n"), - repmgr_user, sqlquery); - - log_debug(_("witness create: %s\n"), sqlquery); - res = PQexec(witnessconn, sqlquery); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) - { - log_err(_("unable to alter user privileges for user %s: %s\n"), - repmgr_user, - PQerrorMessage(witnessconn)); - PQfinish(masterconn); - PQfinish(witnessconn); - exit(ERR_DB_QUERY); - } - } - /* Log the event */ create_event_record(masterconn, &options, @@ -3791,7 +3809,6 @@ do_witness_create(void) NULL); PQfinish(masterconn); - PQfinish(witnessconn); log_notice(_("configuration has been successfully copied to the witness\n")); } From c6e1bc205a915d8eff53fec2ffd1f9a54d9861b9 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 22 Feb 2016 14:58:17 +0900 Subject: [PATCH 006/113] Prevent repmgr/repmgrd running as root --- HISTORY | 1 + repmgr.c | 15 ++++++++++++++- repmgrd.c | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/HISTORY b/HISTORY index e9a27ec3..37961114 100644 --- a/HISTORY +++ b/HISTORY @@ -1,5 +1,6 @@ 3.1.1 2016-02- Add '-P/--pwprompt' option for "repmgr create witness" (Ian) + Prevent repmgr/repmgrd running as root (Ian) 3.1.0 2016-02-01 Add "repmgr standby switchover" command (Ian) diff --git a/repmgr.c b/repmgr.c index edfc0a9e..37f38eaa 100644 --- a/repmgr.c +++ b/repmgr.c @@ -198,6 +198,19 @@ main(int argc, char **argv) set_progname(argv[0]); + /* Disallow running as root to prevent directory ownership problems */ + if (geteuid() == 0) + { + fprintf(stderr, + _("%s: cannot be run as root\n" + "Please log in (using, e.g., \"su\") as the " + "(unprivileged) user that owns\n" + "the data directory.\n" + ), + progname()); + exit(1); + } + /* Initialise some defaults */ /* set default user */ @@ -212,7 +225,7 @@ main(int argc, char **argv) } else { - fprintf(stderr, "could not get current user name: %s\n", strerror(errno)); + fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno)); exit(ERR_BAD_CONFIG); } } diff --git a/repmgrd.c b/repmgrd.c index 654c1898..bb4d3d06 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -142,6 +142,20 @@ main(int argc, char **argv) set_progname(argv[0]); + /* Disallow running as root to prevent directory ownership problems */ + if (geteuid() == 0) + { + fprintf(stderr, + _("%s: cannot be run as root\n" + "Please log in (using, e.g., \"su\") as the " + "(unprivileged) user that owns " + "the data directory.\n" + ), + progname()); + exit(1); + } + + while ((c = getopt_long(argc, argv, "?Vf:vmdp:", long_options, &optindex)) != -1) { switch (c) From 2fe3b3c2a3203aa00ea736df8127a0a41744dca0 Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 22 Feb 2016 09:16:11 -0300 Subject: [PATCH 007/113] Fix a few paragraphs from the README.md. --- README.md | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 3e4dc4b5..eaa4fad7 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,14 @@ provides a single read/write master server and one or more read-only standbys containing near-real time copies of the master server's database. For a multi-master replication solution, please see 2ndQuadrant's BDR -(bi-directional replication) extension. For selective replication, e.g. -of individual tables or databases from one server to another, please -see 2ndQuadrant's pglogical extension. +(bi-directional replication) extension. +http://2ndquadrant.com/en-us/resources/bdr/ + +For selective replication, e.g. of individual tables or databases from one server +to another, please see 2ndQuadrant's pglogical extension. + +http://2ndquadrant.com/en-us/resources/pglogical/ ### Concepts @@ -117,8 +121,8 @@ views: The `repmgr` metadata schema can be stored in an existing database or in its own dedicated database. -A dedicated superuser is required to own the meta-database as well as carry out -administrative actions. +A dedicated database superuser is required to own the meta-database as well as carry +out administrative actions. Installation ------------ @@ -128,7 +132,9 @@ Installation `repmgr` is developed and tested on Linux and OS X, but should work on any UNIX-like system supported by PostgreSQL itself. -`repmgr` supports PostgreSQL from version 9.3. +Current versions of `repmgr` support PostgreSQL from version 9.3. If you are +interested in using `repmgr` on earlier versions of PostgreSQL you can download +version 2.1 which supports PostgreSQL from version 9.1. All servers in the replication cluster must be running the same major version of PostgreSQL, and we recommend that they also run the same minor version. @@ -137,7 +143,7 @@ The `repmgr` tools must be installed on each server in the replication cluster. A dedicated system user for `repmgr` is *not* required; as many `repmgr` and `repmgrd` actions require direct access to the PostgreSQL data directory, -it should executed by the `postgres` user. +it should be executed by the `postgres` user. Additionally, we recommend installing `rsync` and enabling passwordless `ssh` connectivity between all servers in the replication cluster. @@ -364,11 +370,11 @@ Clone the standby with: [2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server [2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start -This will clone the PostgreSQL data directory files from the master using -PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the -correct parameters to start streaming from the master server will be created +This will clone the PostgreSQL data directory files from the master at repmgr_node1 +using PostgreSQL's pg_basebackup utility. A `recovery.conf` file containing the +correct parameters to start streaming from this master server will be created automatically, and unless otherwise the `postgresql.conf` and `pg_hba.conf` -files will be copied. +files will be copied from the master. Make any adjustments to the PostgreSQL configuration files now, then start the standby server. @@ -702,16 +708,16 @@ Performing a switchover with repmgr A typical use-case for replication is a combination of master and standby server, with the standby serving as a backup which can easily be activated in case of a problem with the master. Such an unplanned failover would -normally be handled by promoting the standby, after which appropriate action -taken to restore the old master. +normally be handled by promoting the standby, after which an appropriate +action must be taken to restore the old master. In some cases however it's desirable to promote the standby in a planned way, e.g. so maintenance can be performed on the master; this kind of switchover is supported by the `repmgr standby switchover` command. `repmgr standby switchover` differs from other `repmgr` actions in that it -also performs actions on another server, for which reason both passwordless -SSH access and the path of `repmgr.conf` on that server. +also performs actions on another server, for which reason you must provide +both passwordless SSH access and the path of `repmgr.conf` on that server. * * * From e1b8982c14eb7ee8a4bc89c57524e8ee209dcf54 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 18 Feb 2016 17:46:33 +0900 Subject: [PATCH 008/113] Ensure witness node is registered before the repl_nodes table is copied This fixes a bug introduced into the previous commit, where the witness node was registered last to prevent a spurious node record being created even if witness server creation failed. --- repmgr.c | 94 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/repmgr.c b/repmgr.c index 37f38eaa..72a0c829 100644 --- a/repmgr.c +++ b/repmgr.c @@ -3715,11 +3715,11 @@ do_witness_create(void) exit(ERR_BAD_CONFIG); } + log_info(_("starting copy of configuration from master...\n")); begin_transaction(witnessconn); - if (!create_schema(witnessconn)) { rollback_transaction(witnessconn); @@ -3736,45 +3736,11 @@ do_witness_create(void) commit_transaction(witnessconn); - /* copy configuration from master, only repl_nodes is needed */ - if (!copy_configuration(masterconn, witnessconn, options.cluster_name)) - { - create_event_record(masterconn, - &options, - options.node, - "witness_create", - false, - _("Unable to copy configuration from master")); - PQfinish(masterconn); - PQfinish(witnessconn); - exit(ERR_BAD_CONFIG); - } - - /* drop superuser powers if needed */ - if (strcmp(repmgr_user, "postgres") != 0) - { - sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", repmgr_user); - log_info(_("revoking superuser status on user %s: %s.\n"), - repmgr_user, sqlquery); - - log_debug(_("witness create: %s\n"), sqlquery); - res = PQexec(witnessconn, sqlquery); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) - { - log_err(_("Unable to alter user privileges for user %s: %s\n"), - repmgr_user, - PQerrorMessage(witnessconn)); - PQfinish(masterconn); - PQfinish(witnessconn); - exit(ERR_DB_QUERY); - } - } - - /* Finished with the witness server */ - - PQfinish(witnessconn); - - /* Finally, register ourselves with the master */ + /* + * Register new witness server on the primary + * Do this as late as possible to avoid having to delete + * the record if the server creation fails + */ if (runtime_options.force) { @@ -3813,6 +3779,54 @@ do_witness_create(void) exit(ERR_DB_QUERY); } + + /* copy configuration from master, only repl_nodes is needed */ + if (!copy_configuration(masterconn, witnessconn, options.cluster_name)) + { + create_event_record(masterconn, + &options, + options.node, + "witness_create", + false, + _("Unable to copy configuration from master")); + + /* + * delete previously created witness node record + * XXX maybe set inactive? + */ + delete_node_record(masterconn, + options.node, + "witness create"); + + PQfinish(masterconn); + PQfinish(witnessconn); + exit(ERR_BAD_CONFIG); + } + + /* drop superuser powers if needed */ + if (strcmp(repmgr_user, "postgres") != 0) + { + sqlquery_snprintf(sqlquery, "ALTER ROLE %s NOSUPERUSER", repmgr_user); + log_info(_("revoking superuser status on user %s: %s.\n"), + repmgr_user, sqlquery); + + log_debug(_("witness create: %s\n"), sqlquery); + res = PQexec(witnessconn, sqlquery); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + log_err(_("Unable to alter user privileges for user %s: %s\n"), + repmgr_user, + PQerrorMessage(witnessconn)); + PQfinish(masterconn); + PQfinish(witnessconn); + exit(ERR_DB_QUERY); + } + } + + /* Finished with the witness server */ + + PQfinish(witnessconn); + /* Log the event */ create_event_record(masterconn, &options, From d9961bbb17ec57db7ce445b120bbfd80d455294d Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 23 Feb 2016 14:25:26 +0900 Subject: [PATCH 009/113] Minor fixes to README.md --- README.md | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index eaa4fad7..e76335b5 100644 --- a/README.md +++ b/README.md @@ -113,8 +113,8 @@ tables: - `repl_monitor`: historical standby monitoring information written by `repmgrd` views: - - `repl_show_nodes`: based on the `repl_nodes` showing name of the server's - upstream node + - `repl_show_nodes`: based on the table `repl_nodes`, additionally showing the + name of the server's upstream node - `repl_status`: when `repmgrd`'s monitoring is enabled, shows current monitoring status for each node @@ -192,7 +192,8 @@ PostgreSQL itself. `repmgr` and `repmgrd` use a common configuration file, by default called `repmgr.conf` (although any name can be used if explicitly specified). At the very least, `repmgr.conf` must contain the connection parameters -for the local `repmgr` database. +for the local `repmgr` database; see `repmgr configuration file` below +for more details. The configuration file will be searched for in the following locations: @@ -383,12 +384,12 @@ standby server. > *NOTE*: `repmgr standby clone` does not require `repmgr.conf`, however we > recommend providing this as `repmgr` will set the `application_name` parameter -> in `recovery.conf` as value provided in `node_name`, making it easier to identify -> the node in `pg_stat_replication`. It's also possible to provide some advanced -> options for controlling the standby cloning process; see next section for -> details. +> in `recovery.conf` as the value provided in `node_name`, making it easier to +> identify the node in `pg_stat_replication`. It's also possible to provide some +> advanced options for controlling the standby cloning process; see next section +> for details. -*** +* * * ### Verify replication is functioning @@ -431,20 +432,20 @@ table: 2 | standby | 1 | test | node2 | host=repmgr_node2 dbname=repmgr user=repmgr | | 100 | t (2 rows) -The standby server now has a copy of records for all servers in the replication -cluster. Note that the relationship between master and standby is explicitly -defined via the `upstream_node_id` value, which shows here that the standby's -upstream server is the replication cluster master. While of limited use -in a simple master/standby replication cluster, this information is required +The standby server now has a copy of the records for all servers in the +replication cluster. Note that the relationship between master and standby is +explicitly defined via the `upstream_node_id` value, which shows here that the +standby's upstream server is the replication cluster master. While of limited +use in a simple master/standby replication cluster, this information is required to effectively manage cascading replication (see below). Advanced options for cloning a standby -------------------------------------- -The above section demonstrates the simplest possible way to clone -a standby server. Depending on your situation, finer-grained control -over the cloning process may be necessary. +The above section demonstrates the simplest possible way to cloneb a standby +server. Depending on your circumstances, finer-grained controlover the cloning +process may be necessary. ### pg_basebackup options when cloning a standby @@ -455,8 +456,8 @@ However this may impact performance of the server being cloned from so should be used with care. Further options can be passed to the `pg_basebackup` utility via -the `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL -documentation for more details: +the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL +documentation for more details of available options: http://www.postgresql.org/docs/current/static/app-pgbasebackup.html ### Using rsync to clone a standby @@ -480,7 +481,7 @@ fresh clone with `pg_basebackup`. By default, `repmgr` will attempt to copy the standard configuration files (`postgresql.conf`, `pg_hba.conf` and `pg_ident.conf`) even if they are located -outside of the data directory (though note currently they will be copied +outside of the data directory (though currently they will be copied into the standby's data directory). To prevent this happening, when executing `repmgr standby clone` provide the `--ignore-external-config-files` option. @@ -1164,7 +1165,7 @@ configuration file is located if `-f/--config-file` is not supplied. ### repmgr commands The `repmgr` command line tool accepts commands for specific servers in the -replication in the format "`server type` `action`", or for the entire +replication in the format "`server_type` `action`", or for the entire replication cluster in the format "`cluster` `action`". Each command is described below. @@ -1252,7 +1253,7 @@ which contains connection details for the local database. time a failover occurs. Note that it only makes sense to create a witness server if `repmgrd` - is in use; see section "witness server" above. + is in use; see section "Using a witness server" above. This command requires a `repmgr.conf` file containing a valid conninfo string for the server to be created, as well as the other minimum required @@ -1274,7 +1275,7 @@ which contains connection details for the local database. Displays information about each active node in the replication cluster. This command polls each registered server and shows its role (master / standby / - witness) or "FAILED" if the node doesn't respond. It polls each server + witness) or `FAILED` if the node doesn't respond. It polls each server directly and can be run on any node in the cluster; this is also useful when analyzing connectivity from a particular node. From 62bb3db1f89aefec2b162ca776c8ae089102fdc3 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 24 Feb 2016 15:27:08 +0900 Subject: [PATCH 010/113] Fix code comment --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 72a0c829..03fe8edd 100644 --- a/repmgr.c +++ b/repmgr.c @@ -175,7 +175,7 @@ main(int argc, char **argv) {"terse", required_argument, NULL, 't'}, {"mode", required_argument, NULL, 'm'}, {"remote-config-file", required_argument, NULL, 'C'}, - /* deprecated from 3.2; replaced with -P/----pwprompt */ + /* deprecated from 3.2; replaced with -P/--pwprompt */ {"initdb-no-pwprompt", no_argument, NULL, 1}, {"check-upstream-config", no_argument, NULL, 2}, {"recovery-min-apply-delay", required_argument, NULL, 3}, From d400d7f9ac67640df22ee971b802dc9c981b9f04 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 24 Feb 2016 15:33:36 +0900 Subject: [PATCH 011/113] repmgrd: fix error message --- repmgrd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index bb4d3d06..8600a660 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -393,8 +393,8 @@ main(int argc, char **argv) local_options.cluster_name); master_conn = get_master_connection(my_local_conn, - local_options.cluster_name, - &master_options.node, NULL); + local_options.cluster_name, + &master_options.node, NULL); if (master_conn == NULL) { @@ -402,8 +402,7 @@ main(int argc, char **argv) initPQExpBuffer(&errmsg); appendPQExpBuffer(&errmsg, - _("unable to connect to master node '%s'"), - master_options.node_name); + _("unable to connect to master node")); log_err("%s\n", errmsg.data); From 4cafd443e1f024909b92c3b4295cbb5e36a82bb1 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 25 Feb 2016 16:36:19 +0900 Subject: [PATCH 012/113] README: Add note about 'repmgr_funcs' --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index e76335b5..47f2493a 100644 --- a/README.md +++ b/README.md @@ -876,6 +876,10 @@ be set in `repmgr.conf`: (See `repmgr.conf.sample` for further `repmgrd`-specific settings). +Additionally, `postgresql.conf` must contain the following line: + + shared_preload_libraries = 'repmgr_funcs' + When `failover` is set to `automatic`, upon detecting failure of the current master, `repmgrd` will execute one of `promote_command` or `follow_command`, depending on whether the current server is becoming the new master or From b55519c4a2e7a95ee769af9aa4cb9874bcc8a67b Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 2 Mar 2016 09:41:06 +0900 Subject: [PATCH 013/113] Add hint about registering the server after cloning it. This step is easy to forget. --- repmgr.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/repmgr.c b/repmgr.c index 03fe8edd..5634d80e 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1952,7 +1952,6 @@ stop_backup: exit(retval); } - /* * Clean up any $PGDATA subdirectories which may contain * files which won't be removed by rsync and which could @@ -2013,9 +2012,9 @@ stop_backup: } /* - * XXX It might be nice to provide the following options: - * - have repmgr start the daemon automatically - * - provide a custom pg_ctl command + * XXX It might be nice to provide an options to have repmgr start + * the PostgreSQL server automatically (e.g. with a custom pg_ctl + * command) */ log_notice(_("you can now start your PostgreSQL server\n")); @@ -2029,7 +2028,28 @@ stop_backup: log_hint(_("for example : /etc/init.d/postgresql start\n")); } - /* Log the event - if we could connect to the primary */ + + /* + * XXX forgetting to (re) register the standby is a frequent cause + * of error; we should consider having repmgr automatically + * register the standby, either by default with an option + * "--no-register", or an option "--register". + * + * Note that "repmgr standby register" requires the standby to + * be running - if not, and we just update the node record, + * we'd have an incorrect representation of the replication cluster. + * Best combined with an automatic start of the server (see note + * above) + */ + + /* + * XXX detect whether a record exists for this node already, and + * add a hint about using the -F/--force. + */ + + log_hint(_("After starting the server, you need to register this standby with \"repmgr standby register\"")); + + /* Log the event - if we can connect to the primary */ if (primary_conn != NULL) { From c828598bfb2281d26c8a914b5a56352d7f578430 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 28 Mar 2016 15:53:25 +0900 Subject: [PATCH 014/113] It's unlikely this situation will occur on a witness server Which is why the error message is for master/standby only. --- repmgrd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index 8600a660..16cf2a89 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -274,7 +274,14 @@ main(int argc, char **argv) /* Retrieve record for this node from the local database */ node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node); - /* No node record found - exit gracefully */ + /* + * No node record found - exit gracefully + * + * Note: it's highly unlikely this situation will occur when starting + * repmgrd on a witness, unless someone goes to the trouble of + * deleting the node record from the previously copied table. + */ + if (node_info.node_id == NODE_NOT_FOUND) { log_err(_("No metadata record found for this node - terminating\n")); From daafd70383a2c97e8d0432a422cea31165dcf3f1 Mon Sep 17 00:00:00 2001 From: Nikolay Shaplov Date: Mon, 28 Mar 2016 18:56:50 +0300 Subject: [PATCH 015/113] Better use /24 network mask in this example --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 47f2493a..8bf3190c 100644 --- a/README.md +++ b/README.md @@ -284,11 +284,11 @@ similar to the following: local replication repmgr trust host replication repmgr 127.0.0.1/32 trust - host replication repmgr 192.168.1.0/32 trust + host replication repmgr 192.168.1.0/24 trust local repmgr repmgr trust host repmgr repmgr 127.0.0.1/32 trust - host repmgr repmgr 192.168.1.0/32 trust + host repmgr repmgr 192.168.1.0/24 trust Adjust according to your network environment and authentication requirements. From c48c248c15f3eb3acf46926f6221aec799771c3a Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 29 Mar 2016 16:46:18 +0900 Subject: [PATCH 016/113] Regularly sync witness server repl_nodes table. Although the witness server will resync the repl_nodes table following a failover, other operations (e.g. removing or cloning a standby) were previously not reflected in the witness server's copy of this table. As a short-term workaround, automatically resync the table at regular intervals (defined by the configuration file parameter "witness_repl_nodes_sync_interval_secs", default 30 seconds). --- config.c | 5 +++++ config.h | 3 ++- dbutils.c | 20 ++++++++++++-------- dbutils.h | 2 +- repmgr.c | 11 ++++++----- repmgr.conf.sample | 3 +++ repmgrd.c | 26 +++++++++++++++++++++++++- 7 files changed, 54 insertions(+), 16 deletions(-) diff --git a/config.c b/config.c index 46ab9c25..c32166bb 100644 --- a/config.c +++ b/config.c @@ -235,6 +235,9 @@ parse_config(t_configuration_options *options) options->monitor_interval_secs = 2; options->retry_promote_interval_secs = 300; + /* default to resyncing repl_nodes table every 30 seconds on the witness server */ + options->witness_repl_nodes_sync_interval_secs = 30; + memset(options->event_notification_command, 0, sizeof(options->event_notification_command)); options->tablespace_mapping.head = NULL; @@ -358,6 +361,8 @@ parse_config(t_configuration_options *options) options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false); else if (strcmp(name, "retry_promote_interval_secs") == 0) options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false); + else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0) + options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false); else if (strcmp(name, "use_replication_slots") == 0) /* XXX we should have a dedicated boolean argument format */ options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false); diff --git a/config.h b/config.h index 3d65637f..b5f3099b 100644 --- a/config.h +++ b/config.h @@ -75,13 +75,14 @@ typedef struct char logfile[MAXLEN]; int monitor_interval_secs; int retry_promote_interval_secs; + int witness_repl_nodes_sync_interval_secs; int use_replication_slots; char event_notification_command[MAXLEN]; EventNotificationList event_notifications; TablespaceList tablespace_mapping; } t_configuration_options; -#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } +#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } typedef struct ErrorListCell { diff --git a/dbutils.c b/dbutils.c index 0b12517d..9bc8792d 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1138,7 +1138,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) } sqlquery_snprintf(sqlquery, - "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes", + "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes", get_repmgr_schema_quoted(masterconn)); log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery); @@ -1158,7 +1158,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) log_verbose(LOG_DEBUG, "copy_configuration(): writing node record for node %s (id: %s)\n", - PQgetvalue(res, i, 4), + PQgetvalue(res, i, 3), PQgetvalue(res, i, 0)); node_record_created = create_node_record(witnessconn, @@ -1174,7 +1174,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) atoi(PQgetvalue(res, i, 5)), strlen(PQgetvalue(res, i, 6)) ? PQgetvalue(res, i, 6) - : NULL + : NULL, + (strcmp(PQgetvalue(res, i, 7), "t") == 0) + ? true + : false ); if (node_record_created == false) @@ -1200,7 +1203,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) * XXX we should pass the record parameters as a struct. */ bool -create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name) +create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active) { char sqlquery[QUERY_STR_LEN]; char upstream_node_id[MAXLEN]; @@ -1241,8 +1244,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " " (id, type, upstream_node_id, cluster, " - " name, conninfo, slot_name, priority) " - "VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ", + " name, conninfo, slot_name, priority, active) " + "VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ", get_repmgr_schema_quoted(conn), node, type, @@ -1251,7 +1254,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea node_name, conninfo, slot_name_buf, - priority); + priority, + active == true ? "TRUE" : "FALSE"); log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery); @@ -1291,7 +1295,7 @@ delete_node_record(PGconn *conn, int node, char *action) if (action != NULL) { - log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action); + log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action); } res = PQexec(conn, sqlquery); diff --git a/dbutils.h b/dbutils.h index df9f1065..bc34d59c 100644 --- a/dbutils.h +++ b/dbutils.h @@ -122,7 +122,7 @@ bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint); bool stop_backup(PGconn *conn, char *last_wal_segment); bool set_config_bool(PGconn *conn, const char *config_param, bool state); bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name); -bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name); +bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active); bool delete_node_record(PGconn *conn, int node, char *action); int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info); bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active); diff --git a/repmgr.c b/repmgr.c index 5634d80e..98304442 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1071,7 +1071,8 @@ do_master_register(void) options.node_name, options.conninfo, options.priority, - repmgr_slot_name_ptr); + repmgr_slot_name_ptr, + true); if (record_created == false) { @@ -1172,9 +1173,8 @@ do_standby_register(void) options.node_name, options.conninfo, options.priority, - repmgr_slot_name_ptr); - - + repmgr_slot_name_ptr, + true); if (record_created == false) { @@ -3784,7 +3784,8 @@ do_witness_create(void) options.node_name, options.conninfo, options.priority, - NULL); + NULL, + true); if (record_created == false) { diff --git a/repmgr.conf.sample b/repmgr.conf.sample index cba88886..268a41e1 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -146,3 +146,6 @@ monitor_interval_secs=2 # seconds; by default this would be half an hour, as 'retry_promote_interval_secs' # default value is 300) retry_promote_interval_secs=300 + +# Number of seconds after which the witness server resyncs the repl_nodes table +witness_repl_nodes_sync_interval_secs=15 diff --git a/repmgrd.c b/repmgrd.c index 16cf2a89..b5b3f04a 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -298,9 +298,12 @@ main(int argc, char **argv) */ do { + /* Timer for repl_nodes synchronisation interval */ + int sync_repl_nodes_elapsed = 0; + /* * Set my server mode, establish a connection to master and start - * monitor + * monitoring */ switch (node_info.type) @@ -472,6 +475,24 @@ main(int argc, char **argv) sleep(local_options.monitor_interval_secs); + /* + * On a witness node, regularly resync the repl_nodes table + * to keep up with any changes on the primary + * + * TODO: only resync the table if changes actually detected + */ + if (node_info.type == WITNESS) + { + sync_repl_nodes_elapsed += local_options.monitor_interval_secs; + log_debug(_("%i - %i \n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs); + if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs) + { + log_debug(_("Resyncing repl_nodes table\n")); + copy_configuration(master_conn, my_local_conn, local_options.cluster_name); + sync_repl_nodes_elapsed = 0; + } + } + if (got_SIGHUP) { /* @@ -486,6 +507,7 @@ main(int argc, char **argv) } got_SIGHUP = false; } + if (failover_done) { log_debug(_("standby check loop will terminate\n")); @@ -1954,6 +1976,8 @@ check_node_configuration(void) /* Adding the node */ log_info(_("adding node %d to cluster '%s'\n"), local_options.node, local_options.cluster_name); + + /* XXX use create_node_record() */ sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes" " (id, cluster, name, conninfo, priority, witness) " From 2eec17e25f296800eb111ddd0a5bc92991355c2d Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 30 Mar 2016 10:28:41 +0900 Subject: [PATCH 017/113] Add headers as dependencies in Makefile --- Makefile | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d67e8fa2..80cc6dcb 100644 --- a/Makefile +++ b/Makefile @@ -5,20 +5,25 @@ repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o +HEADERS = $(wildcard *.h) + DATA = repmgr.sql uninstall_repmgr.sql PG_CPPFLAGS = -I$(libpq_srcdir) -PG_LIBS = $(libpq_pgport) +PG_LIBS = $(libpq_pgport) -all: repmgrd repmgr +$(repmgr_OBJS): $(HEADERS) +$(repmgr_OBJS): $(HEADERS) + +all: repmgrd repmgr $(MAKE) -C sql repmgrd: $(repmgrd_OBJS) - $(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd + $(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) $(MAKE) -C sql repmgr: $(repmgr_OBJS) - $(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr + $(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) ifdef USE_PGXS PG_CONFIG = pg_config From 2ba57e5938931627b668352038af78521076b283 Mon Sep 17 00:00:00 2001 From: Gianni Ciolli Date: Wed, 30 Mar 2016 09:27:37 +0200 Subject: [PATCH 018/113] Rewording comment for clarity. --- repmgr.conf.sample | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 268a41e1..00dcf32b 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -37,10 +37,11 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr' # Replication settings # --------------------- -# when using cascading replication and a standby is to be connected to an -# upstream standby, specify that node's ID with 'upstream_node'. The node -# must exist before the new standby can be registered. If a standby is -# to connect directly to a primary node, this parameter is not required. +# When using cascading replication, a standby can connect to another +# upstream standby node which is specified by setting 'upstream_node'. +# In that case, the upstream node must exist before the new standby +# can be registered. If 'upstream_node' is unset, then the standby +# will connect to the primary node. upstream_node=1 # use physical replication slots - PostgreSQL 9.4 and later only From ecabe2c29414aa32da0d531fde4f02c44c8a5fe7 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 30 Mar 2016 16:45:47 +0900 Subject: [PATCH 019/113] Fix pg_ctl path generation in do_standby_switchover() --- repmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c index 98304442..1fbd97aa 100644 --- a/repmgr.c +++ b/repmgr.c @@ -2874,8 +2874,8 @@ do_standby_switchover(void) */ maxlen_snprintf(command, - "%s/pg_ctl -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0", - pg_bindir, + "%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0", + make_pg_path("pg_ctl"), remote_data_directory, runtime_options.pg_ctl_mode); From c93790fc962b5c8525939658c686a9c7aca00c31 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 30 Mar 2016 20:19:12 +0900 Subject: [PATCH 020/113] Check directory entity filetype in a more portable way --- repmgr.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c index 1fbd97aa..6d495f34 100644 --- a/repmgr.c +++ b/repmgr.c @@ -43,7 +43,6 @@ #include "repmgr.h" #include -#include #include #include #include @@ -3317,10 +3316,17 @@ do_standby_restore_config(void) } while ((arcdir_ent = readdir(arcdir)) != NULL) { + struct stat statbuf; + char arcdir_ent_path[MAXPGPATH]; PQExpBufferData src_file; PQExpBufferData dst_file; - if (arcdir_ent->d_type != DT_REG) + snprintf(arcdir_ent_path, MAXPGPATH, + "%s/%s", + runtime_options.config_archive_dir, + arcdir_ent->d_name); + + if (stat(arcdir_ent_path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) { continue; } From 491ec37adf46cdbd4c43a1b014e84944472dfce0 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 31 Mar 2016 14:58:50 +0900 Subject: [PATCH 021/113] Update HISTORY --- HISTORY | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/HISTORY b/HISTORY index 37961114..5c10f257 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,8 @@ -3.1.1 2016-02- +3.1.2 2016-04- + Fix pg_ctl path generation in do_standby_switchover() (Ian) + Regularly sync witness server repl_nodes table (Ian) + +3.1.1 2016-02-24 Add '-P/--pwprompt' option for "repmgr create witness" (Ian) Prevent repmgr/repmgrd running as root (Ian) From 57299cb97832fac306a5177eb79a6bf4fc3606df Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 31 Mar 2016 15:10:45 +0900 Subject: [PATCH 022/113] Comment out configuration items in sample config file The configured values are either the defaults, or examples which may not work in a real environment. If this file is being used as a template, the onus is on the user to uncomment and check all desired parameters. --- repmgr.conf.sample | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 00dcf32b..fc2bd5b4 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -15,21 +15,21 @@ # schema (pattern: "repmgr_{cluster}"); while this name will be quoted # to preserve case, we recommend using lower case and avoiding whitespace # to facilitate easier querying of the repmgr views and tables. -cluster=example_cluster +#cluster=example_cluster # Node ID and name # (Note: we recommend to avoid naming nodes after their initial # replication funcion, as this will cause confusion when e.g. # "standby2" is promoted to primary) -node=2 # a unique integer -node_name=node2 # an arbitrary (but unique) string; we recommend using +#node=2 # a unique integer +#node_name=node2 # an arbitrary (but unique) string; we recommend using # the server's hostname or another identifier unambiguously # associated with the server to avoid confusion # Database connection information as a conninfo string # This must be accessible to all servers in the cluster; for details see: # http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING -conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr' +#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr' # Optional configuration items # ============================ @@ -40,13 +40,13 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr' # When using cascading replication, a standby can connect to another # upstream standby node which is specified by setting 'upstream_node'. # In that case, the upstream node must exist before the new standby -# can be registered. If 'upstream_node' is unset, then the standby -# will connect to the primary node. -upstream_node=1 +# can be registered. If 'upstream_node' is not set, then the standby +# will connect directly to the primary node. +#upstream_node=1 # use physical replication slots - PostgreSQL 9.4 and later only # (default: 0) -use_replication_slots=0 +#use_replication_slots=0 # NOTE: 'max_replication_slots' should be configured for at least the # number of standbys which will connect to the primary. @@ -56,15 +56,15 @@ use_replication_slots=0 # Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG # (default: NOTICE) -loglevel=NOTICE +#loglevel=NOTICE # Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER # (default: STDERR) -logfacility=STDERR +#logfacility=STDERR # stderr can be redirected to an arbitrary file: # -logfile='/var/log/repmgr/repmgr.log' +#logfile='/var/log/repmgr/repmgr.log' # event notifications can be passed to an arbitrary external program # together with the following parameters: @@ -78,12 +78,12 @@ logfile='/var/log/repmgr/repmgr.log' # the values provided for "%t" and "%d" will probably contain spaces, # so should be quoted in the provided command configuration, e.g.: # -event_notification_command='/path/to/some/script %n %e %s "%t" "%d"' +#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"' # By default, all notifications will be passed; the notification types # can be filtered to explicitly named ones: # -event_notifications=master_register,standby_register,witness_create +#event_notifications=master_register,standby_register,witness_create # Environment/command settings @@ -91,17 +91,17 @@ event_notifications=master_register,standby_register,witness_create # path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.) # (if not provided, defaults to system $PATH) -pg_bindir=/usr/bin/ +#pg_bindir=/usr/bin/ # external command options -rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\"" -ssh_options=-o "StrictHostKeyChecking no" +#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\"" +#ssh_options=-o "StrictHostKeyChecking no" # external command arguments. Values shown are examples. -pg_ctl_options='-s' -pg_basebackup_options='--xlog-method=s' +#pg_ctl_options='-s' +#pg_basebackup_options='--xlog-method=s' # Standby clone settings @@ -123,30 +123,30 @@ pg_basebackup_options='--xlog-method=s' # Number of seconds to wait for a response from the primary server before # deciding it has failed. -master_response_timeout=60 +#master_response_timeout=60 # Number of attempts at what interval (in seconds) to try and # connect to a server to establish its status (e.g. master # during failover) -reconnect_attempts=6 -reconnect_interval=10 +#reconnect_attempts=6 +#reconnect_interval=10 # Autofailover options -failover=manual # one of 'automatic', 'manual' +#failover=manual # one of 'automatic', 'manual' # (default: manual) -priority=100 # a value of zero or less prevents the node being promoted to primary +#priority=100 # a value of zero or less prevents the node being promoted to primary # (default: 100) -promote_command='repmgr standby promote -f /path/to/repmgr.conf' -follow_command='repmgr standby follow -f /path/to/repmgr.conf -W' +#promote_command='repmgr standby promote -f /path/to/repmgr.conf' +#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W' # monitoring interval in seconds; default is 2 -monitor_interval_secs=2 +#monitor_interval_secs=2 # change wait time for primary; before we bail out and exit when the primary # disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs' # seconds; by default this would be half an hour, as 'retry_promote_interval_secs' # default value is 300) -retry_promote_interval_secs=300 +#retry_promote_interval_secs=300 # Number of seconds after which the witness server resyncs the repl_nodes table -witness_repl_nodes_sync_interval_secs=15 +#witness_repl_nodes_sync_interval_secs=15 From 819937d4bd729c5a6e67ad5d5f66c56fa57abadf Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 31 Mar 2016 20:10:39 +0900 Subject: [PATCH 023/113] Replace MAXFILENAME with MAXPGPATH --- Makefile | 3 --- repmgr.c | 52 ++++++++++++++++++++++++++-------------------------- repmgr.h | 7 +++---- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index 80cc6dcb..e6d3972e 100644 --- a/Makefile +++ b/Makefile @@ -12,9 +12,6 @@ DATA = repmgr.sql uninstall_repmgr.sql PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) -$(repmgr_OBJS): $(HEADERS) -$(repmgr_OBJS): $(HEADERS) - all: repmgrd repmgr $(MAKE) -C sql diff --git a/repmgr.c b/repmgr.c index 6d495f34..19522938 100644 --- a/repmgr.c +++ b/repmgr.c @@ -291,7 +291,7 @@ main(int argc, char **argv) strncpy(runtime_options.superuser, optarg, MAXLEN); break; case 'D': - strncpy(runtime_options.dest_dir, optarg, MAXFILENAME); + strncpy(runtime_options.dest_dir, optarg, MAXPGPATH); break; case 'l': /* -l/--local-port is deprecated */ @@ -415,7 +415,7 @@ main(int argc, char **argv) case 6: if (optarg != NULL) { - strncpy(runtime_options.pg_rewind, optarg, MAXFILENAME); + strncpy(runtime_options.pg_rewind, optarg, MAXPGPATH); } pg_rewind_supplied = true; break; @@ -1306,23 +1306,23 @@ do_standby_clone(void) bool target_directory_provided = false; bool external_config_file_copy_required = false; - char master_data_directory[MAXFILENAME]; - char local_data_directory[MAXFILENAME]; + char master_data_directory[MAXPGPATH]; + char local_data_directory[MAXPGPATH]; - char master_config_file[MAXFILENAME] = ""; - char local_config_file[MAXFILENAME] = ""; + char master_config_file[MAXPGPATH] = ""; + char local_config_file[MAXPGPATH] = ""; bool config_file_outside_pgdata = false; - char master_hba_file[MAXFILENAME] = ""; - char local_hba_file[MAXFILENAME] = ""; + char master_hba_file[MAXPGPATH] = ""; + char local_hba_file[MAXPGPATH] = ""; bool hba_file_outside_pgdata = false; - char master_ident_file[MAXFILENAME] = ""; - char local_ident_file[MAXFILENAME] = ""; + char master_ident_file[MAXPGPATH] = ""; + char local_ident_file[MAXPGPATH] = ""; bool ident_file_outside_pgdata = false; - char master_control_file[MAXFILENAME] = ""; - char local_control_file[MAXFILENAME] = ""; + char master_control_file[MAXPGPATH] = ""; + char local_control_file[MAXPGPATH] = ""; char *first_wal_segment = NULL; char *last_wal_segment = NULL; @@ -1490,7 +1490,7 @@ do_standby_clone(void) { if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0) { - strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXFILENAME); + strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXPGPATH); } else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0) { @@ -1498,7 +1498,7 @@ do_standby_clone(void) { config_file_outside_pgdata = true; external_config_file_copy_required = true; - strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME); + strncpy(master_config_file, PQgetvalue(res, i, 1), MAXPGPATH); } } else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0) @@ -1507,7 +1507,7 @@ do_standby_clone(void) { hba_file_outside_pgdata = true; external_config_file_copy_required = true; - strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME); + strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXPGPATH); } } else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0) @@ -1516,7 +1516,7 @@ do_standby_clone(void) { ident_file_outside_pgdata = true; external_config_file_copy_required = true; - strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME); + strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXPGPATH); } } else @@ -1532,20 +1532,20 @@ do_standby_clone(void) */ if (target_directory_provided) { - strncpy(local_data_directory, runtime_options.dest_dir, MAXFILENAME); - strncpy(local_config_file, runtime_options.dest_dir, MAXFILENAME); - strncpy(local_hba_file, runtime_options.dest_dir, MAXFILENAME); - strncpy(local_ident_file, runtime_options.dest_dir, MAXFILENAME); + strncpy(local_data_directory, runtime_options.dest_dir, MAXPGPATH); + strncpy(local_config_file, runtime_options.dest_dir, MAXPGPATH); + strncpy(local_hba_file, runtime_options.dest_dir, MAXPGPATH); + strncpy(local_ident_file, runtime_options.dest_dir, MAXPGPATH); } /* * Otherwise use the same data directory as on the remote host */ else { - strncpy(local_data_directory, master_data_directory, MAXFILENAME); - strncpy(local_config_file, master_config_file, MAXFILENAME); - strncpy(local_hba_file, master_hba_file, MAXFILENAME); - strncpy(local_ident_file, master_ident_file, MAXFILENAME); + strncpy(local_data_directory, master_data_directory, MAXPGPATH); + strncpy(local_config_file, master_config_file, MAXPGPATH); + strncpy(local_hba_file, master_hba_file, MAXPGPATH); + strncpy(local_ident_file, master_ident_file, MAXPGPATH); log_notice(_("setting data directory to: %s\n"), local_data_directory); log_hint(_("use -D/--data-dir to explicitly specify a data directory\n")); @@ -2253,7 +2253,7 @@ do_standby_follow(void) int r, retval; - char data_dir[MAXFILENAME]; + char data_dir[MAXPGPATH]; bool success; @@ -2336,7 +2336,7 @@ do_standby_follow(void) master_id = get_master_node_id(master_conn, options.cluster_name); - strncpy(data_dir, runtime_options.dest_dir, MAXFILENAME); + strncpy(data_dir, runtime_options.dest_dir, MAXPGPATH); } diff --git a/repmgr.h b/repmgr.h index ad8a34d7..7c247e82 100644 --- a/repmgr.h +++ b/repmgr.h @@ -33,7 +33,6 @@ #define MIN_SUPPORTED_VERSION_NUM 90300 #include "config.h" -#define MAXFILENAME 1024 #define ERRBUFF_SIZE 512 #define DEFAULT_WAL_KEEP_SEGMENTS "5000" @@ -57,8 +56,8 @@ typedef struct char dbname[MAXLEN]; char host[MAXLEN]; char username[MAXLEN]; - char dest_dir[MAXFILENAME]; - char config_file[MAXFILENAME]; + char dest_dir[MAXPGPATH]; + char config_file[MAXPGPATH]; char remote_user[MAXLEN]; char superuser[MAXLEN]; char wal_keep_segments[MAXLEN]; @@ -81,7 +80,7 @@ typedef struct /* parameter used by STANDBY SWITCHOVER */ char remote_config_file[MAXLEN]; - char pg_rewind[MAXFILENAME]; + char pg_rewind[MAXPGPATH]; /* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */ char config_archive_dir[MAXLEN]; /* parameter used by CLUSTER CLEANUP */ From 190cc7dcb460f6695362686a71ed3ae0c8f727b9 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Apr 2016 08:44:23 +0900 Subject: [PATCH 024/113] Rename copy_configuration () to witness_copy_node_records() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it's witness-specific. Per suggestion from Martín. --- dbutils.c | 12 ++++++------ dbutils.h | 2 +- repmgr.c | 2 +- repmgrd.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbutils.c b/dbutils.c index 9bc8792d..f3dfd444 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1111,7 +1111,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state) /* - * copy_configuration() + * witness_copy_node_records() * * Copy records in master's `repl_nodes` table to witness database * @@ -1119,7 +1119,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state) * `repmgrd` after a failover event occurs */ bool -copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) +witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) { char sqlquery[MAXLEN]; PGresult *res; @@ -1127,7 +1127,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn)); - log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery); + log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery); res = PQexec(witnessconn, sqlquery); if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) @@ -1141,7 +1141,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes", get_repmgr_schema_quoted(masterconn)); - log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery); + log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery); res = PQexec(masterconn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -1157,12 +1157,12 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) bool node_record_created; log_verbose(LOG_DEBUG, - "copy_configuration(): writing node record for node %s (id: %s)\n", + "witness_copy_node_records(): writing node record for node %s (id: %s)\n", PQgetvalue(res, i, 3), PQgetvalue(res, i, 0)); node_record_created = create_node_record(witnessconn, - "copy_configuration", + "witness_copy_node_records", atoi(PQgetvalue(res, i, 0)), PQgetvalue(res, i, 1), strlen(PQgetvalue(res, i, 2)) diff --git a/dbutils.h b/dbutils.h index bc34d59c..9d08c8bd 100644 --- a/dbutils.h +++ b/dbutils.h @@ -121,7 +121,7 @@ bool drop_replication_slot(PGconn *conn, char *slot_name); bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint); bool stop_backup(PGconn *conn, char *last_wal_segment); bool set_config_bool(PGconn *conn, const char *config_param, bool state); -bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name); +bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name); bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active); bool delete_node_record(PGconn *conn, int node, char *action); int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info); diff --git a/repmgr.c b/repmgr.c index 19522938..18fd535f 100644 --- a/repmgr.c +++ b/repmgr.c @@ -3808,7 +3808,7 @@ do_witness_create(void) /* copy configuration from master, only repl_nodes is needed */ - if (!copy_configuration(masterconn, witnessconn, options.cluster_name)) + if (!witness_copy_node_records(masterconn, witnessconn, options.cluster_name)) { create_event_record(masterconn, &options, diff --git a/repmgrd.c b/repmgrd.c index b5b3f04a..6d9b1c8c 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -488,7 +488,7 @@ main(int argc, char **argv) if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs) { log_debug(_("Resyncing repl_nodes table\n")); - copy_configuration(master_conn, my_local_conn, local_options.cluster_name); + witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name); sync_repl_nodes_elapsed = 0; } } @@ -600,7 +600,7 @@ witness_monitor(void) * XXX it would be neat to be able to handle this with e.g. table-based * logical replication */ - copy_configuration(master_conn, my_local_conn, local_options.cluster_name); + witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name); break; } From 2a8d6f72c61eb6f048e39361e03295807fae1431 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Apr 2016 11:07:17 +0900 Subject: [PATCH 025/113] Make witness server node update an atomic operation If the connection to the primary is lost, roll back to the previously known state. TRUNCATE is of course not MVCC-friendly, but that shouldn't matter here as only one process should ever be looking at this table. --- dbutils.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dbutils.c b/dbutils.c index f3dfd444..834c4d71 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1125,6 +1125,8 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster PGresult *res; int i; + begin_transaction(witnessconn); + sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn)); log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery); @@ -1149,6 +1151,8 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster log_err("Unable to retrieve node records from master:\n%s\n", PQerrorMessage(masterconn)); PQclear(res); + rollback_transaction(witnessconn); + return false; } @@ -1186,11 +1190,15 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster log_err("Unable to copy node record to witness database\n%s\n", PQerrorMessage(witnessconn)); + rollback_transaction(witnessconn); + return false; } } PQclear(res); + commit_transaction(witnessconn); + return true; } From 5d32026b795ce48384e5c9f742b5433a706eb04e Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Apr 2016 11:29:35 +0900 Subject: [PATCH 026/113] Improve debugging output for node resyncing We'll need this for testing. --- repmgrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index 6d9b1c8c..7bdee2e5 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -484,7 +484,7 @@ main(int argc, char **argv) if (node_info.type == WITNESS) { sync_repl_nodes_elapsed += local_options.monitor_interval_secs; - log_debug(_("%i - %i \n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs); + log_debug(_("seconds since last node record sync: %i (sync interval: %i)\n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs); if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs) { log_debug(_("Resyncing repl_nodes table\n")); From 5bc809466c750f93ee452bcdbe4c66c09980e915 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Apr 2016 15:19:22 +0900 Subject: [PATCH 027/113] Make self-referencing foreign key on repl_nodes table deferrable --- dbutils.c | 20 ++++++++++++++++++++ repmgr.c | 2 +- sql/repmgr3.1.1_repmgr3.1.2.sql | 31 +++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 sql/repmgr3.1.1_repmgr3.1.2.sql diff --git a/dbutils.c b/dbutils.c index 834c4d71..5a378f90 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1127,6 +1127,21 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster begin_transaction(witnessconn); + /* Defer constraints */ + sqlquery_snprintf(sqlquery, "SET CONSTRAINTS ALL DEFERRED;"); + log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery); + + res = PQexec(witnessconn, sqlquery); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + log_err(_("Unable to defer constraints:\n%s\n"), + PQerrorMessage(witnessconn)); + rollback_transaction(witnessconn); + + return false; + } + + /* Truncate existing records */ sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn)); log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery); @@ -1136,9 +1151,12 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster { log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"), PQerrorMessage(witnessconn)); + rollback_transaction(witnessconn); + return false; } + /* Get current records from primary */ sqlquery_snprintf(sqlquery, "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes", get_repmgr_schema_quoted(masterconn)); @@ -1156,6 +1174,7 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster return false; } + /* Insert primary records into witness table */ for (i = 0; i < PQntuples(res); i++) { bool node_record_created; @@ -1197,6 +1216,7 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster } PQclear(res); + /* And finished */ commit_transaction(witnessconn); return true; diff --git a/repmgr.c b/repmgr.c index 18fd535f..6be3a3e2 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4505,7 +4505,7 @@ create_schema(PGconn *conn) "CREATE TABLE %s.repl_nodes ( " " id INTEGER PRIMARY KEY, " " type TEXT NOT NULL CHECK (type IN('master','standby','witness')), " - " upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id), " + " upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id) DEFERRABLE, " " cluster TEXT NOT NULL, " " name TEXT NOT NULL, " " conninfo TEXT NOT NULL, " diff --git a/sql/repmgr3.1.1_repmgr3.1.2.sql b/sql/repmgr3.1.1_repmgr3.1.2.sql new file mode 100644 index 00000000..7dedacc8 --- /dev/null +++ b/sql/repmgr3.1.1_repmgr3.1.2.sql @@ -0,0 +1,31 @@ +/* + * Update a repmgr 3.1.1 installation to repmgr 3.1.2 + * -------------------------------------------------- + * + * This update is only required if repmgrd is being used in conjunction + * with a witness server. + * + * The new repmgr package should be installed first. Then + * carry out these steps: + * + * 1. (If repmgrd is used) stop any running repmgrd instances + * 2. On the master node, execute the SQL statement listed below + * 3. (If repmgrd is used) restart repmgrd + */ + +/* + * If your repmgr installation is not included in your repmgr + * user's search path, please set the search path to the name + * of the repmgr schema to ensure objects are installed in + * the correct location. + * + * The repmgr schema is "repmgr_" + the cluster name defined in + * 'repmgr.conf'. + */ + +-- SET search_path TO 'name_of_repmgr_schema'; + +BEGIN; + +ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE; +COMMIT; From f9a150504a4d58f1f190529a4c10adca220b3e8e Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Apr 2016 12:41:03 +0900 Subject: [PATCH 028/113] Enable repmgr to be compiled with PostgreSQL 9.6 --- repmgr.c | 29 ++++++++++++++++++++++++----- sql/repmgr_funcs.c | 9 +++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/repmgr.c b/repmgr.c index 6be3a3e2..22c8477f 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4872,22 +4872,41 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error) } else { - char *levels[] = { + char *levels_pre96[] = { "hot_standby", "logical", + NULL, }; - int j = 0; - wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'"); + char *levels_96plus[] = { + "replica", + "logical", + NULL, + }; - for(; j < 2; j++) + char **levels; + int j = 0; + + if (server_version_num < 90500) + { + levels = (char **)levels_pre96; + wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'"); + } + else + { + levels = (char **)levels_96plus; + wal_error_message = _("parameter 'wal_level' must be set to 'replica' or 'logical'"); + } + + do { i = guc_set(conn, "wal_level", "=", levels[j]); if (i) { break; } - } + j++; + } while (levels[j] != NULL); } if (i == 0 || i == -1) diff --git a/sql/repmgr_funcs.c b/sql/repmgr_funcs.c index 7ac1230d..a9aa9026 100644 --- a/sql/repmgr_funcs.c +++ b/sql/repmgr_funcs.c @@ -83,7 +83,12 @@ _PG_init(void) * resources in repmgr_shmem_startup(). */ RequestAddinShmemSpace(repmgr_memsize()); + +#if (PG_VERSION_NUM >= 90600) + RequestNamedLWLockTranche("repmgr", 1); +#else RequestAddinLWLocks(1); +#endif /* * Install hooks. @@ -128,7 +133,11 @@ repmgr_shmem_startup(void) if (!found) { /* First time through ... */ +#if (PG_VERSION_NUM >= 90600) + shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock; +#else shared_state->lock = LWLockAssign(); +#endif snprintf(shared_state->location, sizeof(shared_state->location), "%X/%X", 0, 0); } From e3e1c5de4eacb02b252f50d6aa8aba2b8b986960 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Apr 2016 12:56:00 +0900 Subject: [PATCH 029/113] Use "immediately_reserve" parameter in pg_create_physical_replication_slot (9.6) --- dbutils.c | 18 ++++++++++++++---- dbutils.h | 3 ++- repmgr.c | 6 ++++-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/dbutils.c b/dbutils.c index 5a378f90..243231cb 100644 --- a/dbutils.c +++ b/dbutils.c @@ -889,7 +889,7 @@ get_repmgr_schema_quoted(PGconn *conn) bool -create_replication_slot(PGconn *conn, char *slot_name) +create_replication_slot(PGconn *conn, char *slot_name, int server_version_num) { char sqlquery[QUERY_STR_LEN]; int query_res; @@ -926,9 +926,19 @@ create_replication_slot(PGconn *conn, char *slot_name) return false; } - sqlquery_snprintf(sqlquery, - "SELECT * FROM pg_create_physical_replication_slot('%s')", - slot_name); + /* In 9.6 and later, reserve the LSN straight away */ + if (server_version_num >= 90600) + { + sqlquery_snprintf(sqlquery, + "SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)", + slot_name); + } + else + { + sqlquery_snprintf(sqlquery, + "SELECT * FROM pg_create_physical_replication_slot('%s')", + slot_name); + } log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name); log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery); diff --git a/dbutils.h b/dbutils.h index 9d08c8bd..8d980d8e 100644 --- a/dbutils.h +++ b/dbutils.h @@ -115,7 +115,7 @@ int wait_connection_availability(PGconn *conn, long long timeout); bool cancel_query(PGconn *conn, int timeout); char *get_repmgr_schema(void); char *get_repmgr_schema_quoted(PGconn *conn); -bool create_replication_slot(PGconn *conn, char *slot_name); +bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num); int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record); bool drop_replication_slot(PGconn *conn, char *slot_name); bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint); @@ -133,3 +133,4 @@ int get_node_replication_state(PGconn *conn, char *node_name, char *output) t_server_type parse_node_type(const char *type); int get_data_checksum_version(const char *data_directory); #endif + diff --git a/repmgr.c b/repmgr.c index 22c8477f..74a70334 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1585,7 +1585,7 @@ do_standby_clone(void) */ if (options.use_replication_slots) { - if (create_replication_slot(upstream_conn, repmgr_slot_name) == false) + if (create_replication_slot(upstream_conn, repmgr_slot_name, server_version_num) == false) { PQfinish(upstream_conn); exit(ERR_DB_QUERY); @@ -2368,7 +2368,9 @@ do_standby_follow(void) if (options.use_replication_slots) { - if (create_replication_slot(master_conn, repmgr_slot_name) == false) + int server_version_num = get_server_version(master_conn, NULL); + + if (create_replication_slot(master_conn, repmgr_slot_name, server_version_num) == false) { PQExpBufferData event_details; initPQExpBuffer(&event_details); From 8cd2c6fd05225535e985175aa3e313dbe3858339 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Apr 2016 14:57:20 +0900 Subject: [PATCH 030/113] Add comment about wal_level setting interpretation --- repmgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/repmgr.c b/repmgr.c index 74a70334..e3017bce 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4880,6 +4880,10 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error) NULL, }; + /* + * Note that in 9.6+, "hot_standby" and "archive" are accepted as aliases + * for "replica", but current_setting() will of course always return "replica" + */ char *levels_96plus[] = { "replica", "logical", From eb31a561862cdf300c725c713ed2f19b9947b57f Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 5 Apr 2016 22:30:19 +0900 Subject: [PATCH 031/113] Enable long option --pgdata as alias for -D/--data-dir pg_ctl provides -D/--pgdata; we want to be as close to the core utilities as possible. --- repmgr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/repmgr.c b/repmgr.c index e3017bce..a3cca315 100644 --- a/repmgr.c +++ b/repmgr.c @@ -158,6 +158,8 @@ main(int argc, char **argv) {"username", required_argument, NULL, 'U'}, {"superuser", required_argument, NULL, 'S'}, {"data-dir", required_argument, NULL, 'D'}, + /* alias for -D/--data-dir, following pg_ctl usage */ + {"pgdata", required_argument, NULL, 'D'}, /* -l/--local-port is deprecated */ {"local-port", required_argument, NULL, 'l'}, {"config-file", required_argument, NULL, 'f'}, From 83e5f981711c0675c0c5b275eb8326ede86ff2f5 Mon Sep 17 00:00:00 2001 From: Martin Date: Tue, 5 Apr 2016 15:22:40 -0300 Subject: [PATCH 032/113] Ignore rsync error code for vanished files. It's very common to come over vanish files during a backup or rsync o the data directory (dropped index, temp tables, etc.) This fixes #149 --- repmgr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index a3cca315..5008bc6d 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1655,7 +1655,13 @@ do_standby_clone(void) r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_data_directory, local_data_directory, true, server_version_num); - if (r != 0) + /* + Exit code 0 means no error, but we want to ignore exit code 24 as well + as rsync returns that code on "Partial transfer due to vanished source files". + It's quite common for this to happen on the data directory, particularly + with long running rsync on a busy server. + */ + if (r != 0 && r != 24) { log_warning(_("standby clone: failed copying master data directory '%s'\n"), master_data_directory); From a886fddcccc148d476c58d4212acac4653e907df Mon Sep 17 00:00:00 2001 From: Martin Date: Tue, 5 Apr 2016 15:30:42 -0300 Subject: [PATCH 033/113] We were not checking the return code after rsyncing the tablespaces. This fixes #168 --- repmgr.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/repmgr.c b/repmgr.c index 5008bc6d..4d51c024 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1735,6 +1735,18 @@ do_standby_clone(void) tblspc_dir_src.data, tblspc_dir_dst.data, true, server_version_num); + /* + Exit code 0 means no error, but we want to ignore exit code 24 as well + as rsync returns that code on "Partial transfer due to vanished source files". + It's quite common for this to happen on the data directory, particularly + with long running rsync on a busy server. + */ + if (r != 0 && r != 24) + { + log_warning(_("standby clone: failed copying tablespace directory '%s'\n"), + tblspc_dir_src.data); + goto stop_backup; + } /* Update symlinks in pg_tblspc */ if (mapping_found == true) From 1db577e294441d9046490da7abc710ca71325e5a Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 5 Apr 2016 10:59:26 +0900 Subject: [PATCH 034/113] Update Makefile Add include file dependencies (see caveat in file). Also update comments. --- Makefile | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e6d3972e..1fe7ec6b 100644 --- a/Makefile +++ b/Makefile @@ -2,16 +2,17 @@ # Makefile # Copyright (c) 2ndQuadrant, 2010-2016 +HEADERS = $(wildcard *.h) + repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o -HEADERS = $(wildcard *.h) - DATA = repmgr.sql uninstall_repmgr.sql PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) + all: repmgrd repmgr $(MAKE) -C sql @@ -22,6 +23,12 @@ repmgrd: $(repmgrd_OBJS) repmgr: $(repmgr_OBJS) $(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) +# Make all objects depend on all include files. This is a bit of a +# shotgun approach, but the codebase is small enough that a complete rebuild +# is very fast anyway. +$(repmgr_OBJS): $(HEADERS) +$(repmgrd_OBJS): $(HEADERS) + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) @@ -33,8 +40,8 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif -# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now -# is overriding pgxs install. +# XXX: This overrides the pgxs install target - we're building two binaries, +# which is not supported by pgxs.mk's PROGRAM construct. install: install_prog install_ext install_prog: @@ -45,6 +52,12 @@ install_prog: install_ext: $(MAKE) -C sql install +# Distribution-specific package building targets +# ---------------------------------------------- +# +# XXX we recommend using the PGDG-supplied packages where possible; +# see README.md for details. + install_rhel: mkdir -p '$(DESTDIR)/etc/init.d/' $(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd' From 925d82f7a42040910f4142276ae4d414055df1a1 Mon Sep 17 00:00:00 2001 From: Martin Date: Tue, 5 Apr 2016 21:36:07 -0300 Subject: [PATCH 035/113] Add to the documentation the need to have archive_mode and archive_command set in postgresql.conf Fixes #154 --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 8bf3190c..02fbb201 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,20 @@ The following replication settings must be included in `postgresql.conf`: hot_standby = on + # If archive_mode is enabled, check that 'archive_command' is non empty + # (however it's not practical to check that it actually represents a valid + # command). + # + # From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always' + # so for ease of backwards compatibility, rather than explicitly check for an + # enabled mode, check that it's not "off". + archive_mode = on + + # Set archive command to a script or application that will safetly store + # you WALs in a secure place. /bin/true is an example of a command that + # ignores archiving. Use something more sensible. + archive_command = '/bin/true' + * * * From c22f4eaf6f70c9c9882f91b3b56dfdfb7cc87703 Mon Sep 17 00:00:00 2001 From: Craig Ringer Date: Wed, 30 Mar 2016 14:51:12 +0800 Subject: [PATCH 036/113] WIP support for preserving failover slots --- errcode.h | 1 + repmgr.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++- repmgr.h | 11 ++++ strutil.h | 5 ++ 4 files changed, 174 insertions(+), 2 deletions(-) diff --git a/errcode.h b/errcode.h index d1e566f4..3a9b70e3 100644 --- a/errcode.h +++ b/errcode.h @@ -37,5 +37,6 @@ #define ERR_BAD_BASEBACKUP 14 #define ERR_INTERNAL 15 #define ERR_MONITORING_FAIL 16 +#define ERR_BAD_BACKUP_LABEL 17 #endif /* _ERRCODE_H_ */ diff --git a/repmgr.c b/repmgr.c index 4d51c024..44bec7df 100644 --- a/repmgr.c +++ b/repmgr.c @@ -121,6 +121,8 @@ static bool remote_command(const char *host, const char *user, const char *comma static void format_db_cli_params(const char *conninfo, char *output); static bool copy_file(const char *old_filename, const char *new_filename); +static void read_backup_label(const char *local_data_directory, struct BackupLabel *backup_label); + /* Global variables */ static const char *keywords[6]; static const char *values[6]; @@ -142,6 +144,8 @@ static char repmgr_slot_name[MAXLEN] = ""; static char *repmgr_slot_name_ptr = NULL; static char path_buf[MAXLEN] = ""; +static struct BackupLabel backup_label; + /* Collate command line errors and warnings here for friendlier reporting */ ErrorList cli_errors = { NULL, NULL }; ErrorList cli_warnings = { NULL, NULL }; @@ -1971,6 +1975,8 @@ stop_backup: exit(retval); } + read_backup_label(local_data_directory, &backup_label); + /* * Clean up any $PGDATA subdirectories which may contain * files which won't be removed by rsync and which could @@ -1999,12 +2005,17 @@ stop_backup: * behaviour a base backup, which would result in an empty * pg_replslot directory. * + * If the backup label contains a nonzero + * 'MIN FAILOVER SLOT LSN' entry we retain the slots and let + * the server clean them up instead, matching pg_basebackup's + * behaviour when failover slots are enabled. + * * NOTE: watch out for any changes in the replication * slot directory name (as of 9.4: "pg_replslot") and * functionality of replication slots */ - - if (server_version_num >= 90400) + if (server_version_num >= 90400 && + backup_label.min_failover_slot_lsn == InvalidXLogRecPtr) { maxlen_snprintf(script, "rm -rf %s/pg_replslot/*", local_data_directory); @@ -2100,6 +2111,150 @@ stop_backup: exit(retval); } +static bool +parse_lsn(XLogRecPtr *ptr, const char *str) +{ + uint32 high, low; + + if (sscanf(str, "%x/%x", &high, &low) != 2) + return false; + + *ptr = (((XLogRecPtr)high) << 32) + (XLogRecPtr)low; + + return true; +} + +static XLogRecPtr +parse_label_lsn(const char *label_key, const char *label_value) +{ + XLogRecPtr ptr; + + if (!parse_lsn(&ptr, label_value)) + { + log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"), + label_key, label_value); + + exit(ERR_BAD_BACKUP_LABEL); + } + + return ptr; +} + +/*====================================== + * Read entries of interest from the backup label. + * + * Sample backup label: + * + * START WAL LOCATION: 0/6000028 (file 000000010000000000000006) + * CHECKPOINT LOCATION: 0/6000060 + * BACKUP METHOD: streamed + * BACKUP FROM: master + * START TIME: 2016-03-30 12:18:12 AWST + * LABEL: pg_basebackup base backup + * MIN FAILOVER SLOT LSN: 0/5000000 + * + *====================================== + */ +static void +read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label) +{ + char label_path[MAXFILENAME]; + FILE *label_file; + int nmatches = 0; + char label_key[MAXLEN]; + char label_value[MAXLEN]; + + out_backup_label->start_wal_location = InvalidXLogRecPtr; + out_backup_label->checkpoint_location = InvalidXLogRecPtr; + out_backup_label->backup_from[0] = '\0'; + out_backup_label->backup_method[0] = '\0'; + out_backup_label->start_time[0] = '\0'; + out_backup_label->label[0] = '\0'; + out_backup_label->min_failover_slot_lsn = InvalidXLogRecPtr; + + maxlen_snprintf(label_path, "%s/backup_label", local_data_directory); + + label_file = fopen(label_path, "r"); + if (label_file == NULL) + { + log_err(_("could not open backup label file %s: %s"), + label_path, strerror(errno)); + exit(ERR_BAD_BACKUP_LABEL); + } + + log_info(_("standby clone: backup label file '%s'\n"), + label_path); + + do + { + char newline; + + /* + * Scan a line, including newline char. + * + * See http://stackoverflow.com/a/8097776/398670 + */ + nmatches = fscanf(label_file, "%" MAXLEN_STR "s: %" MAXLEN_STR "[^\n]%c", + &label_key[0], &label_value[0], &newline); + + if (nmatches != 3) + break; + + if (newline != '\n') + { + log_err(_("standby clone: line too long in backup label file. Line begins \"%s: %s\""), + label_key, label_value); + exit(ERR_BAD_BACKUP_LABEL); + } + + log_debug("standby clone: got backup label entry \"%s: %s\"", + label_key, label_value); + + if (strcmp(label_key, "START WAL LOCATION") == 0) + { + out_backup_label->start_wal_location = + parse_label_lsn(&label_key[0], &label_value[0]); + } + else if (strcmp(label_key, "CHECKPOINT LOCATION") == 0) + { + out_backup_label->checkpoint_location = + parse_label_lsn(&label_key[0], &label_value[0]); + } + else if (strcmp(label_key, "BACKUP METHOD") == 0) + { + (void) strncpy(out_backup_label->backup_method, label_value, MAXLEN); + out_backup_label->backup_method[MAXLEN-1] = '\0'; + } + else if (strcmp(label_key, "BACKUP FROM") == 0) + { + (void) strncpy(out_backup_label->backup_from, label_value, MAXLEN); + out_backup_label->backup_from[MAXLEN-1] = '\0'; + } + else if (strcmp(label_key, "START TIME") == 0) + { + (void) strncpy(out_backup_label->start_time, label_value, MAXLEN); + out_backup_label->start_time[MAXLEN-1] = '\0'; + } + else if (strcmp(label_key, "LABEL") == 0) + { + (void) strncpy(out_backup_label->label, label_value, MAXLEN); + out_backup_label->label[MAXLEN-1] = '\0'; + } + else if (strcmp(label_key, "MIN FAILOVER SLOT LSN") == 0) + { + out_backup_label->min_failover_slot_lsn = + parse_label_lsn(&label_key[0], &label_value[0]); + } + else + { + log_info("standby clone: ignored unrecognised backup label entry \"%s: %s\"", + label_key, label_value); + } + } + while (!feof(label_file)); + + (void) fclose(label_file); +} static void do_standby_promote(void) diff --git a/repmgr.h b/repmgr.h index 7c247e82..8901b92c 100644 --- a/repmgr.h +++ b/repmgr.h @@ -97,6 +97,17 @@ typedef struct #define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false } +struct BackupLabel +{ + XLogRecPtr start_wal_location; + XLogRecPtr checkpoint_location; + char backup_from[MAXLEN]; + char backup_method[MAXLEN]; + char start_time[MAXLEN]; + char label[MAXLEN]; + XLogRecPtr min_failover_slot_lsn; +}; + extern char repmgr_schema[MAXLEN]; extern bool config_file_found; diff --git a/strutil.h b/strutil.h index 25d1f34b..afc5abc2 100644 --- a/strutil.h +++ b/strutil.h @@ -24,12 +24,17 @@ #include #include "errcode.h" + #define QUERY_STR_LEN 8192 #define MAXLEN 1024 #define MAXLINELENGTH 4096 #define MAXVERSIONSTR 16 #define MAXCONNINFO 1024 +/* Why? http://stackoverflow.com/a/5459929/398670 */ +#define STR(x) CppAsString(x) + +#define MAXLEN_STR STR(MAXLEN) extern int xsnprintf(char *str, size_t size, const char *format,...) From 9f6f58e4ed6beae4d3d11652efcceea63036de5c Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Apr 2016 14:53:40 +0900 Subject: [PATCH 037/113] MAXFILENAME -> MAXPGPATH --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 44bec7df..0633d967 100644 --- a/repmgr.c +++ b/repmgr.c @@ -2158,7 +2158,7 @@ parse_label_lsn(const char *label_key, const char *label_value) static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label) { - char label_path[MAXFILENAME]; + char label_path[MAXPGPATH]; FILE *label_file; int nmatches = 0; char label_key[MAXLEN]; From b5a7efa58e4f903f1ce0458ea37e165c0dfc76ea Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 5 Apr 2016 16:47:47 +0900 Subject: [PATCH 038/113] Preserve failover slots when cloning a standby, if enabled --- repmgr.c | 113 +++++++++++++++++++++++++++++++++---------------------- repmgr.h | 1 + 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/repmgr.c b/repmgr.c index 0633d967..b01520c0 100644 --- a/repmgr.c +++ b/repmgr.c @@ -121,7 +121,7 @@ static bool remote_command(const char *host, const char *user, const char *comma static void format_db_cli_params(const char *conninfo, char *output); static bool copy_file(const char *old_filename, const char *new_filename); -static void read_backup_label(const char *local_data_directory, struct BackupLabel *backup_label); +static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label); /* Global variables */ static const char *keywords[6]; @@ -144,12 +144,11 @@ static char repmgr_slot_name[MAXLEN] = ""; static char *repmgr_slot_name_ptr = NULL; static char path_buf[MAXLEN] = ""; -static struct BackupLabel backup_label; - /* Collate command line errors and warnings here for friendlier reporting */ ErrorList cli_errors = { NULL, NULL }; ErrorList cli_warnings = { NULL, NULL }; +static struct BackupLabel backup_label; int main(int argc, char **argv) @@ -1335,6 +1334,7 @@ do_standby_clone(void) PQExpBufferData event_details; + /* * If dest_dir (-D/--pgdata) was provided, this will become the new data * directory (otherwise repmgr will default to the same directory as on the @@ -1672,6 +1672,12 @@ do_standby_clone(void) goto stop_backup; } + /* Read backup label copied from primary */ + /* XXX ensure this function does not exit on error as we'd need to stop the backup */ + read_backup_label(local_data_directory, &backup_label); + + printf("Label: %s; file: %s\n", backup_label.label, backup_label.start_wal_file); + /* Handle tablespaces */ sqlquery_snprintf(sqlquery, @@ -1975,35 +1981,38 @@ stop_backup: exit(retval); } - read_backup_label(local_data_directory, &backup_label); - /* * Clean up any $PGDATA subdirectories which may contain * files which won't be removed by rsync and which could * be stale or are otherwise not required */ - if (runtime_options.rsync_only && runtime_options.force) + if (runtime_options.rsync_only) { char script[MAXLEN]; + char label_path[MAXPGPATH]; - /* - * Remove any existing WAL from the target directory, since - * rsync's --exclude option doesn't do it. - */ - maxlen_snprintf(script, "rm -rf %s/pg_xlog/*", - local_data_directory); - r = system(script); - if (r != 0) + if (runtime_options.force) { - log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"), - local_data_directory); - exit(ERR_BAD_RSYNC); + /* + * Remove any existing WAL from the target directory, since + * rsync's --exclude option doesn't do it. + */ + maxlen_snprintf(script, "rm -rf %s/pg_xlog/*", + local_data_directory); + r = system(script); + if (r != 0) + { + log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"), + local_data_directory); + exit(ERR_BAD_RSYNC); + } } /* - * Remove any replication slot directories; this matches the - * behaviour a base backup, which would result in an empty - * pg_replslot directory. + * Remove any existing replication slot directories from previous use + * of this data directory; this matches the behaviour of a fresh + * pg_basebackup, which would usually result in an empty pg_replslot + * directory. * * If the backup label contains a nonzero * 'MIN FAILOVER SLOT LSN' entry we retain the slots and let @@ -2019,6 +2028,8 @@ stop_backup: { maxlen_snprintf(script, "rm -rf %s/pg_replslot/*", local_data_directory); + + log_debug("deleting pg_replslot directory contents\n"); r = system(script); if (r != 0) { @@ -2027,6 +2038,13 @@ stop_backup: exit(ERR_BAD_RSYNC); } } + + /* delete the backup label file copied from the primary */ + maxlen_snprintf(label_path, "%s/backup_label", local_data_directory); + if (0 && unlink(label_path) < 0 && errno != ENOENT) + { + log_warning(_("unable to delete backup label file %s\n"), label_path); + } } /* Finally, write the recovery.conf file */ @@ -2143,7 +2161,7 @@ parse_label_lsn(const char *label_key, const char *label_value) /*====================================== * Read entries of interest from the backup label. * - * Sample backup label: + * Sample backup label (with failover slots): * * START WAL LOCATION: 0/6000028 (file 000000010000000000000006) * CHECKPOINT LOCATION: 0/6000060 @@ -2161,10 +2179,11 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back char label_path[MAXPGPATH]; FILE *label_file; int nmatches = 0; - char label_key[MAXLEN]; - char label_value[MAXLEN]; + + char line[MAXLEN]; out_backup_label->start_wal_location = InvalidXLogRecPtr; + out_backup_label->start_wal_file[0] = '\0'; out_backup_label->checkpoint_location = InvalidXLogRecPtr; out_backup_label->backup_from[0] = '\0'; out_backup_label->backup_method[0] = '\0'; @@ -2177,43 +2196,52 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back label_file = fopen(label_path, "r"); if (label_file == NULL) { - log_err(_("could not open backup label file %s: %s"), + log_err(_("read_backup_label: could not open backup label file %s: %s"), label_path, strerror(errno)); exit(ERR_BAD_BACKUP_LABEL); } - log_info(_("standby clone: backup label file '%s'\n"), + log_info(_("read_backup_label: parsing backup label file '%s'\n"), label_path); - do + while(fgets(line, sizeof line, label_file) != NULL) { + char label_key[MAXLEN]; + char label_value[MAXLEN]; char newline; - /* - * Scan a line, including newline char. - * - * See http://stackoverflow.com/a/8097776/398670 - */ - nmatches = fscanf(label_file, "%" MAXLEN_STR "s: %" MAXLEN_STR "[^\n]%c", - &label_key[0], &label_value[0], &newline); + nmatches = sscanf(line, "%" MAXLEN_STR "[^:]: %" MAXLEN_STR "[^\n]%c", + label_key, label_value, &newline); if (nmatches != 3) break; if (newline != '\n') { - log_err(_("standby clone: line too long in backup label file. Line begins \"%s: %s\""), + log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""), label_key, label_value); exit(ERR_BAD_BACKUP_LABEL); } - log_debug("standby clone: got backup label entry \"%s: %s\"", + log_debug("standby clone: got backup label entry \"%s: %s\"\n", label_key, label_value); if (strcmp(label_key, "START WAL LOCATION") == 0) { + char start_wal_location[MAXLEN]; + char wal_filename[MAXLEN]; + + nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename); + if (nmatches != 2) + { + log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n")); + exit(ERR_BAD_BACKUP_LABEL); + } out_backup_label->start_wal_location = - parse_label_lsn(&label_key[0], &label_value[0]); + parse_label_lsn(&label_key[0], start_wal_location); + + (void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN); + out_backup_label->start_wal_file[MAXLEN-1] = '\0'; } else if (strcmp(label_key, "CHECKPOINT LOCATION") == 0) { @@ -2247,11 +2275,10 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back } else { - log_info("standby clone: ignored unrecognised backup label entry \"%s: %s\"", + log_info("read_backup_label: ignored unrecognised backup label entry \"%s: %s\"", label_key, label_value); } } - while (!feof(label_file)); (void) fclose(label_file); } @@ -4246,6 +4273,7 @@ test_ssh_connection(char *host, char *remote_user) return r; } + static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory, int server_version_num) @@ -4290,6 +4318,9 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, * See function 'sendDir()' in 'src/backend/replication/basebackup.c' - * we're basically simulating what pg_basebackup does, but with rsync rather * than the BASEBACKUP replication protocol command. + * + * *However* currently we'll always copy the contents of the 'pg_replslot' + * directory and delete later if appropriate. */ if (is_directory) { @@ -4318,12 +4349,6 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, appendPQExpBuffer(&rsync_flags, "%s", " --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*"); - if (server_version_num >= 90400) - { - appendPQExpBuffer(&rsync_flags, "%s", - " --exclude=pg_replslot/*"); - } - maxlen_snprintf(script, "rsync %s %s:%s/* %s", rsync_flags.data, host_string, remote_path, local_path); } diff --git a/repmgr.h b/repmgr.h index 8901b92c..f19ead5c 100644 --- a/repmgr.h +++ b/repmgr.h @@ -100,6 +100,7 @@ typedef struct struct BackupLabel { XLogRecPtr start_wal_location; + char start_wal_file[MAXLEN]; XLogRecPtr checkpoint_location; char backup_from[MAXLEN]; char backup_method[MAXLEN]; From 5a2a8d1c82b52bd7afac187a613bff2313390bbb Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 6 Apr 2016 11:57:34 +0900 Subject: [PATCH 039/113] Update HISTORY --- HISTORY | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HISTORY b/HISTORY index 5c10f257..54dfce8d 100644 --- a/HISTORY +++ b/HISTORY @@ -1,6 +1,8 @@ 3.1.2 2016-04- Fix pg_ctl path generation in do_standby_switchover() (Ian) Regularly sync witness server repl_nodes table (Ian) + (Experimental) ensure repmgr handles failover slots when copying + in rsync mode (Craig, Ian) 3.1.1 2016-02-24 Add '-P/--pwprompt' option for "repmgr create witness" (Ian) From a538ceb0ead3eae268081ff9c98362c5cafb96e7 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 6 Apr 2016 14:15:19 +0900 Subject: [PATCH 040/113] Remove duplicate inclusion from header file --- repmgr.h | 1 - 1 file changed, 1 deletion(-) diff --git a/repmgr.h b/repmgr.h index f19ead5c..780dccf7 100644 --- a/repmgr.h +++ b/repmgr.h @@ -32,7 +32,6 @@ #define MIN_SUPPORTED_VERSION "9.3" #define MIN_SUPPORTED_VERSION_NUM 90300 -#include "config.h" #define ERRBUFF_SIZE 512 #define DEFAULT_WAL_KEEP_SEGMENTS "5000" From 2946c097f06abff6a56c406afa384b14d0ccc6a4 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 8 Apr 2016 15:46:55 +0900 Subject: [PATCH 041/113] repmgrd: rename some variables to better match the system functions they're populated from --- repmgrd.c | 79 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 7bdee2e5..6b1c3d27 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -696,15 +696,15 @@ standby_monitor(void) PGresult *res; char monitor_standby_timestamp[MAXLEN]; char last_wal_master_location[MAXLEN]; - char last_wal_standby_received[MAXLEN]; - char last_wal_standby_applied[MAXLEN]; - char last_wal_standby_applied_timestamp[MAXLEN]; - bool last_wal_standby_received_gte_replayed; + char last_xlog_receive_location[MAXLEN]; + char last_xlog_replay_location[MAXLEN]; + char last_xact_replay_timestamp[MAXLEN]; + bool last_xlog_receive_location_gte_replayed; char sqlquery[QUERY_STR_LEN]; - XLogRecPtr lsn_master; - XLogRecPtr lsn_standby_received; - XLogRecPtr lsn_standby_applied; + XLogRecPtr lsn_master_current_xlog_location; + XLogRecPtr lsn_last_xlog_receive_location; + XLogRecPtr lsn_last_xlog_replay_location; int connection_retries, ret; @@ -998,10 +998,10 @@ standby_monitor(void) } strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN); - strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN); - strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN); - strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN); - last_wal_standby_received_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0) + strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN); + strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN); + strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN); + last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0) ? true : false; @@ -1010,13 +1010,13 @@ standby_monitor(void) /* * In the unusual event of a standby becoming disconnected from the primary, * while this repmgrd remains connected to the primary, subtracting - * "lsn_standby_applied" from "lsn_standby_received" and coercing to + * "last_xlog_replay_location" from "lsn_last_xlog_receive_location" and coercing to * (long long unsigned int) will result in a meaningless, very large * value which will overflow a BIGINT column and spew error messages into the * PostgreSQL log. In the absence of a better strategy, skip attempting * to insert a monitoring record. */ - if (last_wal_standby_received_gte_replayed == false) + if (last_xlog_receive_location_gte_replayed == false) { log_verbose(LOG_WARNING, "Invalid replication_lag value calculated - is this standby connected to its upstream?\n"); @@ -1038,29 +1038,40 @@ standby_monitor(void) PQclear(res); /* Calculate the lag */ - lsn_master = lsn_to_xlogrecptr(last_wal_master_location, NULL); - lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL); - lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL); + lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL); + lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); + lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL); /* * Build the SQL to execute on master */ sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_monitor " - " (primary_node, standby_node, " - " last_monitor_time, last_apply_time, " - " last_wal_primary_location, last_wal_standby_location, " - " replication_lag, apply_lag ) " - " VALUES(%d, %d, " - " '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, " - " '%s', '%s', " - " %llu, %llu) ", + " (primary_node, " + " standby_node, " + " last_monitor_time, " + " last_apply_time, " + " last_wal_primary_location, " + " last_wal_standby_location, " + " replication_lag, " + " apply_lag ) " + " VALUES(%d, " + " %d, " + " '%s'::TIMESTAMP WITH TIME ZONE, " + " '%s'::TIMESTAMP WITH TIME ZONE, " + " '%s', " + " '%s', " + " %llu, " + " %llu) ", get_repmgr_schema_quoted(master_conn), - master_options.node, local_options.node, - monitor_standby_timestamp, last_wal_standby_applied_timestamp, - last_wal_master_location, last_wal_standby_received, - (long long unsigned int)(lsn_master - lsn_standby_received), - (long long unsigned int)(lsn_standby_received - lsn_standby_applied)); + master_options.node, + local_options.node, + monitor_standby_timestamp, + last_xact_replay_timestamp, + last_wal_master_location, + last_xlog_receive_location, + (long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location), + (long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location)); /* * Execute the query asynchronously, but don't check for a result. We will @@ -1098,7 +1109,7 @@ do_master_failover(void) XLogRecPtr xlog_recptr; bool lsn_format_ok; - char last_wal_standby_applied[MAXLEN]; + char last_xlog_replay_location[MAXLEN]; PGconn *node_conn = NULL; @@ -1281,8 +1292,8 @@ do_master_failover(void) " considered as new master and exit.\n"), PQerrorMessage(my_local_conn)); PQclear(res); - sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); - update_shared_memory(last_wal_standby_applied); + sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0); + update_shared_memory(last_xlog_replay_location); terminate(ERR_DB_QUERY); } /* write last location in shared memory */ @@ -2099,7 +2110,7 @@ terminate(int retval) static void -update_shared_memory(char *last_wal_standby_applied) +update_shared_memory(char *last_xlog_replay_location) { PGresult *res; char sqlquery[QUERY_STR_LEN]; @@ -2107,7 +2118,7 @@ update_shared_memory(char *last_wal_standby_applied) sprintf(sqlquery, "SELECT %s.repmgr_update_standby_location('%s')", get_repmgr_schema_quoted(my_local_conn), - last_wal_standby_applied); + last_xlog_replay_location); /* If an error happens, just inform about that and continue */ res = PQexec(my_local_conn, sqlquery); From d104f2a914bf78695599f5627bb8d7de3c8bbf87 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 12 Apr 2016 15:37:44 +0900 Subject: [PATCH 042/113] Update HISTORY --- HISTORY | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/HISTORY b/HISTORY index 54dfce8d..48f4aa40 100644 --- a/HISTORY +++ b/HISTORY @@ -1,8 +1,10 @@ -3.1.2 2016-04- +3.1.2 2016-04-12 Fix pg_ctl path generation in do_standby_switchover() (Ian) Regularly sync witness server repl_nodes table (Ian) + Documentation improvements (Gianni, dhyannataraj) (Experimental) ensure repmgr handles failover slots when copying in rsync mode (Craig, Ian) + rsync mode handling fixes (Martín) 3.1.1 2016-02-24 Add '-P/--pwprompt' option for "repmgr create witness" (Ian) From e100728b93842015d145b213534a5f1aac8ab732 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 12 Apr 2016 15:43:38 +0900 Subject: [PATCH 043/113] Update HISTORY --- HISTORY | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY b/HISTORY index 48f4aa40..5b5eafc8 100644 --- a/HISTORY +++ b/HISTORY @@ -5,6 +5,7 @@ (Experimental) ensure repmgr handles failover slots when copying in rsync mode (Craig, Ian) rsync mode handling fixes (Martín) + Enable repmgr to compile against 9.6devel (Ian) 3.1.1 2016-02-24 Add '-P/--pwprompt' option for "repmgr create witness" (Ian) From db63b5bb1c12850f86afb19afd1852ba5fe09426 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 12 Apr 2016 16:07:38 +0900 Subject: [PATCH 044/113] Fix hint message formatting --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index b01520c0..469abef9 100644 --- a/repmgr.c +++ b/repmgr.c @@ -2095,7 +2095,7 @@ stop_backup: * add a hint about using the -F/--force. */ - log_hint(_("After starting the server, you need to register this standby with \"repmgr standby register\"")); + log_hint(_("After starting the server, you need to register this standby with \"repmgr standby register\"\n")); /* Log the event - if we can connect to the primary */ From 274a30efa53485dc2dd1240923fcf71fc5659d6e Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 12 Apr 2016 16:17:50 +0900 Subject: [PATCH 045/113] Fix pre-9.6 wal_level check --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 469abef9..49d480ba 100644 --- a/repmgr.c +++ b/repmgr.c @@ -5093,7 +5093,7 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error) char **levels; int j = 0; - if (server_version_num < 90500) + if (server_version_num < 90600) { levels = (char **)levels_pre96; wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'"); From 10e47441a2c995ae6a9ebe9d9e67e5a83bd93c70 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 6 May 2016 17:34:46 -0300 Subject: [PATCH 046/113] The commit fixes problems not taking in account while working on the issue with rsync returning non-zero status on vanishing files on commit 83e5f981711c0675c0c5b275eb8326ede86ff2f5. Alvaro Herrera gave me some tips which pointed me in the correct direction. This was reported by sungjae lee --- repmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repmgr.c b/repmgr.c index 49d480ba..0fbe7d6f 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1665,7 +1665,7 @@ do_standby_clone(void) It's quite common for this to happen on the data directory, particularly with long running rsync on a busy server. */ - if (r != 0 && r != 24) + if (!WIFEXITED(r) && WEXITSTATUS(r) != 24) { log_warning(_("standby clone: failed copying master data directory '%s'\n"), master_data_directory); @@ -1751,7 +1751,7 @@ do_standby_clone(void) It's quite common for this to happen on the data directory, particularly with long running rsync on a busy server. */ - if (r != 0 && r != 24) + if (!WIFEXITED(r) && WEXITSTATUS(r) != 24) { log_warning(_("standby clone: failed copying tablespace directory '%s'\n"), tblspc_dir_src.data); From d5e24689a48c79e265119c426b1964a40a535cd7 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 10 May 2016 11:45:03 +0900 Subject: [PATCH 047/113] Don't terminate a standby's repmgrd if self-promotion fails due to master reappearing Per GitHub #173 --- repmgrd.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index 6b1c3d27..aea9bfb3 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1473,6 +1473,8 @@ do_master_failover(void) terminate(ERR_FAILOVER_FAIL); } + log_debug("best candidate node id is %i\n", best_candidate.node_id); + /* if local node is the best candidate, promote it */ if (best_candidate.node_id == local_options.node) { @@ -1484,7 +1486,7 @@ do_master_failover(void) log_notice(_("this node is the best candidate to be the new master, promoting...\n")); - log_debug(_("promote command is: \"%s\"\n"), + log_debug("promote command is: \"%s\"\n", local_options.promote_command); if (log_type == REPMGR_STDERR && *local_options.logfile) @@ -1495,6 +1497,33 @@ do_master_failover(void) r = system(local_options.promote_command); if (r != 0) { + int master_node_id; + + /* + * Check whether the primary reappeared, which will have caused the + * promote command to fail + */ + my_local_conn = establish_db_connection(local_options.conninfo, false); + + if (my_local_conn != NULL) + { + master_conn = get_master_connection(my_local_conn, + local_options.cluster_name, + &master_node_id, NULL); + + if (master_conn != NULL && master_node_id == failed_master.node_id) + { + log_notice(_("Original master reappeared before this standby was promoted - no action taken\n")); + + PQfinish(master_conn); + /* no failover occurred but we'll want to restart connections */ + failover_done = true; + return; + } + + PQfinish(my_local_conn); + } + log_err(_("promote command failed. You could check and try it manually.\n")); terminate(ERR_DB_QUERY); From 4dbbf4019659e8f853a768a5f67147e1e5ede43b Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 10 May 2016 13:58:59 +0900 Subject: [PATCH 048/113] Don't follow the promotion candidate standby if the primary reappears --- repmgrd.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index aea9bfb3..6404edcc 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1497,8 +1497,6 @@ do_master_failover(void) r = system(local_options.promote_command); if (r != 0) { - int master_node_id; - /* * Check whether the primary reappeared, which will have caused the * promote command to fail @@ -1507,6 +1505,8 @@ do_master_failover(void) if (my_local_conn != NULL) { + int master_node_id; + master_conn = get_master_connection(my_local_conn, local_options.cluster_name, &master_node_id, NULL); @@ -1557,9 +1557,38 @@ do_master_failover(void) PQExpBufferData event_details; initPQExpBuffer(&event_details); + /* wait */ sleep(10); + /* + * Check whether the primary reappeared while we were waiting, so we + * don't end up following the promotion candidate + */ + my_local_conn = establish_db_connection(local_options.conninfo, false); + if (my_local_conn != NULL) + { + int master_node_id; + + master_conn = get_master_connection(my_local_conn, + local_options.cluster_name, + &master_node_id, NULL); + + if (master_conn != NULL && master_node_id == failed_master.node_id) + { + log_notice(_("Original master reappeared - no action taken\n")); + + PQfinish(master_conn); + /* no failover occurred but we'll want to restart connections */ + failover_done = true; + return; + } + + PQfinish(my_local_conn); + } + + /* XXX double-check the promotion candidate did become the new primary */ + log_notice(_("node %d is the best candidate for new master, attempting to follow...\n"), best_candidate.node_id); From b0f6b7bad7b93fcebe956c5eec963391704d7126 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 11 May 2016 08:29:55 +0900 Subject: [PATCH 049/113] repmgrd: rename variable for clarity --- repmgrd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 6404edcc..ddc51d71 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -716,7 +716,7 @@ standby_monitor(void) t_node_info upstream_node; int active_master_id; - const char *type = NULL; + const char *upstream_node_type = NULL; /* * Verify that the local node is still available - if not there's @@ -744,7 +744,7 @@ standby_monitor(void) local_options.node, &upstream_node_id, upstream_conninfo); - type = upstream_node_id == master_options.node + upstream_node_type = (upstream_node_id == master_options.node) ? "master" : "upstream"; @@ -754,7 +754,7 @@ standby_monitor(void) * we cannot reconnect, try to get a new upstream node. */ - check_connection(&upstream_conn, type, upstream_conninfo); + check_connection(&upstream_conn, upstream_node_type, upstream_conninfo); /* * This takes up to local_options.reconnect_attempts * * local_options.reconnect_interval seconds @@ -767,7 +767,7 @@ standby_monitor(void) if (local_options.failover == MANUAL_FAILOVER) { - log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), type); + log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), upstream_node_type); for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++) { From 7fd44a3d7485697eff12c4e9c0596bce0d2ef2e6 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 11 May 2016 09:23:06 +0900 Subject: [PATCH 050/113] Suppress gnu_printf format warning --- log.c | 5 +++-- log.h | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/log.c b/log.c index 7acbd019..348b6db4 100644 --- a/log.c +++ b/log.c @@ -40,7 +40,8 @@ /* #define REPMGR_DEBUG */ static int detect_log_facility(const char *facility); -static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap); +static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap) +__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0))); int log_type = REPMGR_STDERR; int log_level = LOG_NOTICE; @@ -48,7 +49,7 @@ int last_log_level = LOG_NOTICE; int verbose_logging = false; int terse_logging = false; -void +extern void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...) { va_list arglist; diff --git a/log.h b/log.h index 8d1cf10f..e717be98 100644 --- a/log.h +++ b/log.h @@ -25,7 +25,7 @@ #define REPMGR_SYSLOG 1 #define REPMGR_STDERR 2 -void +extern void stderr_log_with_level(const char *level_name, int level, const char *fmt,...) __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); @@ -123,8 +123,10 @@ bool logger_shutdown(void); void logger_set_verbose(void); void logger_set_terse(void); -void log_hint(const char *fmt, ...); -void log_verbose(int level, const char *fmt, ...); +void log_hint(const char *fmt, ...) +__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2))); +void log_verbose(int level, const char *fmt, ...) +__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3))); extern int log_type; extern int log_level; From 54d3c7a4ca3d4880702cb5e0c8556329f88cea63 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 11 May 2016 09:52:02 +0900 Subject: [PATCH 051/113] repmgrd: avoid additional connection to local instance in do_master_failover() --- repmgrd.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index ddc51d71..ec6f6420 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1423,9 +1423,6 @@ do_master_failover(void) PQfinish(node_conn); } - /* Close the connection to this server */ - PQfinish(my_local_conn); - my_local_conn = NULL; /* * determine which one is the best candidate to promote to master @@ -1480,6 +1477,10 @@ do_master_failover(void) { PQExpBufferData event_details; + /* Close the connection to this server */ + PQfinish(my_local_conn); + my_local_conn = NULL; + initPQExpBuffer(&event_details); /* wait */ sleep(5); @@ -1555,6 +1556,7 @@ do_master_failover(void) { PGconn *new_master_conn; PQExpBufferData event_details; + int master_node_id; initPQExpBuffer(&event_details); @@ -1565,28 +1567,23 @@ do_master_failover(void) * Check whether the primary reappeared while we were waiting, so we * don't end up following the promotion candidate */ - my_local_conn = establish_db_connection(local_options.conninfo, false); - if (my_local_conn != NULL) + + master_conn = get_master_connection(my_local_conn, + local_options.cluster_name, + &master_node_id, NULL); + + if (master_conn != NULL && master_node_id == failed_master.node_id) { - int master_node_id; + log_notice(_("Original master reappeared - no action taken\n")); - master_conn = get_master_connection(my_local_conn, - local_options.cluster_name, - &master_node_id, NULL); - - if (master_conn != NULL && master_node_id == failed_master.node_id) - { - log_notice(_("Original master reappeared - no action taken\n")); - - PQfinish(master_conn); - /* no failover occurred but we'll want to restart connections */ - failover_done = true; - return; - } - - PQfinish(my_local_conn); + PQfinish(master_conn); + /* no failover occurred but we'll want to restart connections */ + failover_done = true; + return; } + PQfinish(my_local_conn); + /* XXX double-check the promotion candidate did become the new primary */ log_notice(_("node %d is the best candidate for new master, attempting to follow...\n"), @@ -1601,6 +1598,9 @@ do_master_failover(void) fflush(stderr); } + /* Close the connection to this server */ + PQfinish(my_local_conn); + my_local_conn = NULL; log_debug(_("executing follow command: \"%s\"\n"), local_options.follow_command); From 57f9432692dad8554f0b2a893ae47907acd90372 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 11 May 2016 21:47:40 +0900 Subject: [PATCH 052/113] Add missing newlines in log messages --- dbutils.c | 2 +- repmgrd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbutils.c b/dbutils.c index 243231cb..d996e46c 100644 --- a/dbutils.c +++ b/dbutils.c @@ -612,7 +612,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, if (!PQntuples(res)) { - log_notice(_("no record found for upstream server")); + log_notice(_("no record found for upstream server\n")); PQclear(res); return NULL; } diff --git a/repmgrd.c b/repmgrd.c index ec6f6420..1c3c182d 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1712,7 +1712,7 @@ do_upstream_standby_failover(t_node_info upstream_node) if (PQntuples(res) == 0) { - log_err(_("no node with id %i found"), upstream_node_id); + log_err(_("no node with id %i found\n"), upstream_node_id); PQclear(res); return false; } From 21b2ff1a1fd84504ecf727e0c06a6ed0f7d6a3c8 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 11 May 2016 21:47:40 +0900 Subject: [PATCH 053/113] repmgrd: better handling of missing upstream_node_id Ensure we default to master node. --- dbutils.c | 35 +++++++++++++++++++++++++++++++---- repmgrd.c | 9 +++++---- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/dbutils.c b/dbutils.c index d996e46c..7764d99f 100644 --- a/dbutils.c +++ b/dbutils.c @@ -604,7 +604,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, if (PQresultStatus(res) != PGRES_TUPLES_OK) { - log_err(_("unable to get conninfo for upstream server\n%s\n"), + log_err(_("error when attempting to find upstream server\n%s\n"), PQerrorMessage(standby_conn)); PQclear(res); return NULL; @@ -612,15 +612,42 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, if (!PQntuples(res)) { - log_notice(_("no record found for upstream server\n")); PQclear(res); - return NULL; + log_debug("no record found for upstream server\n"); + + sqlquery_snprintf(sqlquery, + " SELECT un.conninfo, un.name, un.id " + " FROM %s.repl_nodes un " + " WHERE un.cluster = '%s' " + " AND un.type='master' " + " AND un.active IS TRUE", + get_repmgr_schema_quoted(standby_conn), + cluster); + res = PQexec(standby_conn, sqlquery); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_err(_("error when attempting to find active master server\n%s\n"), + PQerrorMessage(standby_conn)); + PQclear(res); + return NULL; + } + + if (!PQntuples(res)) + { + PQclear(res); + log_notice(_("no record found for active master server\n")); + + return NULL; + } + + log_debug("record found for active master server\n"); } strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO); if (upstream_node_id_ptr != NULL) - *upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 1)); + *upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 2)); PQclear(res); diff --git a/repmgrd.c b/repmgrd.c index 1c3c182d..ae10aa08 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -742,7 +742,8 @@ standby_monitor(void) upstream_conn = get_upstream_connection(my_local_conn, local_options.cluster_name, local_options.node, - &upstream_node_id, upstream_conninfo); + &upstream_node_id, + upstream_conninfo); upstream_node_type = (upstream_node_id == master_options.node) ? "master" @@ -826,7 +827,7 @@ standby_monitor(void) * Failover handling is handled differently depending on whether * the failed node is the master or a cascading standby */ - upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id); + upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id); if (upstream_node.type == MASTER) { @@ -929,7 +930,7 @@ standby_monitor(void) * from the upstream node to write monitoring information */ - upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id); + upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id); sprintf(sqlquery, "SELECT id " @@ -2397,7 +2398,7 @@ get_node_info(PGconn *conn, char *cluster, int node_id) if (res == 0) { - log_warning(_("No record found record for node %i\n"), node_id); + log_warning(_("No record found for node %i\n"), node_id); } return node_info; From 0a798bf6e448afa4e7968527b22ebb43b24dfd12 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 12 May 2016 09:52:22 +0900 Subject: [PATCH 054/113] Comment fixes and formatting tweaks --- repmgrd.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index ae10aa08..68981d72 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -749,10 +749,9 @@ standby_monitor(void) ? "master" : "upstream"; - // ZZZ "5 minutes"? /* - * Check if the upstream node is still available, if after 5 minutes of retries - * we cannot reconnect, try to get a new upstream node. + * Check that the upstream node is still available + * If not, initiate failover process */ check_connection(&upstream_conn, upstream_node_type, upstream_conninfo); @@ -819,11 +818,9 @@ standby_monitor(void) else if (local_options.failover == AUTOMATIC_FAILOVER) { /* - * When we returns from this function we will have a new master + * When we return from this function we will have a new master * and a new master_conn - */ - - /* + * * Failover handling is handled differently depending on whether * the failed node is the master or a cascading standby */ @@ -1878,7 +1875,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo) static bool set_local_node_status(void) { - PGresult *res; + PGresult *res; char sqlquery[QUERY_STR_LEN]; int active_master_node_id = NODE_NOT_FOUND; char master_conninfo[MAXLEN]; From 2eb00a3e6f422c61d88ea464d68fc80120943336 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 12 May 2016 09:56:29 +0900 Subject: [PATCH 055/113] Remove unneeded column --- dbutils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbutils.c b/dbutils.c index 7764d99f..7624c414 100644 --- a/dbutils.c +++ b/dbutils.c @@ -587,7 +587,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, upstream_conninfo = upstream_conninfo_out; sqlquery_snprintf(sqlquery, - " SELECT un.conninfo, un.name, un.id " + " SELECT un.conninfo, un.id " " FROM %s.repl_nodes un " "INNER JOIN %s.repl_nodes n " " ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)" @@ -616,7 +616,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, log_debug("no record found for upstream server\n"); sqlquery_snprintf(sqlquery, - " SELECT un.conninfo, un.name, un.id " + " SELECT un.conninfo, un.id " " FROM %s.repl_nodes un " " WHERE un.cluster = '%s' " " AND un.type='master' " @@ -647,7 +647,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id, strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO); if (upstream_node_id_ptr != NULL) - *upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 2)); + *upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 1)); PQclear(res); From beda22d5f9880dc1feb657f4826ad93e125a96ee Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 12 May 2016 13:06:10 +0900 Subject: [PATCH 056/113] Correct check for wal_level in 9.3 --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 0fbe7d6f..6e7f10bb 100644 --- a/repmgr.c +++ b/repmgr.c @@ -5067,7 +5067,7 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error) char *wal_error_message = NULL; /* Check that WAL level is set correctly */ - if (server_version_num < 90300) + if (server_version_num < 90400) { i = guc_set(conn, "wal_level", "=", "hot_standby"); wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby'"); From 247823db4dc442ea408fa019060660ff52583f80 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 12 May 2016 14:05:44 +0900 Subject: [PATCH 057/113] Remove extraneous PQfinish() --- repmgrd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 68981d72..5bdda400 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1580,7 +1580,10 @@ do_master_failover(void) return; } + + /* Close the connection to this server */ PQfinish(my_local_conn); + my_local_conn = NULL; /* XXX double-check the promotion candidate did become the new primary */ @@ -1596,9 +1599,6 @@ do_master_failover(void) fflush(stderr); } - /* Close the connection to this server */ - PQfinish(my_local_conn); - my_local_conn = NULL; log_debug(_("executing follow command: \"%s\"\n"), local_options.follow_command); From e814c1120e6cdc2e3a0a53effa355ac2189b51c0 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 12 May 2016 21:48:17 +0900 Subject: [PATCH 058/113] repmgrd: handle situations where streaming replication is inactive --- repmgrd.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 5bdda400..24249000 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -718,6 +718,7 @@ standby_monitor(void) int active_master_id; const char *upstream_node_type = NULL; + bool receiving_streamed_wal = true; /* * Verify that the local node is still available - if not there's * no point in doing much else anyway @@ -999,10 +1000,24 @@ standby_monitor(void) strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN); strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN); strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN); + last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0) ? true : false; + /* + * If pg_last_xlog_receive_location is NULL, this means we're in archive + * recovery and will need to calculate lag based on pg_last_xlog_replay_location + */ + + /* + * Replayed WAL is greater than received streamed WAL + */ + if (PQgetisnull(res, 0, 1)) + { + receiving_streamed_wal = false; + } + PQclear(res); /* @@ -1014,11 +1029,10 @@ standby_monitor(void) * PostgreSQL log. In the absence of a better strategy, skip attempting * to insert a monitoring record. */ - if (last_xlog_receive_location_gte_replayed == false) + if (receiving_streamed_wal == true && last_xlog_receive_location_gte_replayed == false) { log_verbose(LOG_WARNING, - "Invalid replication_lag value calculated - is this standby connected to its upstream?\n"); - return; + "Replayed WAL newer than received WAL - is this standby connected to its upstream?\n"); } /* Get master xlog info */ @@ -1037,9 +1051,18 @@ standby_monitor(void) /* Calculate the lag */ lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL); - lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); + lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL); + if (last_xlog_receive_location_gte_replayed == false) + { + lsn_last_xlog_receive_location = lsn_last_xlog_replay_location; + } + else + { + lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); + } + /* * Build the SQL to execute on master */ From 209de699cee506da32a2b1a28e4d7013c8333e6c Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 16 May 2016 13:51:08 +0900 Subject: [PATCH 059/113] README.md: improve documentation of `repl_status` view --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 02fbb201..7366782a 100644 --- a/README.md +++ b/README.md @@ -1016,8 +1016,11 @@ Monitoring ---------- When `repmgrd` is running with the option `-m/--monitoring-history`, it will -constantly write node status information to the `repl_monitor` table, which can -be queried easily using the view `repl_status`: +constantly write standby node status information to the `repl_monitor` table, +providing a near-real time overview of replication status on all nodes +in the cluster. + +The view `repl_status` shows the most recent state for each node, e.g.: repmgr=# SELECT * FROM repmgr_test.repl_status; -[ RECORD 1 ]-------------+----------------------------- @@ -1042,6 +1045,10 @@ table , it's advisable to regularly purge historical data with `repmgr cluster cleanup`; use the `-k/--keep-history` to specify how many day's worth of data should be retained. +Note that when a standby node is not streaming directly from its upstream +node, i.e. recovering WAL from an archive, `apply_lag` will always +appear as `0 bytes`. + Using a witness server with repmgrd ------------------------------------ From 4c463a66b77e2edccb3d76b7215212db71b93550 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 17 May 2016 10:37:14 +0900 Subject: [PATCH 060/113] Update HISTORY --- HISTORY | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/HISTORY b/HISTORY index 5b5eafc8..9e594e10 100644 --- a/HISTORY +++ b/HISTORY @@ -1,3 +1,12 @@ +3.1.3 2016-05-17 + repmgrd: enable monitoring when a standby is catching up by + replaying archived WAL (Ian) + repmgrd: when upstream_node_id is NULL, assume upstream node + to be current master (Ian) + repmgrd: check for reappearance of the master node if standby + promotion fails (Ian) + improve handling of rsync failure conditions (Martín) + 3.1.2 2016-04-12 Fix pg_ctl path generation in do_standby_switchover() (Ian) Regularly sync witness server repl_nodes table (Ian) From 9a05999abb3bc5ef0ed4a2eb8662d16b6e965150 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 17 May 2016 17:24:02 +0900 Subject: [PATCH 061/113] Fix log formatting --- dbutils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbutils.c b/dbutils.c index 7624c414..efc0b75b 100644 --- a/dbutils.c +++ b/dbutils.c @@ -420,7 +420,7 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op, " WHERE name = '%s' AND setting::%s %s '%s'::%s", parameter, datatype, op, value, datatype); - log_verbose(LOG_DEBUG, "guc_set_typed():n%s\n", sqlquery); + log_verbose(LOG_DEBUG, "guc_set_typed():\n%s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) From b6b6439819a5cf26026f32ef400a747637f0138d Mon Sep 17 00:00:00 2001 From: Martin Date: Wed, 11 May 2016 19:30:32 -0300 Subject: [PATCH 062/113] I copied over the rmtree function (and other functions needed by this one) from the postgresql code so we use that instead of issuing system calls with rm -rf .... I also eliminated the rm -rf for pg_xlog. Will later do the same with the other system call to remove files in pg_replslot/ --- Makefile | 2 +- dirmod.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ dirmod.h | 23 +++++++ repmgr.c | 15 +++-- repmgr.h | 1 + 5 files changed, 228 insertions(+), 7 deletions(-) create mode 100644 dirmod.c create mode 100644 dirmod.h diff --git a/Makefile b/Makefile index 1fe7ec6b..bbf778fd 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ HEADERS = $(wildcard *.h) repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o -repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o +repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o DATA = repmgr.sql uninstall_repmgr.sql diff --git a/dirmod.c b/dirmod.c new file mode 100644 index 00000000..82837f66 --- /dev/null +++ b/dirmod.c @@ -0,0 +1,194 @@ +/* + * + * dirmod.c + * directory handling functions + * + * Copyright (C) 2ndQuadrant, 2010-2016 + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include "postgres_fe.h" + +/* Don't modify declarations in system headers */ + +#include +#include +#include + +/* + * pgfnames + * + * return a list of the names of objects in the argument directory. Caller + * must call pgfnames_cleanup later to free the memory allocated by this + * function. + */ +char ** +pgfnames(const char *path) +{ + DIR *dir; + struct dirent *file; + char **filenames; + int numnames = 0; + int fnsize = 200; /* enough for many small dbs */ + + dir = opendir(path); + if (dir == NULL) + { + return NULL; + } + + filenames = (char **) palloc(fnsize * sizeof(char *)); + + while (errno = 0, (file = readdir(dir)) != NULL) + { + if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0) + { + if (numnames + 1 >= fnsize) + { + fnsize *= 2; + filenames = (char **) repalloc(filenames, + fnsize * sizeof(char *)); + } + filenames[numnames++] = pstrdup(file->d_name); + } + } + + if (errno) + { + fprintf(stderr, _("could not read directory \"%s\": %s\n"), + path, strerror(errno)); + } + + filenames[numnames] = NULL; + + if (closedir(dir)) + { + fprintf(stderr, _("could not close directory \"%s\": %s\n"), + path, strerror(errno)); + } + + return filenames; +} + + +/* + * pgfnames_cleanup + * + * deallocate memory used for filenames + */ +void +pgfnames_cleanup(char **filenames) +{ + char **fn; + + for (fn = filenames; *fn; fn++) + pfree(*fn); + + pfree(filenames); +} + + +/* + * rmtree + * + * Delete a directory tree recursively. + * Assumes path points to a valid directory. + * Deletes everything under path. + * If rmtopdir is true deletes the directory too. + * Returns true if successful, false if there was any problem. + * (The details of the problem are reported already, so caller + * doesn't really have to say anything more, but most do.) + */ +bool +rmtree(const char *path, bool rmtopdir) +{ + bool result = true; + char pathbuf[MAXPGPATH]; + char **filenames; + char **filename; + struct stat statbuf; + + /* + * we copy all the names out of the directory before we start modifying + * it. + */ + filenames = pgfnames(path); + + if (filenames == NULL) + return false; + + /* now we have the names we can start removing things */ + for (filename = filenames; *filename; filename++) + { + snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename); + + /* + * It's ok if the file is not there anymore; we were just about to + * delete it anyway. + * + * This is not an academic possibility. One scenario where this + * happens is when bgwriter has a pending unlink request for a file in + * a database that's being dropped. In dropdb(), we call + * ForgetDatabaseFsyncRequests() to flush out any such pending unlink + * requests, but because that's asynchronous, it's not guaranteed that + * the bgwriter receives the message in time. + */ + if (lstat(pathbuf, &statbuf) != 0) + { + if (errno != ENOENT) + { + result = false; + } + continue; + } + + if (S_ISDIR(statbuf.st_mode)) + { + /* call ourselves recursively for a directory */ + if (!rmtree(pathbuf, true)) + { + /* we already reported the error */ + result = false; + } + } + else + { + if (unlink(pathbuf) != 0) + { + if (errno != ENOENT) + { + result = false; + } + } + } + } + + if (rmtopdir) + { + if (rmdir(path) != 0) + { + result = false; + } + } + + pgfnames_cleanup(filenames); + + return result; +} + diff --git a/dirmod.h b/dirmod.h new file mode 100644 index 00000000..47caf982 --- /dev/null +++ b/dirmod.h @@ -0,0 +1,23 @@ +/* + * dirmod.h + * Copyright (c) 2ndQuadrant, 2010-2016 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#ifndef _DIRMOD_H_ +#define _DIRMOD_H_ + +#endif diff --git a/repmgr.c b/repmgr.c index 6e7f10bb..aa340b62 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1332,6 +1332,8 @@ do_standby_clone(void) char *first_wal_segment = NULL; char *last_wal_segment = NULL; + char xlog_dir[MAXLEN] = ""; + PQExpBufferData event_details; @@ -1997,15 +1999,16 @@ stop_backup: * Remove any existing WAL from the target directory, since * rsync's --exclude option doesn't do it. */ - maxlen_snprintf(script, "rm -rf %s/pg_xlog/*", - local_data_directory); - r = system(script); - if (r != 0) + + maxlen_snprintf(xlog_dir, "%s/pg_xlog/", local_data_directory); + + if (!rmtree(xlog_dir, false)) { - log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"), - local_data_directory); + log_err(_("unable to empty local WAL directory %s\n"), + xlog_dir); exit(ERR_BAD_RSYNC); } + } /* diff --git a/repmgr.h b/repmgr.h index 780dccf7..4141c7fd 100644 --- a/repmgr.h +++ b/repmgr.h @@ -28,6 +28,7 @@ #include "dbutils.h" #include "errcode.h" #include "config.h" +#include "dirmod.h" #define MIN_SUPPORTED_VERSION "9.3" #define MIN_SUPPORTED_VERSION_NUM 90300 From 1c49c4159c2f352b18830abe60f041dfe947d2b0 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 13 May 2016 17:27:11 -0300 Subject: [PATCH 063/113] Add -X stream parameter to pg_basebackup to ensure that we get all the WALs needed without needing to set wal_keep_segments to a ridiculously high value. This is not necessary on 9.6 if we are using replication slots, as all WAL segments needed will be kept on the primary until they are consumed by the slot. --- repmgr.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index aa340b62..2c315891 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4414,6 +4414,18 @@ run_basebackup(const char *data_dir) } } + /* + * To ensure we have all the WALs needed during basebackup execution we stream + * them as the backup is taking place. + * Not necessary if on 9.6 if we have replication slots set in repmgr.conf + * (starting at 9.6 there is an option, which we use, to reserve the LSN at + * creation time) + */ + if (server_version_num < 90600 || !options.use_replication_slots) + { + appendPQExpBuffer(¶ms, " -X stream"); + } + maxlen_snprintf(script, "%s -l \"repmgr base backup\" %s %s", make_pg_path("pg_basebackup"), @@ -4426,7 +4438,7 @@ run_basebackup(const char *data_dir) /* * As of 9.4, pg_basebackup only ever returns 0 or 1 - */ + */ r = system(script); From c30609426a871b14e958a1d3575680889c7cb434 Mon Sep 17 00:00:00 2001 From: Martin Date: Tue, 17 May 2016 15:18:19 -0300 Subject: [PATCH 064/113] Fix several inconsistencies added in d5d8eb2bcb8862607799d602af620e5ca98bc837 --- repmgr.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/repmgr.c b/repmgr.c index 2c315891..b925e48d 100644 --- a/repmgr.c +++ b/repmgr.c @@ -87,7 +87,7 @@ static bool create_recovery_file(const char *data_dir); static int test_ssh_connection(char *host, char *remote_user); static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory, int server_version_num); -static int run_basebackup(const char *data_dir); +static int run_basebackup(const char *data_dir, int server_version); static void check_parameters_for_action(const int action); static bool create_schema(PGconn *conn); static void write_primary_conninfo(char *line); @@ -1847,7 +1847,7 @@ do_standby_clone(void) } else { - r = run_basebackup(local_data_directory); + r = run_basebackup(local_data_directory, get_server_version(upstream_conn, NULL)); if (r != 0) { log_warning(_("standby clone: base backup failed\n")); @@ -4374,12 +4374,12 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, static int -run_basebackup(const char *data_dir) +run_basebackup(const char *data_dir, int server_version) { char script[MAXLEN]; int r = 0; PQExpBufferData params; - TablespaceListCell *cell; + TablespaceListCell *cell; /* Create pg_basebackup command line options */ @@ -4421,9 +4421,19 @@ run_basebackup(const char *data_dir) * (starting at 9.6 there is an option, which we use, to reserve the LSN at * creation time) */ - if (server_version_num < 90600 || !options.use_replication_slots) + if (server_version < 90600 || !options.use_replication_slots) { - appendPQExpBuffer(¶ms, " -X stream"); + /* + * We're going to check first if the user set the xlog method in the repmgr.conf + * file. We don't want to have conflits with pg_basebackup due to specifying the + * method twice. + */ + const char xlog_short[4] = "-X "; + const char xlog_long[14] = "--xlog-method"; + if (strstr(options.pg_basebackup_options, xlog_short) == NULL && strstr(options.pg_basebackup_options, xlog_long) == NULL ) + { + appendPQExpBuffer(¶ms, " -X stream"); + } } maxlen_snprintf(script, From cf46834041fae439782cb19ce0fb4cf747775d3a Mon Sep 17 00:00:00 2001 From: Martin Date: Tue, 17 May 2016 15:19:20 -0300 Subject: [PATCH 065/113] Add new option pg_restore_command. This can be used so that repmgr standby clone adds the string specified in repmgr.conf as a restore_command in recovery.conf. We can use this option for integration with barman by setting the parameter to an appropriate get-wal call. --- config.c | 6 +++++- config.h | 3 ++- repmgr.c | 9 +++++++++ repmgr.conf.sample | 4 ++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/config.c b/config.c index c32166bb..dccac61c 100644 --- a/config.c +++ b/config.c @@ -224,6 +224,7 @@ parse_config(t_configuration_options *options) memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options)); memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options)); + memset(options->pg_restore_command, 0, sizeof(options->pg_restore_command)); /* default master_response_timeout is 60 seconds */ options->master_response_timeout = 60; @@ -340,7 +341,8 @@ parse_config(t_configuration_options *options) strncpy(options->follow_command, value, MAXLEN); else if (strcmp(name, "master_response_timeout") == 0) options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false); - /* 'primary_response_timeout' as synonym for 'master_response_timeout' - + /* + * 'primary_response_timeout' as synonym for 'master_response_timeout' - * we'll switch terminology in a future release (3.1?) */ else if (strcmp(name, "primary_response_timeout") == 0) @@ -372,6 +374,8 @@ parse_config(t_configuration_options *options) parse_event_notifications_list(options, value); else if (strcmp(name, "tablespace_mapping") == 0) tablespace_list_append(options, value); + else if (strcmp(name, "pg_restore_command") == 0) + strncpy(options->pg_restore_command, value, MAXLEN); else { known_parameter = false; diff --git a/config.h b/config.h index b5f3099b..37186444 100644 --- a/config.h +++ b/config.h @@ -72,6 +72,7 @@ typedef struct char pg_bindir[MAXLEN]; char pg_ctl_options[MAXLEN]; char pg_basebackup_options[MAXLEN]; + char pg_restore_command[MAXLEN]; char logfile[MAXLEN]; int monitor_interval_secs; int retry_promote_interval_secs; @@ -82,7 +83,7 @@ typedef struct TablespaceList tablespace_mapping; } t_configuration_options; -#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } +#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } typedef struct ErrorListCell { diff --git a/repmgr.c b/repmgr.c index b925e48d..507f8a25 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4219,6 +4219,15 @@ create_recovery_file(const char *data_dir) log_debug(_("recovery.conf: %s"), line); } + /* If pg_restore_command is set, we use it as resore_cmmand in recovery.conf */ + if (strcmp(options.pg_restore_command, "") != 0) + { + maxlen_snprintf(line, "restore_command = '%s'\n", + options.pg_restore_command); + if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false) + return false; + log_debug(_("recovery.conf: %s"), line); + } fclose(recovery_file); return true; diff --git a/repmgr.conf.sample b/repmgr.conf.sample index fc2bd5b4..5964f14d 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -113,6 +113,10 @@ # # tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace +# You can specify a restore_command to be used in the recovery.conf that +# will be placed in the cloned standby +# +# pg_resore_command = cp /path/to/archived/wals/%f %p # Failover settings (repmgrd) # --------------------------- From 45178c19d8a6b4cb93623093b2add247faab6844 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sat, 21 May 2016 21:04:19 -0400 Subject: [PATCH 066/113] Fix compiler warning For a char * variable, '\0' is just a strange way to write NULL, and clang warns about it. --- config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.c b/config.c index dccac61c..6a63c0a0 100644 --- a/config.c +++ b/config.c @@ -28,7 +28,7 @@ static void parse_event_notifications_list(t_configuration_options *options, con static void tablespace_list_append(t_configuration_options *options, const char *arg); static void exit_with_errors(ErrorList *config_errors); -const static char *_progname = '\0'; +const static char *_progname = NULL; static char config_file_path[MAXPGPATH]; static bool config_file_provided = false; bool config_file_found = false; From 0daa7381b37e1ed0bb3a52cf422824458ef7e3c4 Mon Sep 17 00:00:00 2001 From: Gianni Ciolli Date: Thu, 19 May 2016 19:42:26 +0200 Subject: [PATCH 067/113] Debian auto-build version upgrade --- debian/DEBIAN/control | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/DEBIAN/control b/debian/DEBIAN/control index 698b0850..4ca9a262 100644 --- a/debian/DEBIAN/control +++ b/debian/DEBIAN/control @@ -1,9 +1,9 @@ Package: repmgr-auto -Version: 3.0.1 +Version: 3.1.3 Section: database Priority: optional Architecture: all -Depends: rsync, postgresql-9.3 | postgresql-9.4 +Depends: rsync, postgresql-9.3 | postgresql-9.4 | postgresql-9.5 Maintainer: Self built package Description: PostgreSQL replication setup, magament and monitoring has two main executables From 3937670d14c8988d7c81b65d35125d200e452694 Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 23 May 2016 13:30:05 -0300 Subject: [PATCH 068/113] Added logrotate configuration file (for those who would like one ;)) and changed parameters in sysconfig file so that we use /var/log for logs and /var/run for pid files. --- RHEL/repmgrd.logrotate | 14 ++++++++++++++ RHEL/repmgrd.sysconfig | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 RHEL/repmgrd.logrotate diff --git a/RHEL/repmgrd.logrotate b/RHEL/repmgrd.logrotate new file mode 100644 index 00000000..6ff27afa --- /dev/null +++ b/RHEL/repmgrd.logrotate @@ -0,0 +1,14 @@ +/var/log/repmgr/repmgr.log { + missingok + copytruncate + compress + notifempty + sharedscripts + create 0640 postgres postgres + nodateext + weekly + rotate 8 + postrotate + /bin/kill -HUP `cat /var/run/repmgr/repmgrd.pid 2>/dev/null` 2> /dev/null || true + endscript +} diff --git a/RHEL/repmgrd.sysconfig b/RHEL/repmgrd.sysconfig index 07136a50..5d60ff58 100644 --- a/RHEL/repmgrd.sysconfig +++ b/RHEL/repmgrd.sysconfig @@ -15,7 +15,7 @@ REPMGRD_ENABLED=no #REPMGRD_BIN=/usr/bin/repmgrd # pid file -#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid +#REPMGRD_PIDFILE=/var/run/repmgr/repmgrd.pid # log file -#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log +#REPMGRD_LOG=/var/log/repmgr/repmgrd.log From 560066fa9da832406dd9a8115baf29a8c9084879 Mon Sep 17 00:00:00 2001 From: Gianni Ciolli Date: Mon, 23 May 2016 19:00:25 +0200 Subject: [PATCH 069/113] Basic CSV mode for "repmgr cluster show", enabled by --csv command line option --- repmgr.c | 60 ++++++++++++++++++++++++++++++++++++++++---------------- repmgr.h | 3 ++- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/repmgr.c b/repmgr.c index 507f8a25..74dbc830 100644 --- a/repmgr.c +++ b/repmgr.c @@ -187,6 +187,7 @@ main(int argc, char **argv) {"config-archive-dir", required_argument, NULL, 5}, {"pg_rewind", optional_argument, NULL, 6}, {"pwprompt", optional_argument, NULL, 7}, + {"csv", no_argument, NULL, 8}, {"help", no_argument, NULL, '?'}, {"version", no_argument, NULL, 'V'}, {NULL, 0, NULL, 0} @@ -427,6 +428,9 @@ main(int argc, char **argv) case 7: runtime_options.witness_pwprompt = true; break; + case 8: + runtime_options.csv_mode = true; + break; default: { @@ -764,7 +768,7 @@ do_cluster_show(void) conn = establish_db_connection(options.conninfo, true); sqlquery_snprintf(sqlquery, - "SELECT conninfo, type, name, upstream_node_name" + "SELECT conninfo, type, name, upstream_node_name, id" " FROM %s.repl_show_nodes", get_repmgr_schema_quoted(conn)); @@ -813,21 +817,24 @@ do_cluster_show(void) upstream_length = upstream_length_cur; } - printf("Role | %-*s | %-*s | Connection String\n", name_length, name_header, upstream_length, upstream_header); - printf("----------+-"); + if (! runtime_options.csv_mode) + { + printf("Role | %-*s | %-*s | Connection String\n", name_length, name_header, upstream_length, upstream_header); + printf("----------+-"); - for (i = 0; i < name_length; i++) - printf("-"); + for (i = 0; i < name_length; i++) + printf("-"); - printf("-|-"); - for (i = 0; i < upstream_length; i++) - printf("-"); + printf("-|-"); + for (i = 0; i < upstream_length; i++) + printf("-"); - printf("-|-"); - for (i = 0; i < conninfo_length; i++) - printf("-"); + printf("-|-"); + for (i = 0; i < conninfo_length; i++) + printf("-"); - printf("\n"); + printf("\n"); + } for (i = 0; i < PQntuples(res); i++) { @@ -841,11 +848,20 @@ do_cluster_show(void) else strcpy(node_role, "* master"); - printf("%-10s", node_role); - printf("| %-*s ", name_length, PQgetvalue(res, i, 2)); - printf("| %-*s ", upstream_length, PQgetvalue(res, i, 3)); - printf("| %s\n", PQgetvalue(res, i, 0)); - + if (runtime_options.csv_mode) + { + int connection_status = + (PQstatus(conn) == CONNECTION_OK) ? + (is_standby(conn) ? 1 : 0) : -1; + printf("%s,%d\n", PQgetvalue(res, i, 4), connection_status); + } + else + { + printf("%-10s", node_role); + printf("| %-*s ", name_length, PQgetvalue(res, i, 2)); + printf("| %-*s ", upstream_length, PQgetvalue(res, i, 3)); + printf("| %s\n", PQgetvalue(res, i, 0)); + } PQfinish(conn); } @@ -4129,6 +4145,7 @@ do_help(void) printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \ " optionally providing a path to the binary\n")); printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n")); + printf(_(" --csv (cluster show) output in CSV mode (0 = master, 1 = standby, -1 = down)\n")); /* printf(_(" --initdb-no-pwprompt (witness server) no superuser password prompt during initdb\n"));*/ printf(_(" -P, --pwprompt (witness server) prompt for password when creating users\n")); printf(_(" -S, --superuser=USERNAME (witness server) superuser username for witness database\n" \ @@ -4658,6 +4675,15 @@ check_parameters_for_action(const int action) } } + /* Warn about parameters which apply to CLUSTER SHOW only */ + if (action != CLUSTER_SHOW) + { + if (runtime_options.csv_mode) + { + error_list_append(&cli_warnings, _("--csv can only be used when executing CLUSTER SHOW")); + } + } + return; } diff --git a/repmgr.h b/repmgr.h index 4141c7fd..358dec92 100644 --- a/repmgr.h +++ b/repmgr.h @@ -70,6 +70,7 @@ typedef struct bool rsync_only; bool fast_checkpoint; bool ignore_external_config_files; + bool csv_mode; char pg_ctl_mode[MAXLEN]; char masterport[MAXLEN]; /* @@ -95,7 +96,7 @@ typedef struct bool initdb_no_pwprompt; } t_runtime_options; -#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false } +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false } struct BackupLabel { From 9b2a907b0919f552383752c526ffec3763b1df47 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 24 May 2016 16:44:22 +0900 Subject: [PATCH 070/113] Convert erroneously forgotten printf debug to proper logging --- repmgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/repmgr.c b/repmgr.c index 74dbc830..100122bd 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1694,8 +1694,6 @@ do_standby_clone(void) /* XXX ensure this function does not exit on error as we'd need to stop the backup */ read_backup_label(local_data_directory, &backup_label); - printf("Label: %s; file: %s\n", backup_label.label, backup_label.start_wal_file); - /* Handle tablespaces */ sqlquery_snprintf(sqlquery, @@ -2016,7 +2014,7 @@ stop_backup: * rsync's --exclude option doesn't do it. */ - maxlen_snprintf(xlog_dir, "%s/pg_xlog/", local_data_directory); + maxlen_snprintf(xlog_dir, "%s/pg_xlog/", local_data_directory); if (!rmtree(xlog_dir, false)) { @@ -2043,7 +2041,7 @@ stop_backup: * functionality of replication slots */ if (server_version_num >= 90400 && - backup_label.min_failover_slot_lsn == InvalidXLogRecPtr) + backup_label.min_failover_slot_lsn == InvalidXLogRecPtr) { maxlen_snprintf(script, "rm -rf %s/pg_replslot/*", local_data_directory); @@ -2300,6 +2298,8 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back } (void) fclose(label_file); + + log_debug(_("read_backup_label: label is %s; start wal file is %s\n"), out_backup_label->label, out_backup_label->start_wal_file); } static void From b14d8ddb7418f8c2c39325f384220458d245237b Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 24 May 2016 17:02:32 +0900 Subject: [PATCH 071/113] Ensure read_backup_label() does not exit on error This would leave an unstopped backup; we'll let do_standby_clone() do any cleanup necessary before exiting with an error. --- repmgr.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/repmgr.c b/repmgr.c index 100122bd..8a1649e4 100644 --- a/repmgr.c +++ b/repmgr.c @@ -121,7 +121,7 @@ static bool remote_command(const char *host, const char *user, const char *comma static void format_db_cli_params(const char *conninfo, char *output); static bool copy_file(const char *old_filename, const char *new_filename); -static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label); +static bool read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label); /* Global variables */ static const char *keywords[6]; @@ -1691,8 +1691,11 @@ do_standby_clone(void) } /* Read backup label copied from primary */ - /* XXX ensure this function does not exit on error as we'd need to stop the backup */ - read_backup_label(local_data_directory, &backup_label); + if (read_backup_label(local_data_directory, &backup_label) == false) + { + r = retval = ERR_BAD_BACKUP_LABEL; + goto stop_backup; + } /* Handle tablespaces */ @@ -2168,8 +2171,7 @@ parse_label_lsn(const char *label_key, const char *label_value) { log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"), label_key, label_value); - - exit(ERR_BAD_BACKUP_LABEL); + return InvalidXLogRecPtr; } return ptr; @@ -2190,7 +2192,7 @@ parse_label_lsn(const char *label_key, const char *label_value) * *====================================== */ -static void +static bool read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label) { char label_path[MAXPGPATH]; @@ -2215,7 +2217,7 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back { log_err(_("read_backup_label: could not open backup label file %s: %s"), label_path, strerror(errno)); - exit(ERR_BAD_BACKUP_LABEL); + return false; } log_info(_("read_backup_label: parsing backup label file '%s'\n"), @@ -2237,7 +2239,7 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back { log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""), label_key, label_value); - exit(ERR_BAD_BACKUP_LABEL); + return false; } log_debug("standby clone: got backup label entry \"%s: %s\"\n", @@ -2249,14 +2251,19 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back char wal_filename[MAXLEN]; nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename); + if (nmatches != 2) { log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n")); - exit(ERR_BAD_BACKUP_LABEL); + return false; } + out_backup_label->start_wal_location = parse_label_lsn(&label_key[0], start_wal_location); + if (out_backup_label->start_wal_location == InvalidXLogRecPtr) + return false; + (void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN); out_backup_label->start_wal_file[MAXLEN-1] = '\0'; } @@ -2264,6 +2271,9 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back { out_backup_label->checkpoint_location = parse_label_lsn(&label_key[0], &label_value[0]); + + if (out_backup_label->checkpoint_location == InvalidXLogRecPtr) + return false; } else if (strcmp(label_key, "BACKUP METHOD") == 0) { @@ -2289,6 +2299,9 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back { out_backup_label->min_failover_slot_lsn = parse_label_lsn(&label_key[0], &label_value[0]); + + if (out_backup_label->min_failover_slot_lsn == InvalidXLogRecPtr) + return false; } else { @@ -2300,6 +2313,8 @@ read_backup_label(const char *local_data_directory, struct BackupLabel *out_back (void) fclose(label_file); log_debug(_("read_backup_label: label is %s; start wal file is %s\n"), out_backup_label->label, out_backup_label->start_wal_file); + + return true; } static void From d7e85f7565e33dcda7afa05cc1fd15dcdb913d8c Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 30 May 2016 07:47:09 +0900 Subject: [PATCH 072/113] Clarify purpose of remapping code --- repmgr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 8a1649e4..a38c127a 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1777,7 +1777,12 @@ do_standby_clone(void) goto stop_backup; } - /* Update symlinks in pg_tblspc */ + /* + * If valid tablespace mappings were provided, arrange for the affected + * tablespaces to be remapped + * (if no tablespace mappings were provided, non-default tablespaces + * will be copied as-is by pg_basebackup or rsync) + */ if (mapping_found == true) { /* 9.5 and later - create a tablespace_map file */ From 3bcea46c3b44b299b31610da8a4350235cefbef9 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 30 May 2016 10:48:08 +0900 Subject: [PATCH 073/113] Clarify handling of tablespace_map file. --- repmgr.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/repmgr.c b/repmgr.c index a38c127a..03b926e8 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1697,7 +1697,7 @@ do_standby_clone(void) goto stop_backup; } - /* Handle tablespaces */ + /* Copy tablespaces and, if required, remap to a new location */ sqlquery_snprintf(sqlquery, " SELECT oid, pg_tablespace_location(oid) AS spclocation " @@ -1734,7 +1734,6 @@ do_standby_clone(void) appendPQExpBuffer(&tblspc_dir_src, "%s", PQgetvalue(res, i, 1)); /* Check if tablespace path matches one of the provided tablespace mappings */ - if (options.tablespace_mapping.head != NULL) { for (cell = options.tablespace_mapping.head; cell; cell = cell->next) @@ -1778,14 +1777,14 @@ do_standby_clone(void) } /* - * If valid tablespace mappings were provided, arrange for the affected - * tablespaces to be remapped - * (if no tablespace mappings were provided, non-default tablespaces - * will be copied as-is by pg_basebackup or rsync) + * If a valid mapping was provide for this tablespace, arrange for it to + * be remapped + * (if no tablespace mappings was provided, the link will be copied as-is + * by pg_basebackup or rsync and no action is required) */ if (mapping_found == true) { - /* 9.5 and later - create a tablespace_map file */ + /* 9.5 and later - append to the tablespace_map file */ if (server_version_num >= 90500) { tablespace_map_rewrite = true; @@ -1829,6 +1828,13 @@ do_standby_clone(void) PQclear(res); + /* + * For 9.5 and later, if tablespace remapping was requested, we'll need + * to rewrite the tablespace map file ourselves. + * The tablespace map file is read on startup and any links created by + * the backend; we could do this ourselves like for pre-9.5 servers, but + * it's better to rely on functionality the backend provides. + */ if (server_version_num >= 90500 && tablespace_map_rewrite == true) { PQExpBufferData tablespace_map_filename; @@ -1841,7 +1847,9 @@ do_standby_clone(void) /* Unlink any existing file (it should be there, but we don't care if it isn't) */ if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT) { - log_err(_("unable to remove tablespace_map file %s\n"), tablespace_map_filename.data); + log_err(_("unable to remove tablespace_map file %s: %s\n"), + tablespace_map_filename.data, + strerror(errno)); r = retval = ERR_BAD_BASEBACKUP; goto stop_backup; From afc904f876bef8af6821a6b9da3a21bd0543e0e3 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 30 May 2016 16:01:38 +0900 Subject: [PATCH 074/113] Fix typos and whitespace --- config.c | 2 +- repmgr.c | 4 ++-- repmgr.conf.sample | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config.c b/config.c index 6a63c0a0..38b66740 100644 --- a/config.c +++ b/config.c @@ -375,7 +375,7 @@ parse_config(t_configuration_options *options) else if (strcmp(name, "tablespace_mapping") == 0) tablespace_list_append(options, value); else if (strcmp(name, "pg_restore_command") == 0) - strncpy(options->pg_restore_command, value, MAXLEN); + strncpy(options->pg_restore_command, value, MAXLEN); else { known_parameter = false; diff --git a/repmgr.c b/repmgr.c index 03b926e8..1cba47c7 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4264,10 +4264,10 @@ create_recovery_file(const char *data_dir) log_debug(_("recovery.conf: %s"), line); } - /* If pg_restore_command is set, we use it as resore_cmmand in recovery.conf */ + /* If pg_restore_command is set, we use it as restore_cmmand in recovery.conf */ if (strcmp(options.pg_restore_command, "") != 0) { - maxlen_snprintf(line, "restore_command = '%s'\n", + maxlen_snprintf(line, "restore_command = '%s'\n", options.pg_restore_command); if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false) return false; diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 5964f14d..2d46e7d9 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -116,7 +116,7 @@ # You can specify a restore_command to be used in the recovery.conf that # will be placed in the cloned standby # -# pg_resore_command = cp /path/to/archived/wals/%f %p +# pg_restore_command = cp /path/to/archived/wals/%f %p # Failover settings (repmgrd) # --------------------------- From f18d629bd29c541ec82e76b7320202a4a2ac777e Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 30 May 2016 09:41:38 -0300 Subject: [PATCH 075/113] Typo in a comment. Reported by @nucfisher --- repmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.c b/repmgr.c index 1cba47c7..536dbe15 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4264,7 +4264,7 @@ create_recovery_file(const char *data_dir) log_debug(_("recovery.conf: %s"), line); } - /* If pg_restore_command is set, we use it as restore_cmmand in recovery.conf */ + /* If pg_restore_command is set, we use it as restore_command in recovery.conf */ if (strcmp(options.pg_restore_command, "") != 0) { maxlen_snprintf(line, "restore_command = '%s'\n", From 0dd617cfca8483fcd124f6f993587c1f59cc3383 Mon Sep 17 00:00:00 2001 From: Martin Date: Thu, 2 Jun 2016 13:56:06 -0300 Subject: [PATCH 076/113] The ALTER TABLE to set the foreign key as DEFERRABLE only worked on 9.4+, as there is no ALTER CONSTRAINT in 9.3. This new ALTER TABLE does the same in two hops by removing the foreign key and creating it again in the same ALTER TABLE. This fixes #183 --- sql/repmgr3.1.1_repmgr3.1.2.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/repmgr3.1.1_repmgr3.1.2.sql b/sql/repmgr3.1.1_repmgr3.1.2.sql index 7dedacc8..36b3dc79 100644 --- a/sql/repmgr3.1.1_repmgr3.1.2.sql +++ b/sql/repmgr3.1.1_repmgr3.1.2.sql @@ -27,5 +27,6 @@ BEGIN; -ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE; +ALTER TABLE repl_nodes DROP CONSTRAINT repl_nodes_upstream_node_id_fkey, + ADD CONSTRAINT repl_nodes_upstream_node_id_fkey FOREIGN KEY (upstream_node_id) REFERENCES repl_nodes(id) DEFERRABLE; COMMIT; From 384618cb332b276c5b57415e88b3fa6905f7cdb4 Mon Sep 17 00:00:00 2001 From: Martin Date: Thu, 2 Jun 2016 14:23:14 -0300 Subject: [PATCH 077/113] There was a missing table in sql/repmgr2_repmgr3.sql which made events error when trying to insert them. This is just a copy and paste from the table creation in repmgr.c This fixes #184 reported by Andreas Kretschmer --- sql/repmgr2_repmgr3.sql | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sql/repmgr2_repmgr3.sql b/sql/repmgr2_repmgr3.sql index f99e84d0..a4eacd86 100644 --- a/sql/repmgr2_repmgr3.sql +++ b/sql/repmgr2_repmgr3.sql @@ -63,6 +63,15 @@ UPDATE repl_nodes SET type = 'master' WHERE id = $master_id; -- UPDATE repl_nodes SET active = FALSE WHERE id IN (...); +/* There's also an event table which we need to create */ +CREATE TABLE repl_events ( + node_id INTEGER NOT NULL, + event TEXT NOT NULL, + successful BOOLEAN NOT NULL DEFAULT TRUE, + event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + details TEXT NULL +); + /* When you're sure of your changes, commit them */ -- COMMIT; From cc610f995df688fdec883098afe396f27a10b96a Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 3 Jun 2016 21:25:22 +0900 Subject: [PATCH 078/113] Whitespace and typo fixes --- repmgr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/repmgr.c b/repmgr.c index 536dbe15..3580e7a2 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1,3 +1,4 @@ + /* * repmgr.c - Command interpreter for the repmgr package * Copyright (C) 2ndQuadrant, 2010-2016 @@ -4477,16 +4478,16 @@ run_basebackup(const char *data_dir, int server_version) */ if (server_version < 90600 || !options.use_replication_slots) { - /* + /* * We're going to check first if the user set the xlog method in the repmgr.conf - * file. We don't want to have conflits with pg_basebackup due to specifying the + * file. We don't want to have conflicts with pg_basebackup due to specifying the * method twice. */ - const char xlog_short[4] = "-X "; + const char xlog_short[4] = "-X "; const char xlog_long[14] = "--xlog-method"; if (strstr(options.pg_basebackup_options, xlog_short) == NULL && strstr(options.pg_basebackup_options, xlog_long) == NULL ) { - appendPQExpBuffer(¶ms, " -X stream"); + appendPQExpBuffer(¶ms, " -X stream"); } } From 46ff9fb587ecd43d8f93fa394013b065ba49b609 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 3 Jun 2016 16:03:03 -0300 Subject: [PATCH 079/113] No code change, just indentation was incorrect in the failover part making it hard to read. --- repmgrd.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 24249000..d0f7af51 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -827,37 +827,37 @@ standby_monitor(void) */ upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id); - if (upstream_node.type == MASTER) - { - log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"), - node_info.upstream_node_id); - do_master_failover(); - } - else - { - log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"), - node_info.upstream_node_id); + if (upstream_node.type == MASTER) + { + log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"), + node_info.upstream_node_id); + do_master_failover(); + } + else + { + log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"), + node_info.upstream_node_id); - if (!do_upstream_standby_failover(upstream_node)) - { - PQExpBufferData errmsg; + if (!do_upstream_standby_failover(upstream_node)) + { + PQExpBufferData errmsg; initPQExpBuffer(&errmsg); appendPQExpBuffer(&errmsg, - _("unable to reconnect to new upstream node, terminating...")); + _("unable to reconnect to new upstream node, terminating...")); log_err("%s\n", errmsg.data); create_event_record(master_conn, - &local_options, - local_options.node, - "repmgrd_shutdown", - false, - errmsg.data); + &local_options, + local_options.node, + "repmgrd_shutdown", + false, + errmsg.data); terminate(ERR_DB_CON); } - } + } return; } } From 951879f80df531c68d9ade45c3777f5feb3e6d34 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 3 Jun 2016 20:19:02 -0300 Subject: [PATCH 080/113] Typo noticed by Brett Maton. --- repmgr.conf.sample | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 2d46e7d9..0558b085 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -19,7 +19,7 @@ # Node ID and name # (Note: we recommend to avoid naming nodes after their initial -# replication funcion, as this will cause confusion when e.g. +# replication function, as this will cause confusion when e.g. # "standby2" is promoted to primary) #node=2 # a unique integer #node_name=node2 # an arbitrary (but unique) string; we recommend using From b6ebd34e2f664949b096d1f0eebb68b726238307 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 3 Jun 2016 20:20:22 -0300 Subject: [PATCH 081/113] Some other indentation fixes found --- repmgrd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index d0f7af51..8217baf6 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1140,8 +1140,8 @@ do_master_failover(void) */ t_node_info nodes[FAILOVER_NODES_MAX_CHECK]; - /* Store details of the failed node here */ - t_node_info failed_master = T_NODE_INFO_INITIALIZER; + /* Store details of the failed node here */ + t_node_info failed_master = T_NODE_INFO_INITIALIZER; /* Store details of the best candidate for promotion to master here */ t_node_info best_candidate = T_NODE_INFO_INITIALIZER; @@ -1151,7 +1151,7 @@ do_master_failover(void) "SELECT id, conninfo, type, upstream_node_id " " FROM %s.repl_nodes " " WHERE cluster = '%s' " - " AND active IS TRUE " + " AND active IS TRUE " " AND priority > 0 " " ORDER BY priority DESC, id " " LIMIT %i ", From 005640be51e17caffa57ad1fb60945b7bf8d71ac Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Sun, 5 Jun 2016 10:20:42 +0900 Subject: [PATCH 082/113] Fix PQconninfoParse() return type check --- dbutils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbutils.c b/dbutils.c index efc0b75b..7138fc81 100644 --- a/dbutils.c +++ b/dbutils.c @@ -538,7 +538,7 @@ get_conninfo_value(const char *conninfo, const char *keyword, char *output) conninfo_options = PQconninfoParse(conninfo, NULL); - if (conninfo_options == false) + if (conninfo_options == NULL) { log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo); return false; From 0d42b771f5452bf6f73d677efd16922a023f3242 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 6 Jun 2016 23:41:47 +0900 Subject: [PATCH 083/113] Remove unnecessary get_server_version() calls in do_standby_clone() We cache the value at the start of the function, and it's reasonable to assume that the server version is not going to suddenly change. --- repmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repmgr.c b/repmgr.c index 3580e7a2..c8a0b7cf 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1412,7 +1412,7 @@ do_standby_clone(void) if (*runtime_options.recovery_min_apply_delay) { - if (get_server_version(upstream_conn, NULL) < 90400) + if (server_version_num < 90400) { log_err(_("PostgreSQL 9.4 or greater required for --recovery-min-apply-delay\n")); PQfinish(upstream_conn); @@ -1436,7 +1436,7 @@ do_standby_clone(void) { TablespaceListCell *cell; - if (get_server_version(upstream_conn, NULL) < 90400 && !runtime_options.rsync_only) + if (server_version_num < 90400 && !runtime_options.rsync_only) { log_err(_("in PostgreSQL 9.3, tablespace mapping can only be used in conjunction with --rsync-only\n")); PQfinish(upstream_conn); @@ -1878,7 +1878,7 @@ do_standby_clone(void) } else { - r = run_basebackup(local_data_directory, get_server_version(upstream_conn, NULL)); + r = run_basebackup(local_data_directory, server_version_num); if (r != 0) { log_warning(_("standby clone: base backup failed\n")); From 66fd003ab46e5d37210ef54c4e249b1f6c469140 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 10 Jun 2016 17:58:10 +0900 Subject: [PATCH 084/113] Schema-qualify pg_catalog objects --- repmgrd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 8217baf6..9e012155 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -667,7 +667,7 @@ witness_monitor(void) " replication_lag, apply_lag )" " VALUES(%d, %d, " " '%s'::TIMESTAMP WITH TIME ZONE, NULL, " - " pg_current_xlog_location(), NULL, " + " pg_catalog.pg_current_xlog_location(), NULL, " " 0, 0) ", get_repmgr_schema_quoted(my_local_conn), master_options.node, @@ -983,9 +983,11 @@ standby_monitor(void) /* Get local xlog info */ sqlquery_snprintf(sqlquery, - "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " - "pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), " - "pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()"); + "SELECT CURRENT_TIMESTAMP, " + "pg_catalog.pg_last_xlog_receive_location(), " + "pg_catalog.pg_last_xlog_replay_location(), " + "pg_catalog.pg_last_xact_replay_timestamp(), " + "pg_catalog.pg_last_xlog_receive_location() >= pg_catalog.pg_last_xlog_replay_location()"); res = PQexec(my_local_conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -1991,7 +1993,7 @@ check_cluster_configuration(PGconn *conn) log_info(_("checking cluster configuration with schema '%s'\n"), get_repmgr_schema()); sqlquery_snprintf(sqlquery, - "SELECT oid FROM pg_class " + "SELECT oid FROM pg_catalog.pg_class " " WHERE oid = '%s.repl_nodes'::regclass ", get_repmgr_schema_quoted(master_conn)); res = PQexec(conn, sqlquery); From 1ade1acb22d7aed0fa8584e5841afbe919f915ae Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 15 Jun 2016 15:41:10 +0900 Subject: [PATCH 085/113] Report standby location as last apply location when in archive recovery Otherwise the monitoring table's 'last_wal_standby_location' will stay at the location of the last streaming WAL received. This complements the bugfix applied in e814c1120e6cdc2e3a0a53effa355ac2189b51c0. --- repmgrd.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 9e012155..50168587 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -695,7 +695,7 @@ standby_monitor(void) { PGresult *res; char monitor_standby_timestamp[MAXLEN]; - char last_wal_master_location[MAXLEN]; + char last_wal_primary_location[MAXLEN]; char last_xlog_receive_location[MAXLEN]; char last_xlog_replay_location[MAXLEN]; char last_xact_replay_timestamp[MAXLEN]; @@ -1048,17 +1048,23 @@ standby_monitor(void) return; } - strncpy(last_wal_master_location, PQgetvalue(res, 0, 0), MAXLEN); + strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN); PQclear(res); /* Calculate the lag */ - lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL); + lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL); lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL); if (last_xlog_receive_location_gte_replayed == false) { + /* + * We're not receiving streaming WAL - in this case the receive location + * equals the last replayed location + */ + lsn_last_xlog_receive_location = lsn_last_xlog_replay_location; + strncpy(last_xlog_receive_location, last_xlog_replay_location, MAXLEN); } else { @@ -1091,7 +1097,7 @@ standby_monitor(void) local_options.node, monitor_standby_timestamp, last_xact_replay_timestamp, - last_wal_master_location, + last_wal_primary_location, last_xlog_receive_location, (long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location), (long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location)); From 3d6c349d885ac222830ebb22cfddf764d5756169 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 17 Jun 2016 14:43:02 +0900 Subject: [PATCH 086/113] Rename "pg_restore_command" to "restore_command" The 'pg_' prefix could cause confusion with actual binaries (pg_ctl, pg_basebackup etc.) and there's no obvious reason why we need it. --- config.c | 6 +++--- config.h | 2 +- repmgr.c | 6 +++--- repmgr.conf.sample | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/config.c b/config.c index 38b66740..4af25c24 100644 --- a/config.c +++ b/config.c @@ -224,7 +224,7 @@ parse_config(t_configuration_options *options) memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options)); memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options)); - memset(options->pg_restore_command, 0, sizeof(options->pg_restore_command)); + memset(options->restore_command, 0, sizeof(options->restore_command)); /* default master_response_timeout is 60 seconds */ options->master_response_timeout = 60; @@ -374,8 +374,8 @@ parse_config(t_configuration_options *options) parse_event_notifications_list(options, value); else if (strcmp(name, "tablespace_mapping") == 0) tablespace_list_append(options, value); - else if (strcmp(name, "pg_restore_command") == 0) - strncpy(options->pg_restore_command, value, MAXLEN); + else if (strcmp(name, "restore_command") == 0) + strncpy(options->restore_command, value, MAXLEN); else { known_parameter = false; diff --git a/config.h b/config.h index 37186444..3449ad3c 100644 --- a/config.h +++ b/config.h @@ -72,7 +72,7 @@ typedef struct char pg_bindir[MAXLEN]; char pg_ctl_options[MAXLEN]; char pg_basebackup_options[MAXLEN]; - char pg_restore_command[MAXLEN]; + char restore_command[MAXLEN]; char logfile[MAXLEN]; int monitor_interval_secs; int retry_promote_interval_secs; diff --git a/repmgr.c b/repmgr.c index c8a0b7cf..c1ba3187 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4265,11 +4265,11 @@ create_recovery_file(const char *data_dir) log_debug(_("recovery.conf: %s"), line); } - /* If pg_restore_command is set, we use it as restore_command in recovery.conf */ - if (strcmp(options.pg_restore_command, "") != 0) + /* If restore_command is set, we use it as restore_command in recovery.conf */ + if (strcmp(options.restore_command, "") != 0) { maxlen_snprintf(line, "restore_command = '%s'\n", - options.pg_restore_command); + options.restore_command); if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false) return false; log_debug(_("recovery.conf: %s"), line); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 0558b085..4cfa3d71 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -116,7 +116,7 @@ # You can specify a restore_command to be used in the recovery.conf that # will be placed in the cloned standby # -# pg_restore_command = cp /path/to/archived/wals/%f %p +# restore_command = cp /path/to/archived/wals/%f %p # Failover settings (repmgrd) # --------------------------- From 5d8b1a3a31795263b6ab98aa3c4fb460284f1a4a Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 20 Jun 2016 10:55:25 +0900 Subject: [PATCH 087/113] monitoring: ensure that invalid replication_lag value is not inserted. Per Github #189. --- repmgrd.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 50168587..a6437cb1 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -706,6 +706,9 @@ standby_monitor(void) XLogRecPtr lsn_last_xlog_receive_location; XLogRecPtr lsn_last_xlog_replay_location; + long long unsigned int replication_lag; + long long unsigned int apply_lag; + int connection_retries, ret; bool did_retry = false; @@ -1051,24 +1054,39 @@ standby_monitor(void) strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN); PQclear(res); - /* Calculate the lag */ + lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL); - lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL); + lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); + + /* Calculate apply lag */ if (last_xlog_receive_location_gte_replayed == false) { /* * We're not receiving streaming WAL - in this case the receive location * equals the last replayed location */ - - lsn_last_xlog_receive_location = lsn_last_xlog_replay_location; + apply_lag = 0; strncpy(last_xlog_receive_location, last_xlog_replay_location, MAXLEN); } else { - lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); + apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location; + } + + /* Calculate replication lag */ + if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location) + { + replication_lag = (long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location); + } + else + { + /* This should never happen, but in case it does set lag to zero */ + log_warning("Master xlog (%s) location appears less than standby receive location (%s)\n", + last_wal_primary_location, + last_xlog_receive_location); + replication_lag = 0; } /* @@ -1099,8 +1117,8 @@ standby_monitor(void) last_xact_replay_timestamp, last_wal_primary_location, last_xlog_receive_location, - (long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location), - (long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location)); + replication_lag, + apply_lag); /* * Execute the query asynchronously, but don't check for a result. We will From 303bb22ee13fb3292d246571480004065e3daf0e Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 20 Jun 2016 12:23:34 +0900 Subject: [PATCH 088/113] Note potential replication lag check improvement --- repmgrd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index a6437cb1..a72c6c35 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1040,7 +1040,12 @@ standby_monitor(void) "Replayed WAL newer than received WAL - is this standby connected to its upstream?\n"); } - /* Get master xlog info */ + /* + * Get master xlog position + * + * TODO: investigate whether pg_current_xlog_insert_location() would be a better + * choice; see: https://github.com/2ndQuadrant/repmgr/issues/189 + */ sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()"); res = PQexec(master_conn, sqlquery); From dd5b6f9f1252bb86c84b8e5e2d8fd94e455565fc Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 21 Jun 2016 16:04:41 +0900 Subject: [PATCH 089/113] Whitespace fixes --- repmgrd.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index a72c6c35..d4db87fe 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -832,18 +832,18 @@ standby_monitor(void) if (upstream_node.type == MASTER) { - log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"), - node_info.upstream_node_id); + log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"), + node_info.upstream_node_id); do_master_failover(); } else { - log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"), - node_info.upstream_node_id); + log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"), + node_info.upstream_node_id); - if (!do_upstream_standby_failover(upstream_node)) - { - PQExpBufferData errmsg; + if (!do_upstream_standby_failover(upstream_node)) + { + PQExpBufferData errmsg; initPQExpBuffer(&errmsg); appendPQExpBuffer(&errmsg, @@ -2024,8 +2024,10 @@ check_cluster_configuration(PGconn *conn) sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_catalog.pg_class " " WHERE oid = '%s.repl_nodes'::regclass ", - get_repmgr_schema_quoted(master_conn)); + get_repmgr_schema_quoted(master_conn)); + res = PQexec(conn, sqlquery); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn)); From c16ab3c889d42fe969da28235b505ec96be34160 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 21 Jun 2016 17:30:22 +0900 Subject: [PATCH 090/113] Fix handling of global PGconn variables in repmgrd Don't call PQfinish before calling terminate(), elsewhere always set to NULL after calling PQfinish(). This fixes GitHub #182. --- repmgrd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index d4db87fe..06a99c18 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -1195,7 +1195,6 @@ do_master_failover(void) { log_err(_("unable to retrieve node records: %s\n"), PQerrorMessage(my_local_conn)); PQclear(res); - PQfinish(my_local_conn); terminate(ERR_DB_QUERY); } @@ -1569,12 +1568,12 @@ do_master_failover(void) log_notice(_("Original master reappeared before this standby was promoted - no action taken\n")); PQfinish(master_conn); + master_conn = NULL; + /* no failover occurred but we'll want to restart connections */ failover_done = true; return; } - - PQfinish(my_local_conn); } log_err(_("promote command failed. You could check and try it manually.\n")); @@ -2446,6 +2445,8 @@ get_node_info(PGconn *conn, char *cluster, int node_id) errmsg.data); PQfinish(conn); + conn = NULL; + terminate(ERR_DB_QUERY); } From a2b5ba595ac410378d9b1a598c0b7d5003c3a16b Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 23 Jun 2016 09:47:35 +0900 Subject: [PATCH 091/113] repmgrd: reword log message for clarity --- repmgrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repmgrd.c b/repmgrd.c index 06a99c18..5475d6c7 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -399,7 +399,7 @@ main(int argc, char **argv) case STANDBY: /* We need the node id of the master server as well as a connection to it */ - log_info(_("connecting to master node '%s'\n"), + log_info(_("connecting to master node of cluster '%s'\n"), local_options.cluster_name); master_conn = get_master_connection(my_local_conn, From 3fac975de6cb1f33c8ac0d55a90750ac6e7adc02 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 24 Jun 2016 09:25:41 +0900 Subject: [PATCH 092/113] Prevent multiple nodes being registered with the same name. Fixes GitHub #192. --- dbutils.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- dbutils.h | 1 + repmgr.c | 25 ++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/dbutils.c b/dbutils.c index 7138fc81..29a2f6be 100644 --- a/dbutils.c +++ b/dbutils.c @@ -31,6 +31,7 @@ char repmgr_schema[MAXLEN] = ""; char repmgr_schema_quoted[MAXLEN] = ""; +static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info); PGconn * _establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice) @@ -1681,8 +1682,7 @@ int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info) { char sqlquery[QUERY_STR_LEN]; - PGresult *res; - int ntuples; + int result; sqlquery_snprintf( sqlquery, @@ -1696,6 +1696,49 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery); + result = _get_node_record(conn, cluster, sqlquery, node_info); + + if (result == 0) + { + log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id); + } + + return result; +} + +int +get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info) +{ + char sqlquery[QUERY_STR_LEN]; + int result; + + sqlquery_snprintf( + sqlquery, + "SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active" + " FROM %s.repl_nodes " + " WHERE cluster = '%s' " + " AND node_name = %s", + get_repmgr_schema_quoted(conn), + cluster, + node_name); + + result = _get_node_record(conn, cluster, sqlquery, node_info); + + if (result == 0) + { + log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %s\n", node_name); + } + + return result; +} + + +static int +_get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info) +{ + int ntuples; + PGresult *res; + res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -1706,7 +1749,6 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info if (ntuples == 0) { - log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id); return 0; } @@ -1727,6 +1769,9 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info } + + + int get_node_replication_state(PGconn *conn, char *node_name, char *output) { diff --git a/dbutils.h b/dbutils.h index 8d980d8e..85a2b167 100644 --- a/dbutils.h +++ b/dbutils.h @@ -125,6 +125,7 @@ bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *c bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active); bool delete_node_record(PGconn *conn, int node, char *action); int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info); +int get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info); bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active); bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id); bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details); diff --git a/repmgr.c b/repmgr.c index c1ba3187..d99e7d3c 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1127,8 +1127,9 @@ do_standby_register(void) PGconn *master_conn; int ret; - bool record_created; + t_node_info node_record; + int node_result; log_info(_("connecting to standby database\n")); conn = establish_db_connection(options.conninfo, true); @@ -1185,6 +1186,28 @@ do_standby_register(void) } } + /* + * Check that an active node with the same node_name doesn't exist already + */ + + node_result = get_node_record_by_name(master_conn, + options.cluster_name, + options.node_name, + &node_record); + + if (node_result) + { + if (node_record.active == true) + { + log_err(_("Node %i exists already with node_name \"%s\""), + node_record.node_id, + options.node_name); + PQfinish(master_conn); + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + } + record_created = create_node_record(master_conn, "standby register", options.node, From fbb65b4a435e175ba3afc40b4ed3fc85723b6728 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 24 Jun 2016 10:19:20 +0900 Subject: [PATCH 093/113] Remove RHEL packaging files. There's no point in maintaining in parallel to the PGDG packages. See also notes in GitHub #156. --- RHEL/repmgr3-93.spec | 61 ------------------- RHEL/repmgrd.init | 133 ----------------------------------------- RHEL/repmgrd.logrotate | 14 ----- RHEL/repmgrd.sysconfig | 21 ------- 4 files changed, 229 deletions(-) delete mode 100644 RHEL/repmgr3-93.spec delete mode 100755 RHEL/repmgrd.init delete mode 100644 RHEL/repmgrd.logrotate delete mode 100644 RHEL/repmgrd.sysconfig diff --git a/RHEL/repmgr3-93.spec b/RHEL/repmgr3-93.spec deleted file mode 100644 index e5f9407e..00000000 --- a/RHEL/repmgr3-93.spec +++ /dev/null @@ -1,61 +0,0 @@ -Summary: repmgr -Name: repmgr -Version: 3.0 -Release: 1 -License: GPLv3 -Group: System Environment/Daemons -URL: http://repmgr.org -Packager: Ian Barwick -Vendor: 2ndQuadrant Limited -Distribution: centos -Source0: %{name}-%{version}.tar.gz -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root - -%description -repmgr is a utility suite which greatly simplifies -the process of setting up and managing replication -using streaming replication within a cluster of -PostgreSQL servers. - -%prep -%setup - -%build -export PATH=$PATH:/usr/pgsql-9.3/bin/ -%{__make} USE_PGXS=1 - -%install -[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot} - -export PATH=$PATH:/usr/pgsql-9.3/bin/ -%{__make} USE_PGXS=1 install DESTDIR=%{buildroot} INSTALL="install -p" -%{__make} USE_PGXS=1 install_prog DESTDIR=%{buildroot} INSTALL="install -p" -%{__make} USE_PGXS=1 install_rhel DESTDIR=%{buildroot} INSTALL="install -p" - - -%clean -[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot} - - -%files -%defattr(-,root,root) -/usr/bin/repmgr -/usr/bin/repmgrd -/usr/pgsql-9.3/bin/repmgr -/usr/pgsql-9.3/bin/repmgrd -/usr/pgsql-9.3/lib/repmgr_funcs.so -/usr/pgsql-9.3/share/contrib/repmgr.sql -/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql -/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql -/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql -%attr(0755,root,root)/etc/init.d/repmgrd -%attr(0644,root,root)/etc/sysconfig/repmgrd -%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample - -%changelog -* Tue Mar 10 2015 Ian Barwick ian@2ndquadrant.com> -- build for repmgr 3.0 -* Thu Jun 05 2014 Nathan Van Overloop 2.0.2 -- fix witness creation to create db and user if needed -* Fri Apr 04 2014 Nathan Van Overloop 2.0.1 -- initial build for RHEL6 diff --git a/RHEL/repmgrd.init b/RHEL/repmgrd.init deleted file mode 100755 index 6d79e598..00000000 --- a/RHEL/repmgrd.init +++ /dev/null @@ -1,133 +0,0 @@ -#!/bin/sh -# -# chkconfig: - 75 16 -# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL -# processname: repmgrd -# pidfile="/var/run/${NAME}.pid" - -# Source function library. -INITD=/etc/rc.d/init.d -. $INITD/functions - -# Get function listing for cross-distribution logic. -TYPESET=`typeset -f|grep "declare"` - -# Get network config. -. /etc/sysconfig/network - -DESC="PostgreSQL replication management and monitoring daemon" -NAME=repmgrd - -REPMGRD_ENABLED=no -REPMGRD_OPTS= -REPMGRD_USER=postgres -REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd -REPMGRD_PIDFILE=/var/run/repmgrd.pid -REPMGRD_LOCK=/var/lock/subsys/${NAME} -REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log - -# Read configuration variable file if it is present -[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME - -# For SELinux we need to use 'runuser' not 'su' -if [ -x /sbin/runuser ] -then - SU=runuser -else - SU=su -fi - -test -x $REPMGRD_BIN || exit 0 - -case "$REPMGRD_ENABLED" in - [Yy]*) - break - ;; - *) - exit 0 - ;; -esac - - -if [ -z "${REPMGRD_OPTS}" ] -then - echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}" - exit 0 -fi - -start() -{ - REPMGRD_START=$"Starting ${NAME} service: " - - # Make sure startup-time log file is valid - if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ] - then - touch "${REPMGRD_LOG}" || exit 1 - chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}" - chmod go-rwx "${REPMGRD_LOG}" - [ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}" - fi - - echo -n "${REPMGRD_START}" - $SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null - sleep 2 - pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null` - if [ "x${pid}" != "x" ] - then - success "${REPMGRD_START}" - touch "${REPMGRD_LOCK}" - echo $pid > "${REPMGRD_PIDFILE}" - echo - else - failure "${REPMGRD_START}" - echo - script_result=1 - fi -} - -stop() -{ - echo -n $"Stopping ${NAME} service: " - if [ -e "${REPMGRD_LOCK}" ] - then - killproc ${NAME} - ret=$? - if [ $ret -eq 0 ] - then - echo_success - rm -f "${REPMGRD_PIDFILE}" - rm -f "${REPMGRD_LOCK}" - else - echo_failure - script_result=1 - fi - else - # not running; per LSB standards this is "ok" - echo_success - fi - echo -} - - -# See how we were called. -case "$1" in - start) - start - ;; - stop) - stop - ;; - status) - status -p $REPMGRD_PIDFILE $NAME - script_result=$? - ;; - restart) - stop - start - ;; - *) - echo $"Usage: $0 {start|stop|status|restart}" - exit 2 -esac - -exit $script_result diff --git a/RHEL/repmgrd.logrotate b/RHEL/repmgrd.logrotate deleted file mode 100644 index 6ff27afa..00000000 --- a/RHEL/repmgrd.logrotate +++ /dev/null @@ -1,14 +0,0 @@ -/var/log/repmgr/repmgr.log { - missingok - copytruncate - compress - notifempty - sharedscripts - create 0640 postgres postgres - nodateext - weekly - rotate 8 - postrotate - /bin/kill -HUP `cat /var/run/repmgr/repmgrd.pid 2>/dev/null` 2> /dev/null || true - endscript -} diff --git a/RHEL/repmgrd.sysconfig b/RHEL/repmgrd.sysconfig deleted file mode 100644 index 5d60ff58..00000000 --- a/RHEL/repmgrd.sysconfig +++ /dev/null @@ -1,21 +0,0 @@ -# default settings for repmgrd. This file is source by /bin/sh from -# /etc/init.d/repmgrd - -# disable repmgrd by default so it won't get started upon installation -# valid values: yes/no -REPMGRD_ENABLED=no - -# Options for repmgrd (required) -#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf" - -# User to run repmgrd as -#REPMGRD_USER=postgres - -# repmgrd binary -#REPMGRD_BIN=/usr/bin/repmgrd - -# pid file -#REPMGRD_PIDFILE=/var/run/repmgr/repmgrd.pid - -# log file -#REPMGRD_LOG=/var/log/repmgr/repmgrd.log From f1ee6e19b68786dc926bf6fac629a29c76ec1fa3 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 27 Jun 2016 11:21:54 +0900 Subject: [PATCH 094/113] Ensure configuration options correctly initialised in repmgrd.c Per GitHub #150. Also remove unused variable. --- config.c | 2 ++ config.h | 6 +++++- repmgrd.c | 6 ++---- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/config.c b/config.c index 4af25c24..f6b47954 100644 --- a/config.c +++ b/config.c @@ -240,6 +240,8 @@ parse_config(t_configuration_options *options) options->witness_repl_nodes_sync_interval_secs = 30; memset(options->event_notification_command, 0, sizeof(options->event_notification_command)); + options->event_notifications.head = NULL; + options->event_notifications.tail = NULL; options->tablespace_mapping.head = NULL; options->tablespace_mapping.tail = NULL; diff --git a/config.h b/config.h index 3449ad3c..bc6b92bf 100644 --- a/config.h +++ b/config.h @@ -83,7 +83,11 @@ typedef struct TablespaceList tablespace_mapping; } t_configuration_options; -#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } +/* + * The following will initialize the structure with a minimal set of options; + * actual defaults are set in parse_config() before parsing the configuration file + */ +#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } } typedef struct ErrorListCell { diff --git a/repmgrd.c b/repmgrd.c index 5475d6c7..47a3ca15 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -44,11 +44,11 @@ /* Local info */ -t_configuration_options local_options; +t_configuration_options local_options = T_CONFIGURATION_OPTIONS_INITIALIZER; PGconn *my_local_conn = NULL; /* Master info */ -t_configuration_options master_options; +t_configuration_options master_options = T_CONFIGURATION_OPTIONS_INITIALIZER; PGconn *master_conn = NULL; @@ -61,8 +61,6 @@ bool failover_done = false; char *pid_file = NULL; -t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER; - static void help(void); static void usage(void); static void check_cluster_configuration(PGconn *conn); From bd76d0eb92c22bfd8295d1a2ab0fdeb1031f4ba6 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 27 Jun 2016 12:32:10 +0900 Subject: [PATCH 095/113] Update postgresql.org links to https --- FAQ.md | 2 +- README.md | 10 +++++----- repmgr.conf.sample | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/FAQ.md b/FAQ.md index 0ecca1e9..2abbfdce 100644 --- a/FAQ.md +++ b/FAQ.md @@ -38,7 +38,7 @@ General No. Hash indexes and replication do not mix well and their use is explicitly discouraged; see: - http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175 + https://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175 `repmgr` -------- diff --git a/README.md b/README.md index 7366782a..9f2bb979 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ This guide assumes that you are familiar with PostgreSQL administration and streaming replication concepts. For further details on streaming replication, see this link: - http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION + https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION The following terms are used throughout the `repmgr` documentation. @@ -472,7 +472,7 @@ so should be used with care. Further options can be passed to the `pg_basebackup` utility via the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL documentation for more details of available options: - http://www.postgresql.org/docs/current/static/app-pgbasebackup.html + https://www.postgresql.org/docs/current/static/app-pgbasebackup.html ### Using rsync to clone a standby @@ -608,13 +608,13 @@ place. If using the default `pg_basebackup` method, we recommend setting pg_basebackup_options='--xlog-method=stream' See the `pg_basebackup` documentation for details: - http://www.postgresql.org/docs/current/static/app-pgbasebackup.html + https://www.postgresql.org/docs/current/static/app-pgbasebackup.html Otherwise it's necessary to set `wal_keep_segments` to an appropriately high value. Further information on replication slots in the PostgreSQL documentation: - http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS + https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS Promoting a standby server with repmgr @@ -816,7 +816,7 @@ should have been updated to reflect this: - `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that data checksums were enabled when the cluster was initialized. See the `pg_rewind` documentation for details: - http://www.postgresql.org/docs/current/static/app-pgrewind.html + https://www.postgresql.org/docs/current/static/app-pgrewind.html - `repmgrd` should not be running when a switchover is carried out, otherwise the `repmgrd` may try and promote a standby by itself. - Any other standbys attached to the old master will need to be manually diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 4cfa3d71..aeab45c8 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -28,7 +28,9 @@ # Database connection information as a conninfo string # This must be accessible to all servers in the cluster; for details see: -# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING +# +# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING +# #conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr' # Optional configuration items From 968c2f1954137cc9646987e357f076ddddb676d9 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 27 Jun 2016 13:57:40 +0900 Subject: [PATCH 096/113] Add notes about `connect_timeout` conninfo parameter. Per suggestion in GitHub #148 --- README.md | 55 +++++++++++++++++++++++++++++++--------------- repmgr.conf.sample | 6 +++++ 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 9f2bb979..6a319c42 100644 --- a/README.md +++ b/README.md @@ -237,15 +237,19 @@ both servers. On the master server, a PostgreSQL instance must be initialised and running. The following replication settings must be included in `postgresql.conf`: + + # Enable replication connections; set this figure to at least one more + # than the number of standbys which will connect to this server + # (note that repmgr will execute `pg_basebackup` in WAL streaming mode, + # which requires two free WAL senders) + + max_wal_senders = 10 + # Ensure WAL files contain enough information to enable read-only queries # on the standby wal_level = 'hot_standby' - # Enable up to 10 replication connections - - max_wal_senders = 10 - # How much WAL to retain on the master to allow a temporarily # disconnected standby to catch up again. The larger this is, the # longer the standby can be disconnected. This is needed only in @@ -259,16 +263,10 @@ The following replication settings must be included in `postgresql.conf`: hot_standby = on - # If archive_mode is enabled, check that 'archive_command' is non empty - # (however it's not practical to check that it actually represents a valid - # command). - # - # From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always' - # so for ease of backwards compatibility, rather than explicitly check for an - # enabled mode, check that it's not "off". + # Enable WAL file archiving archive_mode = on - # Set archive command to a script or application that will safetly store + # Set archive command to a script or application that will safely store # you WALs in a secure place. /bin/true is an example of a command that # ignores archiving. Use something more sensible. archive_command = '/bin/true' @@ -874,8 +872,8 @@ Adjust schema and node ID accordingly. A future `repmgr` release will make it possible to unregister failed standbys. -Automatic failover with repmgrd -------------------------------- +Automatic failover with `repmgrd` +--------------------------------- `repmgrd` is a management and monitoring daemon which runs on standby nodes and which can automate actions such as failover and updating standbys to @@ -995,8 +993,8 @@ during the failover: (3 rows) -repmgrd log rotation --------------------- +`repmgrd` log rotation +---------------------- Note that currently `repmgrd` does not provide logfile rotation. To ensure the current logfile does not grow indefinitely, configure your system's `logrotate` @@ -1012,8 +1010,29 @@ for up to 52 weeks and rotation forced if a file grows beyond 100Mb: create 0600 postgres postgres } -Monitoring ----------- + +`repmgrd` and PostgreSQL connection settings +-------------------------------------------- + +In addition to the `repmgr` configuration settings, parameters in the +`conninfo` string influence how `repmgr` makes a network connection to +PostgreSQL. In particular, if another server in the replication cluster +is unreachable at network level, system network settings will influence +the length of time it takes to determine that the connection is not possible. + +In particular explicitly setting a parameter for `connect_timeout` should +be considered; the effective minimum value of `2` (seconds) will ensure +that a connection failure at network level is reported as soon as possible, +otherwise dependeing on the system settings (e.g. `tcp_syn_retries` in Linux) +a delay of a minute or more is possible. + +For further details on `conninfo` network connection parameters, see: + + https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS + + +Monitoring with `repmgrd` +------------------------- When `repmgrd` is running with the option `-m/--monitoring-history`, it will constantly write standby node status information to the `repl_monitor` table, diff --git a/repmgr.conf.sample b/repmgr.conf.sample index aeab45c8..6b1af192 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -32,6 +32,12 @@ # https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING # #conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr' +# +# If repmgrd is in use, consider explicitly setting `connect_timeout` in the +# conninfo string to determine the length of time which elapses before +# a network connection attempt is abandoned; for details see: +# +# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT # Optional configuration items # ============================ From 74f6f97f263b57986e47025395d1d01c813676e1 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 29 Jun 2016 10:31:57 +0900 Subject: [PATCH 097/113] repmgrd: log whether in standby or witness monitor loop This is mainly for development and debugging purposes. --- repmgrd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 47a3ca15..27d612bb 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -460,16 +460,16 @@ main(int argc, char **argv) do { - log_verbose(LOG_DEBUG, "standby check loop...\n"); - - if (node_info.type == WITNESS) - { - witness_monitor(); - } - else if (node_info.type == STANDBY) + if (node_info.type == STANDBY) { + log_verbose(LOG_DEBUG, "standby check loop...\n"); standby_monitor(); } + else if (node_info.type == WITNESS) + { + log_verbose(LOG_DEBUG, "witness check loop...\n"); + witness_monitor(); + } sleep(local_options.monitor_interval_secs); From 66b7dbbed70035bfacfd57947a75a91352712dc1 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 29 Jun 2016 11:33:33 +0900 Subject: [PATCH 098/113] repmgr: use make_pg_path() consistently Per comment from gciolli. --- repmgr.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/repmgr.c b/repmgr.c index d99e7d3c..cfd00767 100644 --- a/repmgr.c +++ b/repmgr.c @@ -2734,6 +2734,8 @@ do_standby_follow(void) * * TODO: * - make connection test timeouts/intervals configurable (see below) + * - add command line option --remote_pg_bindir or similar to + * optionally handle cases where the remote pg_bindir is different */ static void @@ -2892,8 +2894,8 @@ do_standby_switchover(void) /* 9.5 and later have pg_rewind built-in - always use that */ use_pg_rewind = true; maxlen_snprintf(remote_pg_rewind, - "%s/pg_rewind", - pg_bindir); + "%s", + make_pg_path("pg_rewind")); } else { @@ -2913,8 +2915,8 @@ do_standby_switchover(void) else { maxlen_snprintf(remote_pg_rewind, - "%s/pg_rewind", - pg_bindir); + "%s", + make_pg_path("pg_rewind")); } } else @@ -3109,8 +3111,8 @@ do_standby_switchover(void) log_verbose(LOG_DEBUG, "remote_archive_config_dir: %s\n", remote_archive_config_dir); maxlen_snprintf(command, - "%s/repmgr standby archive-config -f %s --config-archive-dir=%s", - pg_bindir, + "%s standby archive-config -f %s --config-archive-dir=%s", + make_pg_path("repmgr"), runtime_options.remote_config_file, remote_archive_config_dir); @@ -3222,8 +3224,8 @@ do_standby_switchover(void) /* Execute pg_rewind */ maxlen_snprintf(command, - "%s/pg_rewind -D %s --source-server=\\'%s\\'", - pg_bindir, + "%s -D %s --source-server=\\'%s\\'", + remote_pg_rewind, remote_data_directory, options.conninfo); @@ -3244,8 +3246,8 @@ do_standby_switchover(void) /* Restore any previously archived config files */ maxlen_snprintf(command, - "%s/repmgr standby restore-config -D %s --config-archive-dir=%s", - pg_bindir, + "%s standby restore-config -D %s --config-archive-dir=%s", + make_pg_path("repmgr"), remote_data_directory, remote_archive_config_dir); @@ -3302,8 +3304,8 @@ do_standby_switchover(void) format_db_cli_params(options.conninfo, repmgr_db_cli_params); maxlen_snprintf(command, - "%s/repmgr -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone", - pg_bindir, + "%s -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone", + make_pg_path("repmgr"), remote_data_directory, runtime_options.remote_config_file, repmgr_db_cli_params @@ -3328,8 +3330,8 @@ do_standby_switchover(void) */ format_db_cli_params(options.conninfo, repmgr_db_cli_params); maxlen_snprintf(command, - "%s/repmgr -D %s -f %s %s standby follow", - pg_bindir, + "%s -D %s -f %s %s standby follow", + make_pg_path("repmgr"), remote_data_directory, runtime_options.remote_config_file, repmgr_db_cli_params From 097024a32f280ec98bbe3d90681c62b61539d9eb Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 29 Jun 2016 12:11:53 +0900 Subject: [PATCH 099/113] repmgr: add new error code ERR_SWITCHOVER_FAIL --- errcode.h | 1 + repmgr.c | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/errcode.h b/errcode.h index 3a9b70e3..153d7c00 100644 --- a/errcode.h +++ b/errcode.h @@ -38,5 +38,6 @@ #define ERR_INTERNAL 15 #define ERR_MONITORING_FAIL 16 #define ERR_BAD_BACKUP_LABEL 17 +#define ERR_SWITCHOVER_FAIL 18 #endif /* _ERRCODE_H_ */ diff --git a/repmgr.c b/repmgr.c index cfd00767..62d8cb29 100644 --- a/repmgr.c +++ b/repmgr.c @@ -2784,7 +2784,7 @@ do_standby_switchover(void) log_err(_("switchover must be executed from the standby node to be promoted\n")); PQfinish(local_conn); - exit(ERR_BAD_CONFIG); + exit(ERR_SWITCHOVER_FAIL); } server_version_num = check_server_version(local_conn, "master", true, NULL); @@ -3201,7 +3201,7 @@ do_standby_switchover(void) { log_err(_("master server did not shut down\n")); log_hint(_("check the master server status before performing any further actions")); - exit(ERR_FAILOVER_FAIL); + exit(ERR_SWITCHOVER_FAIL); } /* promote this standby */ @@ -3365,7 +3365,7 @@ do_standby_switchover(void) if (is_standby(remote_conn) == 0) { log_err(_("new standby (old master) is not a standby\n")); - exit(ERR_FAILOVER_FAIL); + exit(ERR_SWITCHOVER_FAIL); } connection_success = true; break; @@ -3379,7 +3379,7 @@ do_standby_switchover(void) if (connection_success == false) { log_err(_("unable to connect to new standby (old master)\n")); - exit(ERR_FAILOVER_FAIL); + exit(ERR_SWITCHOVER_FAIL); } log_debug("new standby is in recovery\n"); @@ -3388,15 +3388,14 @@ do_standby_switchover(void) local_conn = establish_db_connection(options.conninfo, true); - query_result = get_node_replication_state(local_conn, remote_node_record.name, remote_node_replication_state); + if (query_result == -1) { log_err(_("unable to retrieve replication status for node %i\n"), remote_node_id); PQfinish(local_conn); - // errcode? - exit(ERR_DB_QUERY); + exit(ERR_SWITCHOVER_FAIL); } if (query_result == 0) @@ -3405,7 +3404,6 @@ do_standby_switchover(void) } else { - /* XXX other valid values? */ /* XXX we should poll for a while in case the node takes time to connect to the primary */ if (strcmp(remote_node_replication_state, "streaming") == 0 || strcmp(remote_node_replication_state, "catchup") == 0) @@ -3414,9 +3412,16 @@ do_standby_switchover(void) } else { - log_err(_("node %i replication state is \"%s\"\n"), remote_node_id, remote_node_replication_state); + /* + * Other possible replication states are: + * - startup + * - backup + * - UNKNOWN + */ + log_err(_("node %i has unexpected replication state \"%s\"\n"), + remote_node_id, remote_node_replication_state); PQfinish(local_conn); - exit(ERR_DB_QUERY); + exit(ERR_SWITCHOVER_FAIL); } } From c30447ac903a938faabe058e6fb2cc1a21c8101c Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 30 Jun 2016 12:51:54 +0900 Subject: [PATCH 100/113] repmgr: add option `-B/--remote-pg_bindir` for standby switchover This enables the switchover operation to function if the remote server (current primary) has a different binary directory to the current server, and addresses the issue reported in GitHub #172. --- repmgr.c | 64 ++++++++++++++++++++++++++++++++++++++++++++------------ repmgr.h | 5 +++-- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/repmgr.c b/repmgr.c index 62d8cb29..75dfac64 100644 --- a/repmgr.c +++ b/repmgr.c @@ -99,6 +99,7 @@ static bool check_upstream_config(PGconn *conn, int server_version_num, bool exi static bool update_node_record_set_master(PGconn *conn, int this_node_id); static char *make_pg_path(char *file); +static char *make_path(char *path, char *file); static void do_master_register(void); static void do_standby_register(void); @@ -180,6 +181,7 @@ main(int argc, char **argv) {"terse", required_argument, NULL, 't'}, {"mode", required_argument, NULL, 'm'}, {"remote-config-file", required_argument, NULL, 'C'}, + {"remote-pg_bindir", required_argument, NULL, 'B'}, /* deprecated from 3.2; replaced with -P/--pwprompt */ {"initdb-no-pwprompt", no_argument, NULL, 1}, {"check-upstream-config", no_argument, NULL, 2}, @@ -382,10 +384,13 @@ main(int argc, char **argv) termPQExpBuffer(&invalid_mode); } } - break; + break; case 'C': strncpy(runtime_options.remote_config_file, optarg, MAXLEN); break; + case 'B': + strncpy(runtime_options.remote_pg_bindir, optarg, MAXLEN); + break; case 1: runtime_options.initdb_no_pwprompt = true; break; @@ -2734,7 +2739,7 @@ do_standby_follow(void) * * TODO: * - make connection test timeouts/intervals configurable (see below) - * - add command line option --remote_pg_bindir or similar to + * - add command line option --remote-pg_bindir or similar to * optionally handle cases where the remote pg_bindir is different */ @@ -2751,7 +2756,8 @@ do_standby_switchover(void) char remote_host[MAXLEN]; char remote_data_directory[MAXLEN]; int remote_node_id; - char remote_node_replication_state[MAXLEN] = ""; + char remote_node_replication_state[MAXLEN] = ""; + char remote_pg_bindir[MAXLEN] = ""; char remote_archive_config_dir[MAXLEN]; char remote_pg_rewind[MAXLEN]; int i, @@ -2766,6 +2772,20 @@ do_standby_switchover(void) bool connection_success; + /* + * If --remote_pg_bindir supplied, use that to build the path on the + * remote host; if not default to whatever value is set in pg_bindir + */ + + if (strlen(runtime_options.remote_pg_bindir)) + { + strncpy(remote_pg_bindir, runtime_options.remote_pg_bindir, MAXLEN); + } + else + { + strncpy(remote_pg_bindir, pg_bindir, MAXLEN); + } + /* * SANITY CHECKS * @@ -2895,7 +2915,7 @@ do_standby_switchover(void) use_pg_rewind = true; maxlen_snprintf(remote_pg_rewind, "%s", - make_pg_path("pg_rewind")); + make_path(remote_pg_bindir, "pg_rewind")); } else { @@ -2916,7 +2936,7 @@ do_standby_switchover(void) { maxlen_snprintf(remote_pg_rewind, "%s", - make_pg_path("pg_rewind")); + make_path(remote_pg_bindir, "pg_rewind")); } } else @@ -3112,7 +3132,7 @@ do_standby_switchover(void) maxlen_snprintf(command, "%s standby archive-config -f %s --config-archive-dir=%s", - make_pg_path("repmgr"), + make_path(remote_pg_bindir, "repmgr"), runtime_options.remote_config_file, remote_archive_config_dir); @@ -3151,7 +3171,7 @@ do_standby_switchover(void) maxlen_snprintf(command, "%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0", - make_pg_path("pg_ctl"), + make_path(remote_pg_bindir, "pg_ctl"), remote_data_directory, runtime_options.pg_ctl_mode); @@ -3247,7 +3267,7 @@ do_standby_switchover(void) /* Restore any previously archived config files */ maxlen_snprintf(command, "%s standby restore-config -D %s --config-archive-dir=%s", - make_pg_path("repmgr"), + make_path(remote_pg_bindir, "repmgr"), remote_data_directory, remote_archive_config_dir); @@ -3305,7 +3325,7 @@ do_standby_switchover(void) format_db_cli_params(options.conninfo, repmgr_db_cli_params); maxlen_snprintf(command, "%s -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone", - make_pg_path("repmgr"), + make_path(remote_pg_bindir, "repmgr"), remote_data_directory, runtime_options.remote_config_file, repmgr_db_cli_params @@ -3331,7 +3351,7 @@ do_standby_switchover(void) format_db_cli_params(options.conninfo, repmgr_db_cli_params); maxlen_snprintf(command, "%s -D %s -f %s %s standby follow", - make_pg_path("repmgr"), + make_path(remote_pg_bindir, "repmgr"), remote_data_directory, runtime_options.remote_config_file, repmgr_db_cli_params @@ -3355,13 +3375,15 @@ do_standby_switchover(void) for(i = 0; i < options.reconnect_attempts; i++) { - /* Check whether primary is available */ + /* Check whether new standby available */ remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */ if (PQstatus(remote_conn) == CONNECTION_OK) { log_debug("connected to new standby (old master)\n"); + + /* make sure it's actually a standby */ if (is_standby(remote_conn) == 0) { log_err(_("new standby (old master) is not a standby\n")); @@ -4201,6 +4223,8 @@ do_help(void) printf(_(" -m, --mode (standby switchover) shutdown mode (smart|fast|immediate)\n")); printf(_(" -C, --remote-config-file (standby switchover) path to the configuration file on\n" \ " the current master\n")); + printf(_(" -B, --remote-pg_bindir (standby switchover) path to PostgreSQL binaries on\n" \ + " the current master\n")); printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \ " optionally providing a path to the binary\n")); printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n")); @@ -4743,6 +4767,14 @@ check_parameters_for_action(const int action) } } + if (action != STANDBY_SWITCHOVER) + { + if (strlen(runtime_options.remote_pg_bindir)) + { + error_list_append(&cli_warnings, _("--remote-pg_bindir an only be used when executing STANDBY_SWITCHOVER")); + } + } + return; } @@ -5496,12 +5528,18 @@ do_check_upstream_config(void) static char * make_pg_path(char *file) { - maxlen_snprintf(path_buf, "%s%s", pg_bindir, file); + return make_path(pg_bindir, file); +} + + +static char * +make_path(char *path, char *file) +{ + maxlen_snprintf(path_buf, "%s%s", path, file); return path_buf; } - static void exit_with_errors(void) { diff --git a/repmgr.h b/repmgr.h index 358dec92..4ce9c6bd 100644 --- a/repmgr.h +++ b/repmgr.h @@ -79,8 +79,9 @@ typedef struct */ char loglevel[MAXLEN]; - /* parameter used by STANDBY SWITCHOVER */ + /* parameters used by STANDBY SWITCHOVER */ char remote_config_file[MAXLEN]; + char remote_pg_bindir[MAXLEN]; char pg_rewind[MAXPGPATH]; /* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */ char config_archive_dir[MAXLEN]; @@ -96,7 +97,7 @@ typedef struct bool initdb_no_pwprompt; } t_runtime_options; -#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false } +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", "", 0, "", "", "", false } struct BackupLabel { From 746c9793ed725338f597a04079bc0c5b0cca482c Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 30 Jun 2016 21:27:52 +0900 Subject: [PATCH 101/113] Better detect completion of demotion candidate shutdown If a connection attempt fails, keep pinging the server until it finally away, or the timeout kicks in. Addresses issue reported in GitHub #188 and previously noted in repmgr.c --- repmgr.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/repmgr.c b/repmgr.c index 75dfac64..e58bcf12 100644 --- a/repmgr.c +++ b/repmgr.c @@ -2769,8 +2769,8 @@ do_standby_switchover(void) char repmgr_db_cli_params[MAXLEN] = ""; int query_result; t_node_info remote_node_record; - bool connection_success; - + bool connection_success, + shutdown_success; /* * If --remote_pg_bindir supplied, use that to build the path on the @@ -3187,37 +3187,42 @@ do_standby_switchover(void) termPQExpBuffer(&command_output); - connection_success = false; + shutdown_success = false; /* loop for timeout waiting for current primary to stop */ - for(i = 0; i < options.reconnect_attempts; i++) + for (i = 0; i < options.reconnect_attempts; i++) { /* Check whether primary is available */ remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */ - /* XXX failure to connect doesn't mean the server is necessarily - * completely stopped - we need to better detect the reason for - * connection failure ("server not listening" vs "shutting down") - * - * -> check is_pgup() + /* + * If we're unable to connect, keep PQping-ing the server until it + * finally goes away */ if (PQstatus(remote_conn) != CONNECTION_OK) { - connection_success = true; + PGPing ping_res = PQping(remote_conninfo); - log_notice(_("current master has been stopped\n")); - break; + /* database server could not be contacted */ + if (ping_res == PQPING_NO_RESPONSE) + { + /* XXX we should double-check access to the physical server here */ + shutdown_success = true; + + log_notice(_("current master has been stopped\n")); + break; + } } PQfinish(remote_conn); - // configurable? + /* XXX make configurable? */ sleep(options.reconnect_interval); i++; } - if (connection_success == false) + if (shutdown_success == false) { log_err(_("master server did not shut down\n")); log_hint(_("check the master server status before performing any further actions")); From 60bceae905bbe5bd64ffce84732b90021fb58600 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Jul 2016 07:50:27 +0900 Subject: [PATCH 102/113] Use PQping only to check for server shutdown Functionally there's no difference between that and attempting to make an actual connection, so use one method only, which also simplifies the code. --- repmgr.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/repmgr.c b/repmgr.c index e58bcf12..51c1cac6 100644 --- a/repmgr.c +++ b/repmgr.c @@ -3195,27 +3195,22 @@ do_standby_switchover(void) { /* Check whether primary is available */ - remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */ + PGPing ping_res = PQping(remote_conninfo); - /* - * If we're unable to connect, keep PQping-ing the server until it - * finally goes away - */ - if (PQstatus(remote_conn) != CONNECTION_OK) + /* database server could not be contacted */ + if (ping_res == PQPING_NO_RESPONSE) { - PGPing ping_res = PQping(remote_conninfo); + /* + * XXX here we should directly access the server and check that the + * pidfile has gone away so we can be sure the server is actually + * shut down and the PQPING_NO_RESPONSE is not due to other issues + * such as coincidental network failure. + */ + shutdown_success = true; - /* database server could not be contacted */ - if (ping_res == PQPING_NO_RESPONSE) - { - /* XXX we should double-check access to the physical server here */ - shutdown_success = true; - - log_notice(_("current master has been stopped\n")); - break; - } + log_notice(_("current master has been stopped\n")); + break; } - PQfinish(remote_conn); /* XXX make configurable? */ sleep(options.reconnect_interval); From 69d9d137e0cf4d1db11f44aa1ced9546ccdee468 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Jul 2016 08:47:20 +0900 Subject: [PATCH 103/113] repmgr: change default pg_ctl shutdown mode to "fast" This matches the default pg_ctl behaviour (from 9.5) and is the more sensible option for performing time-critical operations such as switchover. --- repmgr.c | 2 +- repmgr.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/repmgr.c b/repmgr.c index 51c1cac6..6c9761ac 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4220,7 +4220,7 @@ do_help(void) printf(_(" -w, --wal-keep-segments=VALUE (standby clone) minimum value for the GUC\n" \ " wal_keep_segments (default: %s)\n"), DEFAULT_WAL_KEEP_SEGMENTS); printf(_(" -W, --wait (standby follow) wait for a master to appear\n")); - printf(_(" -m, --mode (standby switchover) shutdown mode (smart|fast|immediate)\n")); + printf(_(" -m, --mode (standby switchover) shutdown mode (\"fast\" - default, \"smart\" or \"immediate\")\n")); printf(_(" -C, --remote-config-file (standby switchover) path to the configuration file on\n" \ " the current master\n")); printf(_(" -B, --remote-pg_bindir (standby switchover) path to PostgreSQL binaries on\n" \ diff --git a/repmgr.h b/repmgr.h index 4ce9c6bd..bb37fbb9 100644 --- a/repmgr.h +++ b/repmgr.h @@ -71,7 +71,6 @@ typedef struct bool fast_checkpoint; bool ignore_external_config_files; bool csv_mode; - char pg_ctl_mode[MAXLEN]; char masterport[MAXLEN]; /* * configuration file parameters which can be overridden on the @@ -83,6 +82,7 @@ typedef struct char remote_config_file[MAXLEN]; char remote_pg_bindir[MAXLEN]; char pg_rewind[MAXPGPATH]; + char pg_ctl_mode[MAXLEN]; /* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */ char config_archive_dir[MAXLEN]; /* parameter used by CLUSTER CLEANUP */ @@ -97,7 +97,7 @@ typedef struct bool initdb_no_pwprompt; } t_runtime_options; -#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", "", 0, "", "", "", false } +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "", "fast", "", 0, "", "", "", false } struct BackupLabel { From a0f02e454c0248f9f15d8740f8b7c68e3f6a41a5 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Jul 2016 09:42:55 +0900 Subject: [PATCH 104/113] repmgr: during switchover check demotion candidate's pidfile If the pidfile is still there after apparent shutdown, or we're unable to access the server at all, something has gone wrong and the switchover should be aborted. --- repmgr.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/repmgr.c b/repmgr.c index 6c9761ac..73e0ddfb 100644 --- a/repmgr.c +++ b/repmgr.c @@ -2751,7 +2751,7 @@ do_standby_switchover(void) int server_version_num; bool use_pg_rewind; - /* the remote server is the primary which will be demoted */ + /* the remote server is the primary to be demoted */ char remote_conninfo[MAXCONNINFO] = ""; char remote_host[MAXLEN]; char remote_data_directory[MAXLEN]; @@ -3200,16 +3200,38 @@ do_standby_switchover(void) /* database server could not be contacted */ if (ping_res == PQPING_NO_RESPONSE) { + bool command_success; + /* - * XXX here we should directly access the server and check that the + * directly access the server and check that the * pidfile has gone away so we can be sure the server is actually * shut down and the PQPING_NO_RESPONSE is not due to other issues * such as coincidental network failure. */ - shutdown_success = true; + initPQExpBuffer(&command_output); - log_notice(_("current master has been stopped\n")); - break; + maxlen_snprintf(command, + "ls %s/postmaster.pid >/dev/null 2>&1 && echo 1 || echo 0", + remote_data_directory); + + command_success = remote_command( + remote_host, + runtime_options.remote_user, + command, + &command_output); + + if (command_success == true && *command_output.data == '0') + { + shutdown_success = true; + + log_notice(_("current master has been stopped\n")); + + termPQExpBuffer(&command_output); + + break; + } + + termPQExpBuffer(&command_output); } /* XXX make configurable? */ From a6294b7da022aff9c63767ac1b4298797f726e84 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Jul 2016 10:47:20 +0900 Subject: [PATCH 105/113] Update README.md Add note about logging configuration and settings in `pg_ctl_options` for switchover operations. --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6a319c42..ff387e16 100644 --- a/README.md +++ b/README.md @@ -739,7 +739,7 @@ both passwordless SSH access and the path of `repmgr.conf` on that server. > careful preparation and with adequate attention. In particular you should > be confident that your network environment is stable and reliable. > -> We recommend running `repmgr standby switchover` at the most verbose +> We recommend running `repmgr standby switchover` at the most verbose > logging level (`--log-level DEBUG --verbose`) and capturing all output > to assist troubleshooting any problems. > @@ -805,7 +805,7 @@ should have been updated to reflect this: ### Caveats -- the functionality provided `repmgr standby switchover` is primarily aimed +- The functionality provided `repmgr standby switchover` is primarily aimed at a two-server master/standby replication cluster and currently does not support additional standbys. - `repmgr standby switchover` is designed to use the `pg_rewind` utility, @@ -819,6 +819,11 @@ should have been updated to reflect this: the `repmgrd` may try and promote a standby by itself. - Any other standbys attached to the old master will need to be manually instructed to point to the new master (e.g. with `repmgr standby follow`). +- You must ensure that following a server start using `pg_ctl`, log output + is not send to STDERR (the default behaviour). If logging is not configured, + We recommend setting `logging_collector=on` in `postgresql.conf` and + providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options` + parameter. We hope to remove some of these restrictions in future versions of `repmgr`. From c5a721a3cf23fef3ea9de99aa5d65016350da4a8 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Jul 2016 12:07:10 +0900 Subject: [PATCH 106/113] TODO: remove resolved item --- TODO | 2 -- 1 file changed, 2 deletions(-) diff --git a/TODO b/TODO index 4ec153c4..0aa626a5 100644 --- a/TODO +++ b/TODO @@ -53,8 +53,6 @@ Planned feature improvements requested, activate the replication slot using pg_receivexlog to negate the need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5). -* Take into account the fact that a standby can obtain WAL from an archive, - so even if direct streaming replication is interrupted, it may be up-to-date Usability improvements ====================== From 013b4b4b8ae2845406fd7638a390dc4442da0dd9 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 1 Jul 2016 12:09:35 +0900 Subject: [PATCH 107/113] Update README/TODO about following non-master server --- README.md | 5 +++-- TODO | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff387e16..b0910679 100644 --- a/README.md +++ b/README.md @@ -711,8 +711,9 @@ updated to reflect this: Note that with cascading replication, `repmgr standby follow` can also be -used to detach a standby from its current upstream server and follow another -upstream server, including the master. +used to detach a standby from its current upstream server and follow the +master. However it's currently not possible to have it follow another standby; +we hope to improve this in a future release. Performing a switchover with repmgr diff --git a/TODO b/TODO index 0aa626a5..3b78a9b6 100644 --- a/TODO +++ b/TODO @@ -53,6 +53,9 @@ Planned feature improvements requested, activate the replication slot using pg_receivexlog to negate the need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5). +* repmgr: enable "standby follow" to point a standby at another standby, not + just the replication cluster master (see GitHub #130) + Usability improvements ====================== From 8cd79fd7dd3597779eb02c3912cb76a0feab3e02 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Jul 2016 11:30:36 +0900 Subject: [PATCH 108/113] Revert "repmgr: add option `-B/--remote-pg_bindir` for standby switchover" This reverts commit c30447ac903a938faabe058e6fb2cc1a21c8101c. --- repmgr.c | 64 ++++++++++++-------------------------------------------- repmgr.h | 5 ++--- 2 files changed, 15 insertions(+), 54 deletions(-) diff --git a/repmgr.c b/repmgr.c index 73e0ddfb..88977aac 100644 --- a/repmgr.c +++ b/repmgr.c @@ -99,7 +99,6 @@ static bool check_upstream_config(PGconn *conn, int server_version_num, bool exi static bool update_node_record_set_master(PGconn *conn, int this_node_id); static char *make_pg_path(char *file); -static char *make_path(char *path, char *file); static void do_master_register(void); static void do_standby_register(void); @@ -181,7 +180,6 @@ main(int argc, char **argv) {"terse", required_argument, NULL, 't'}, {"mode", required_argument, NULL, 'm'}, {"remote-config-file", required_argument, NULL, 'C'}, - {"remote-pg_bindir", required_argument, NULL, 'B'}, /* deprecated from 3.2; replaced with -P/--pwprompt */ {"initdb-no-pwprompt", no_argument, NULL, 1}, {"check-upstream-config", no_argument, NULL, 2}, @@ -384,13 +382,10 @@ main(int argc, char **argv) termPQExpBuffer(&invalid_mode); } } - break; + break; case 'C': strncpy(runtime_options.remote_config_file, optarg, MAXLEN); break; - case 'B': - strncpy(runtime_options.remote_pg_bindir, optarg, MAXLEN); - break; case 1: runtime_options.initdb_no_pwprompt = true; break; @@ -2739,7 +2734,7 @@ do_standby_follow(void) * * TODO: * - make connection test timeouts/intervals configurable (see below) - * - add command line option --remote-pg_bindir or similar to + * - add command line option --remote_pg_bindir or similar to * optionally handle cases where the remote pg_bindir is different */ @@ -2756,8 +2751,7 @@ do_standby_switchover(void) char remote_host[MAXLEN]; char remote_data_directory[MAXLEN]; int remote_node_id; - char remote_node_replication_state[MAXLEN] = ""; - char remote_pg_bindir[MAXLEN] = ""; + char remote_node_replication_state[MAXLEN] = ""; char remote_archive_config_dir[MAXLEN]; char remote_pg_rewind[MAXLEN]; int i, @@ -2772,20 +2766,6 @@ do_standby_switchover(void) bool connection_success, shutdown_success; - /* - * If --remote_pg_bindir supplied, use that to build the path on the - * remote host; if not default to whatever value is set in pg_bindir - */ - - if (strlen(runtime_options.remote_pg_bindir)) - { - strncpy(remote_pg_bindir, runtime_options.remote_pg_bindir, MAXLEN); - } - else - { - strncpy(remote_pg_bindir, pg_bindir, MAXLEN); - } - /* * SANITY CHECKS * @@ -2915,7 +2895,7 @@ do_standby_switchover(void) use_pg_rewind = true; maxlen_snprintf(remote_pg_rewind, "%s", - make_path(remote_pg_bindir, "pg_rewind")); + make_pg_path("pg_rewind")); } else { @@ -2936,7 +2916,7 @@ do_standby_switchover(void) { maxlen_snprintf(remote_pg_rewind, "%s", - make_path(remote_pg_bindir, "pg_rewind")); + make_pg_path("pg_rewind")); } } else @@ -3132,7 +3112,7 @@ do_standby_switchover(void) maxlen_snprintf(command, "%s standby archive-config -f %s --config-archive-dir=%s", - make_path(remote_pg_bindir, "repmgr"), + make_pg_path("repmgr"), runtime_options.remote_config_file, remote_archive_config_dir); @@ -3171,7 +3151,7 @@ do_standby_switchover(void) maxlen_snprintf(command, "%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0", - make_path(remote_pg_bindir, "pg_ctl"), + make_pg_path("pg_ctl"), remote_data_directory, runtime_options.pg_ctl_mode); @@ -3289,7 +3269,7 @@ do_standby_switchover(void) /* Restore any previously archived config files */ maxlen_snprintf(command, "%s standby restore-config -D %s --config-archive-dir=%s", - make_path(remote_pg_bindir, "repmgr"), + make_pg_path("repmgr"), remote_data_directory, remote_archive_config_dir); @@ -3347,7 +3327,7 @@ do_standby_switchover(void) format_db_cli_params(options.conninfo, repmgr_db_cli_params); maxlen_snprintf(command, "%s -D %s -f %s %s --rsync-only --force --ignore-external-config-files standby clone", - make_path(remote_pg_bindir, "repmgr"), + make_pg_path("repmgr"), remote_data_directory, runtime_options.remote_config_file, repmgr_db_cli_params @@ -3373,7 +3353,7 @@ do_standby_switchover(void) format_db_cli_params(options.conninfo, repmgr_db_cli_params); maxlen_snprintf(command, "%s -D %s -f %s %s standby follow", - make_path(remote_pg_bindir, "repmgr"), + make_pg_path("repmgr"), remote_data_directory, runtime_options.remote_config_file, repmgr_db_cli_params @@ -3397,15 +3377,13 @@ do_standby_switchover(void) for(i = 0; i < options.reconnect_attempts; i++) { - /* Check whether new standby available */ + /* Check whether primary is available */ remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */ if (PQstatus(remote_conn) == CONNECTION_OK) { log_debug("connected to new standby (old master)\n"); - - /* make sure it's actually a standby */ if (is_standby(remote_conn) == 0) { log_err(_("new standby (old master) is not a standby\n")); @@ -4245,8 +4223,6 @@ do_help(void) printf(_(" -m, --mode (standby switchover) shutdown mode (\"fast\" - default, \"smart\" or \"immediate\")\n")); printf(_(" -C, --remote-config-file (standby switchover) path to the configuration file on\n" \ " the current master\n")); - printf(_(" -B, --remote-pg_bindir (standby switchover) path to PostgreSQL binaries on\n" \ - " the current master\n")); printf(_(" --pg_rewind[=VALUE] (standby switchover) 9.3/9.4 only - use pg_rewind if available,\n" \ " optionally providing a path to the binary\n")); printf(_(" -k, --keep-history=VALUE (cluster cleanup) retain indicated number of days of history (default: 0)\n")); @@ -4789,14 +4765,6 @@ check_parameters_for_action(const int action) } } - if (action != STANDBY_SWITCHOVER) - { - if (strlen(runtime_options.remote_pg_bindir)) - { - error_list_append(&cli_warnings, _("--remote-pg_bindir an only be used when executing STANDBY_SWITCHOVER")); - } - } - return; } @@ -5550,18 +5518,12 @@ do_check_upstream_config(void) static char * make_pg_path(char *file) { - return make_path(pg_bindir, file); -} - - -static char * -make_path(char *path, char *file) -{ - maxlen_snprintf(path_buf, "%s%s", path, file); + maxlen_snprintf(path_buf, "%s%s", pg_bindir, file); return path_buf; } + static void exit_with_errors(void) { diff --git a/repmgr.h b/repmgr.h index bb37fbb9..20690eea 100644 --- a/repmgr.h +++ b/repmgr.h @@ -78,9 +78,8 @@ typedef struct */ char loglevel[MAXLEN]; - /* parameters used by STANDBY SWITCHOVER */ + /* parameter used by STANDBY SWITCHOVER */ char remote_config_file[MAXLEN]; - char remote_pg_bindir[MAXLEN]; char pg_rewind[MAXPGPATH]; char pg_ctl_mode[MAXLEN]; /* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */ @@ -97,7 +96,7 @@ typedef struct bool initdb_no_pwprompt; } t_runtime_options; -#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "", "fast", "", 0, "", "", "", false } +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", "", false } struct BackupLabel { From e8a0cd33b5b4d2795098fbdcb3ed0c84ab71f4fd Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 5 Jul 2016 11:33:06 +0900 Subject: [PATCH 109/113] Ensure all node record structures are initialised --- dbutils.h | 25 +++++++++++++------------ repmgr.c | 7 +++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/dbutils.h b/dbutils.h index 85a2b167..d96667fa 100644 --- a/dbutils.h +++ b/dbutils.h @@ -52,18 +52,6 @@ typedef struct s_node_info } t_node_info; -/* - * Struct to store replication slot information - */ - -typedef struct s_replication_slot -{ - char slot_name[MAXLEN]; - char slot_type[MAXLEN]; - bool active; -} t_replication_slot; - - #define T_NODE_INFO_INITIALIZER { \ NODE_NOT_FOUND, \ NO_UPSTREAM_NODE, \ @@ -78,6 +66,19 @@ typedef struct s_replication_slot InvalidXLogRecPtr \ } +/* + * Struct to store replication slot information + */ + +typedef struct s_replication_slot +{ + char slot_name[MAXLEN]; + char slot_type[MAXLEN]; + bool active; +} t_replication_slot; + + + PGconn *_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice); diff --git a/repmgr.c b/repmgr.c index 88977aac..7f602a05 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1,4 +1,3 @@ - /* * repmgr.c - Command interpreter for the repmgr package * Copyright (C) 2ndQuadrant, 2010-2016 @@ -1128,7 +1127,7 @@ do_standby_register(void) int ret; bool record_created; - t_node_info node_record; + t_node_info node_record = T_NODE_INFO_INITIALIZER; int node_result; log_info(_("connecting to standby database\n")); @@ -2762,7 +2761,7 @@ do_standby_switchover(void) char repmgr_db_cli_params[MAXLEN] = ""; int query_result; - t_node_info remote_node_record; + t_node_info remote_node_record = T_NODE_INFO_INITIALIZER; bool connection_success, shutdown_success; @@ -3455,7 +3454,7 @@ do_standby_switchover(void) if (options.use_replication_slots) { - t_node_info local_node_record; + t_node_info local_node_record = T_NODE_INFO_INITIALIZER; query_result = get_node_record(local_conn, options.cluster_name, options.node, &local_node_record); From 5e9db47d12ab68dce65baa319530a9ab6e845db2 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 5 Jul 2016 21:06:31 +0900 Subject: [PATCH 110/113] Fix query in get_node_record_by_name() --- dbutils.c | 4 +++- repmgr.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbutils.c b/dbutils.c index 29a2f6be..c962ba0d 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1717,11 +1717,13 @@ get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_no "SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active" " FROM %s.repl_nodes " " WHERE cluster = '%s' " - " AND node_name = %s", + " AND name = '%s'", get_repmgr_schema_quoted(conn), cluster, node_name); + log_verbose(LOG_DEBUG, "get_node_record_by_name():\n%s\n", sqlquery); + result = _get_node_record(conn, cluster, sqlquery, node_info); if (result == 0) diff --git a/repmgr.c b/repmgr.c index 7f602a05..0edd4c65 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1198,7 +1198,7 @@ do_standby_register(void) { if (node_record.active == true) { - log_err(_("Node %i exists already with node_name \"%s\""), + log_err(_("Node %i exists already with node_name \"%s\"\n"), node_record.node_id, options.node_name); PQfinish(master_conn); From 091541619d8859115f99e3bb302419592191d6a1 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 6 Jul 2016 09:27:31 +0900 Subject: [PATCH 111/113] Fix repmgrd monitoring calculation when in archive recovery --- repmgrd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/repmgrd.c b/repmgrd.c index 27d612bb..4982ae71 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -961,7 +961,7 @@ standby_monitor(void) if (active_master_id != master_options.node) { - log_notice(_("connecting to active master (node %i)...\n"), active_master_id); \ + log_notice(_("connecting to active master (node %i)...\n"), active_master_id); if (master_conn != NULL) { PQfinish(master_conn); @@ -1057,11 +1057,8 @@ standby_monitor(void) strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN); PQclear(res); - lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL); lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL); - lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); - /* Calculate apply lag */ if (last_xlog_receive_location_gte_replayed == false) @@ -1072,10 +1069,12 @@ standby_monitor(void) */ apply_lag = 0; strncpy(last_xlog_receive_location, last_xlog_replay_location, MAXLEN); + lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL); } else { apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location; + lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL); } /* Calculate replication lag */ From 5e03ef40cbe49b63d0889daf17bb541f7eb663a9 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 6 Jul 2016 15:48:14 +0900 Subject: [PATCH 112/113] Update HISTORY --- HISTORY | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/HISTORY b/HISTORY index 9e594e10..69cff214 100644 --- a/HISTORY +++ b/HISTORY @@ -1,3 +1,13 @@ +3.1.4 2016-07- + repmgr: new configuration option for setting "restore_command" + in the recovery.conf file generated by repmgr (Martín) + repmgr: add --csv option to "repmgr cluster show" (Gianni) + repmgr: enable provision of a conninfo string as the -d/--dbname + parameter, similar to other PostgreSQL utilities (Ian) + repmgr: during switchover operations improve detection of + demotion candidate shutdown (Ian) + various bugfixes and documentation updates (Ian, Martín) + 3.1.3 2016-05-17 repmgrd: enable monitoring when a standby is catching up by replaying archived WAL (Ian) From 72f9b0145afab1060dd1202c8f8937653c8b2e39 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 6 Jul 2016 15:55:56 +0900 Subject: [PATCH 113/113] Use rmtree instead of executing "rm -rf" This completes the task mentioned in b6b6439819a5cf26026f32ef400a747637f0138d --- repmgr.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/repmgr.c b/repmgr.c index 0edd4c65..053b1da4 100644 --- a/repmgr.c +++ b/repmgr.c @@ -768,9 +768,9 @@ do_cluster_show(void) conn = establish_db_connection(options.conninfo, true); sqlquery_snprintf(sqlquery, - "SELECT conninfo, type, name, upstream_node_name, id" - " FROM %s.repl_show_nodes", - get_repmgr_schema_quoted(conn)); + "SELECT conninfo, type, name, upstream_node_name, id" + " FROM %s.repl_show_nodes", + get_repmgr_schema_quoted(conn)); log_verbose(LOG_DEBUG, "do_cluster_show(): \n%s\n",sqlquery ); @@ -1371,8 +1371,6 @@ do_standby_clone(void) char *first_wal_segment = NULL; char *last_wal_segment = NULL; - char xlog_dir[MAXLEN] = ""; - PQExpBufferData event_details; @@ -2043,8 +2041,8 @@ stop_backup: */ if (runtime_options.rsync_only) { - char script[MAXLEN]; char label_path[MAXPGPATH]; + char dirpath[MAXLEN] = ""; if (runtime_options.force) { @@ -2053,15 +2051,14 @@ stop_backup: * rsync's --exclude option doesn't do it. */ - maxlen_snprintf(xlog_dir, "%s/pg_xlog/", local_data_directory); + maxlen_snprintf(dirpath, "%s/pg_xlog/", local_data_directory); - if (!rmtree(xlog_dir, false)) + if (!rmtree(dirpath, false)) { log_err(_("unable to empty local WAL directory %s\n"), - xlog_dir); + dirpath); exit(ERR_BAD_RSYNC); } - } /* @@ -2082,15 +2079,15 @@ stop_backup: if (server_version_num >= 90400 && backup_label.min_failover_slot_lsn == InvalidXLogRecPtr) { - maxlen_snprintf(script, "rm -rf %s/pg_replslot/*", + maxlen_snprintf(dirpath, "%s/pg_replslot/*", local_data_directory); log_debug("deleting pg_replslot directory contents\n"); - r = system(script); - if (r != 0) + + if (!rmtree(dirpath, false)) { - log_err(_("unable to empty replication slot directory %s/pg_replslot/\n"), - local_data_directory); + log_err(_("unable to empty replication slot directory %s\n"), + dirpath); exit(ERR_BAD_RSYNC); } }