From ed22fe326e8760a2c92fc8f17a61ac55a4ab2a74 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 30 Sep 2016 11:57:02 +0900 Subject: [PATCH] Document and expand pg_ctl override configuration options These are now prefixed with "service_" to emphasize that they're OS-level commands, not repmgr ones; also added reload and promote commands: service_start_command service_stop_command service_restart_command service_reload_command service_promote_command GitHub #169 --- HISTORY | 4 ++- README.md | 57 ++++++++++++++++++++++++++++------- config.c | 24 +++++++++------ config.h | 12 +++++--- repmgr.c | 74 +++++++++++++++++++++++++++------------------- repmgr.conf.sample | 12 ++++---- 6 files changed, 124 insertions(+), 59 deletions(-) diff --git a/HISTORY b/HISTORY index a53b287c..1aa4a530 100644 --- a/HISTORY +++ b/HISTORY @@ -1,6 +1,6 @@ 3.2 2016- repmgr: add support for cloning from a Barman backup (Gianni) - repmhr: add commands `standby matrix` and `standby diagnose` (Gianni) + repmgr: add commands `cluster matrix` and `cluster crosscheck` (Gianni) repmgr: suppress connection error display in `repmgr cluster show` unless `--verbose` supplied (Ian) repmgr: add commands `witness register` and `witness unregister` (Ian) @@ -15,6 +15,8 @@ the standby (Ian) repmgr: add option `--copy-external-config-files` for files outside of the data directory (Ian) + repmgr: add configuration options to override the default pg_ctl + commands (Jarkko Oranen) packaging: improve "repmgr-auto" Debian package (Gianni) 3.1.5 2016-08-15 diff --git a/README.md b/README.md index 292fbe2c..402c9086 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ is not required, but is necessary in the following cases: data directory * when using `rsync` to clone a standby * to perform switchover operations -* when executing `repmgr cluster matrix` and `repmgr cluster diagnose` +* when executing `repmgr cluster matrix` and `repmgr cluster crosscheck` In these cases `rsync` is required on all servers 
too. @@ -475,8 +475,8 @@ settings which have not been activated, the file will not be copied. * * * -> *TIP*: for reliable configuration file management we recommend using a configuration -> management tool such as Ansible, Chef, Puppet or Salt. +> *TIP*: for reliable configuration file management we recommend using a +> configuration management tool such as Ansible, Chef, Puppet or Salt. * * * @@ -1423,6 +1423,42 @@ In general `repmgr` can be upgraded as-is without any further action required, however feature releases may require the `repmgr` database to be upgraded. An SQL script will be provided - please check the release notes for details. + +Distribution-specific configuration +----------------------------------- + +`repmgr` is largely OS-agnostic and can be run on any UNIX-like environment +including various Linux distributions, Solaris, macOS and the various BSDs. + +However, often OS-specific configuration is required, particularly when +dealing with system service management (e.g. stopping and starting the +PostgreSQL server), file paths and configuration file locations. + +### PostgreSQL server control + +By default, `repmgr` will use PostgreSQL's standard `pg_ctl` utility to control +a running PostgreSQL server. However it may be better to use the operating +system's service management system, e.g. `systemd`. To specify which service +control commands are used, the following `repmgr.conf` configuration settings +are available: + + service_start_command + service_stop_command + service_restart_command + service_reload_command + service_promote_command + +See `repmgr.conf.sample` for further details. + +### Binary directory + +Some PostgreSQL system packages, such as those provided for Debian/Ubuntu, like +to hide some PostgreSQL utility programs outside of the default path. To ensure +`repmgr` finds all required executables, explicitly set `pg_bindir` to the +appropriate location, e.g. 
for PostgreSQL 9.6 on Debian/Ubuntu this would be +`/usr/lib/postgresql/9.6/bin/`. + + Reference --------- @@ -1602,7 +1638,7 @@ which contains connection details for the local database. The first column is the node's ID, and the second column represents the node's status (0 = master, 1 = standby, -1 = failed). -* `cluster matrix` and `cluster diagnose` +* `cluster matrix` and `cluster crosscheck` These commands display connection information for each pair of nodes in the replication cluster. @@ -1610,8 +1646,9 @@ which contains connection details for the local database. - `cluster matrix` runs a `cluster show` on each node and arranges the results in a matrix, recording success or failure; - - `cluster diagnose` runs a `cluster matrix` on each node and - combines the results in a single matrix. + - `cluster crosscheck` runs a `cluster matrix` on each node and + combines the results in a single matrix, providing a full + overview of connections between all databases in the cluster. These commands require a valid `repmgr.conf` file on each node. Additionally password-less `ssh` connections are required between @@ -1653,7 +1690,7 @@ which contains connection details for the local database. node1 and node2, meaning that inbound connections to these nodes have succeeded. - In this case, `cluster diagnose` gives the same result as `cluster + In this case, `cluster crosscheck` gives the same result as `cluster matrix`, because from any functioning node we can observe the same state: `node1` and `node2` are up, `node3` is down. @@ -1680,9 +1717,9 @@ which contains connection details for the local database. and that (therefore) we don't know the state of any outbound connection from node3. 
- In this case, the `cluster diagnose` command is more informative: + In this case, the `cluster crosscheck` command is more informative: - $ repmgr -f /etc/repmgr.conf cluster diagnose + $ repmgr -f /etc/repmgr.conf cluster crosscheck Name | Id | 1 | 2 | 3 -------+----+----+----+---- @@ -1690,7 +1727,7 @@ which contains connection details for the local database. node2 | 2 | * | * | * node3 | 3 | * | * | * - What happened is that `cluster diagnose` merged its own `cluster + What happened is that `cluster crosscheck` merged its own `cluster matrix` with the `cluster matrix` output from `node2`; the latter is able to connect to `node3` and therefore determine the state of outbound connections from that node. diff --git a/config.c b/config.c index 5df5872d..96d3d88b 100644 --- a/config.c +++ b/config.c @@ -222,9 +222,11 @@ parse_config(t_configuration_options *options) memset(options->node_name, 0, sizeof(options->node_name)); memset(options->promote_command, 0, sizeof(options->promote_command)); memset(options->follow_command, 0, sizeof(options->follow_command)); - memset(options->stop_command, 0, sizeof(options->stop_command)); - memset(options->start_command, 0, sizeof(options->start_command)); - memset(options->restart_command, 0, sizeof(options->restart_command)); + memset(options->service_stop_command, 0, sizeof(options->service_stop_command)); + memset(options->service_start_command, 0, sizeof(options->service_start_command)); + memset(options->service_restart_command, 0, sizeof(options->service_restart_command)); + memset(options->service_reload_command, 0, sizeof(options->service_reload_command)); + memset(options->service_promote_command, 0, sizeof(options->service_promote_command)); memset(options->rsync_options, 0, sizeof(options->rsync_options)); memset(options->ssh_options, 0, sizeof(options->ssh_options)); memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); @@ -351,12 +353,16 @@ parse_config(t_configuration_options *options) 
strncpy(options->promote_command, value, MAXLEN); else if (strcmp(name, "follow_command") == 0) strncpy(options->follow_command, value, MAXLEN); - else if (strcmp(name, "stop_command") == 0) - strncpy(options->stop_command, value, MAXLEN); - else if (strcmp(name, "start_command") == 0) - strncpy(options->start_command, value, MAXLEN); - else if (strcmp(name, "restart_command") == 0) - strncpy(options->restart_command, value, MAXLEN); + else if (strcmp(name, "service_stop_command") == 0) + strncpy(options->service_stop_command, value, MAXLEN); + else if (strcmp(name, "service_start_command") == 0) + strncpy(options->service_start_command, value, MAXLEN); + else if (strcmp(name, "service_restart_command") == 0) + strncpy(options->service_restart_command, value, MAXLEN); + else if (strcmp(name, "service_reload_command") == 0) + strncpy(options->service_reload_command, value, MAXLEN); + else if (strcmp(name, "service_promote_command") == 0) + strncpy(options->service_promote_command, value, MAXLEN); else if (strcmp(name, "master_response_timeout") == 0) options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false); /* diff --git a/config.h b/config.h index 808c8598..e7940119 100644 --- a/config.h +++ b/config.h @@ -63,11 +63,15 @@ typedef struct int failover; int priority; char node_name[MAXLEN]; + /* commands executed by repmgrd */ char promote_command[MAXLEN]; char follow_command[MAXLEN]; - char stop_command[MAXLEN]; - char start_command[MAXLEN]; - char restart_command[MAXLEN]; + /* Overrides for pg_ctl commands */ + char service_stop_command[MAXLEN]; + char service_start_command[MAXLEN]; + char service_restart_command[MAXLEN]; + char service_reload_command[MAXLEN]; + char service_promote_command[MAXLEN]; char loglevel[MAXLEN]; char logfacility[MAXLEN]; char rsync_options[QUERY_STR_LEN]; @@ -93,7 +97,7 @@ typedef struct * The following will initialize the structure with a minimal set of options; * actual defaults are set in 
parse_config() before parsing the configuration file */ -#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", "", "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } } +#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", "", "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } } typedef struct ItemListCell { diff --git a/repmgr.c b/repmgr.c index 4bfd9987..bf9d4a3f 100644 --- a/repmgr.c +++ b/repmgr.c @@ -21,7 +21,7 @@ * WITNESS REGISTER * WITNESS UNREGISTER * - * CLUSTER DIAGNOSE + * CLUSTER CROSSCHECK * CLUSTER MATRIX * CLUSTER SHOW * CLUSTER CLEANUP @@ -91,7 +91,7 @@ #define CLUSTER_SHOW 13 #define CLUSTER_CLEANUP 14 #define CLUSTER_MATRIX 15 -#define CLUSTER_DIAGNOSE 16 +#define CLUSTER_CROSSCHECK 16 static int test_ssh_connection(char *host, char *remote_user); static int copy_remote_files(char *host, char *remote_user, char *remote_path, @@ -114,7 +114,7 @@ static void get_barman_property(char *dst, char *name, char *local_repmgr_direct static char *string_skip_prefix(const char *prefix, char *string); static char *string_remove_trailing_newlines(char *string); static int build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length); -static int build_cluster_diagnose(t_node_status_cube ***cube_dest, int *name_length); +static int build_cluster_crosscheck(t_node_status_cube ***cube_dest, int *name_length); static char *make_pg_path(char *file); static char *make_barman_ssh_command(void); @@ -136,7 +136,7 @@ static void do_witness_unregister(void); static void do_cluster_show(void); static void do_cluster_matrix(void); -static void do_cluster_diagnose(void); +static void do_cluster_crosscheck(void); static void do_cluster_cleanup(void); static void do_check_upstream_config(void); static void do_help(void); @@ 
-665,7 +665,7 @@ main(int argc, char **argv) * { MASTER | PRIMARY } REGISTER | * STANDBY {REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | REWIND} | * WITNESS { CREATE | REGISTER | UNREGISTER } | - * CLUSTER { DIAGNOSE | MATRIX | SHOW | CLEANUP} + * CLUSTER { CROSSCHECK | MATRIX | SHOW | CLEANUP} * * the node part is optional, if we receive it then we shouldn't have * received a -h option @@ -721,8 +721,8 @@ main(int argc, char **argv) action = CLUSTER_SHOW; else if (strcasecmp(server_cmd, "CLEANUP") == 0) action = CLUSTER_CLEANUP; - else if (strcasecmp(server_cmd, "DIAGNOSE") == 0) - action = CLUSTER_DIAGNOSE; + else if (strcasecmp(server_cmd, "CROSSCHECK") == 0) + action = CLUSTER_CROSSCHECK; else if (strcasecmp(server_cmd, "MATRIX") == 0) action = CLUSTER_MATRIX; } @@ -965,8 +965,8 @@ main(int argc, char **argv) case WITNESS_UNREGISTER: do_witness_unregister(); break; - case CLUSTER_DIAGNOSE: - do_cluster_diagnose(); + case CLUSTER_CROSSCHECK: + do_cluster_crosscheck(); break; case CLUSTER_MATRIX: do_cluster_matrix(); @@ -1435,7 +1435,7 @@ cube_set_node_status(t_node_status_cube **cube, int n, int execute_node_id, int static int -build_cluster_diagnose(t_node_status_cube ***dest_cube, int *name_length) +build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length) { PGconn *conn; PGresult *res; @@ -1455,7 +1455,7 @@ build_cluster_diagnose(t_node_status_cube ***dest_cube, int *name_length) " FROM %s.repl_show_nodes ORDER BY id", get_repmgr_schema_quoted(conn)); - log_verbose(LOG_DEBUG, "build_cluster_diagnose(): \n%s\n",sqlquery ); + log_verbose(LOG_DEBUG, "build_cluster_crosscheck(): \n%s\n",sqlquery ); res = PQexec(conn, sqlquery); @@ -1582,7 +1582,7 @@ build_cluster_diagnose(t_node_status_cube ***dest_cube, int *name_length) host = param_get(&remote_conninfo, "host"); - log_verbose(LOG_DEBUG, "build_cluster_diagnose(): executing\n%s\n", quoted_command.data); + log_verbose(LOG_DEBUG, "build_cluster_crosscheck(): 
executing\n%s\n", quoted_command.data); (void)remote_command( host, @@ -1633,7 +1633,7 @@ build_cluster_diagnose(t_node_status_cube ***dest_cube, int *name_length) static void -do_cluster_diagnose(void) +do_cluster_crosscheck(void) { int i, n; char c; @@ -1642,7 +1642,7 @@ do_cluster_diagnose(void) t_node_status_cube **cube; - n = build_cluster_diagnose(&cube, &name_length); + n = build_cluster_crosscheck(&cube, &name_length); printf("%*s | Id ", name_length, node_header); for (i = 0; i < n; i++) @@ -4138,8 +4138,17 @@ do_standby_promote(void) * can't be sure when or if the promotion completes. * For now we'll poll the server until the default timeout (60 seconds) */ - maxlen_snprintf(script, "%s -D %s promote", - make_pg_path("pg_ctl"), data_dir); + + if (*options.service_promote_command) + { + maxlen_snprintf(script, "%s", options.service_promote_command); + } + else + { + maxlen_snprintf(script, "%s -D %s promote", + make_pg_path("pg_ctl"), data_dir); + } + log_notice(_("promoting server using '%s'\n"), script); @@ -4417,9 +4426,9 @@ do_standby_follow(void) exit(ERR_BAD_CONFIG); /* Finally, restart the service */ - if (*options.restart_command) + if (*options.service_restart_command) { - maxlen_snprintf(script, "%s", options.restart_command); + maxlen_snprintf(script, "%s", options.service_restart_command); } else { @@ -4922,10 +4931,6 @@ do_standby_switchover(void) * We'll issue the pg_ctl command but not force it not to wait; we'll check * the connection from here - and error out if no shutdown is detected * after a certain time. - * - * XXX currently we assume the same Postgres binary path on the primary - * as configured on the local standby; we may need to add a command - * line option to provide an explicit path (--remote-pg-bindir)? 
*/ /* @@ -4936,9 +4941,9 @@ do_standby_switchover(void) initPQExpBuffer(&remote_command_str); - if (*options.stop_command) + if (*options.service_stop_command) { - appendPQExpBuffer(&remote_command_str, "%s", options.stop_command); + appendPQExpBuffer(&remote_command_str, "%s", options.service_stop_command); } else { @@ -5655,6 +5660,7 @@ do_witness_create(void) if (!runtime_options.superuser[0]) strncpy(runtime_options.superuser, "postgres", MAXLEN); + /* TODO: possibly allow the user to override this with a custom command? */ maxlen_snprintf(script, "%s %s -D %s init -o \"%s-U %s\"", make_pg_path("pg_ctl"), options.pg_ctl_options, runtime_options.dest_dir, @@ -5732,9 +5738,9 @@ do_witness_create(void) /* start new instance */ - if (*options.start_command) + if (*options.service_start_command) { - maxlen_snprintf(script, "%s", options.start_command); + maxlen_snprintf(script, "%s", options.service_start_command); } else { @@ -5858,9 +5864,17 @@ do_witness_create(void) } /* reload witness server to activate the copied pg_hba.conf */ - maxlen_snprintf(script, "%s %s -w -D %s reload", - make_pg_path("pg_ctl"), - options.pg_ctl_options, runtime_options.dest_dir); + if (*options.service_reload_command) + { + maxlen_snprintf(script, "%s", options.service_reload_command); + } + else + { + maxlen_snprintf(script, "%s %s -w -D %s reload", + make_pg_path("pg_ctl"), + options.pg_ctl_options, runtime_options.dest_dir); + } + log_info(_("reloading witness server configuration: %s"), script); r = system(script); if (r != 0) @@ -6169,7 +6183,7 @@ do_help(void) printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname()); printf(_(" %s [OPTIONS] witness {create|register|unregister}\n"), progname()); - printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname()); + printf(_(" %s [OPTIONS] cluster {show|matrix|crosscheck|cleanup}\n"), progname()); printf(_("\n")); printf(_("General options:\n")); printf(_(" -?, --help show this help, 
then exit\n")); diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 58a558db..b3dc3d06 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -106,8 +106,8 @@ # service control commands # -# repmgr provides options to to override the default pg_ctl commands -# used to stop, start and restart the PostgreSQL cluster +# repmgr provides options to override the default pg_ctl commands +# used to stop, start, restart, reload and promote the PostgreSQL cluster # # NOTE: These commands must be runnable on remote nodes as well for switchover # to function correctly. @@ -123,9 +123,11 @@ # /usr/bin/systemctl start postgresql-9.5, \ # /usr/bin/systemctl restart postgresql-9.5 # -# start_command = systemctl start postgresql-9.5 -# stop_command = systemctl stop postgresql-9.5 -# restart_command = systemctl restart postgresql-9.5 +# service_start_command = systemctl start postgresql-9.5 +# service_stop_command = systemctl stop postgresql-9.5 +# service_restart_command = systemctl restart postgresql-9.5 +# service_reload_command = pg_ctlcluster 9.5 main reload +# service_promote_command = pg_ctlcluster 9.5 main promote # external command options