From 112ca6321af8f6a54777d4540684d78b816ebe69 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 3 Aug 2017 16:38:37 +0900 Subject: [PATCH] Initial switchover implementation The repmgr3 implementation required the promotion candidate (standby) to directly work with the demotion candidate's data directory, directly execute server control commands etc. Here we delegate a lot more of that work to the repmgr on the demotion candidate, which reduces the amount of back-and-forth over SSH and generally makes things cleaner and smoother. In particular the repmgr on the demotion candidate will carry out a thorough check that the node is shut down and report the last checkpoint LSN to the promotion candidate; this can then be used to determine whether pg_rewind needs to be executed on the demoted primary before reintegrating it back into the cluster (todo). Also implement "--dry-run" for this action, which will sanity-check the nodes as far as possible without executing the switchover. Additionally some of the new repmgr node commands (or command options) introduced for this can also be executed by the user to obtain additional information about the status of each node. --- README.md | 13 + configfile.c | 24 +- configfile.h | 2 +- controldata.c | 22 +- controldata.h | 3 + dbutils.c | 57 ++-- dbutils.h | 5 +- dirutil.c | 12 +- errcode.h | 1 + repmgr-action-node.c | 174 +++++++++++- repmgr-action-standby.c | 610 +++++++++++++++++++++++++++++++++++++++- repmgr-client-global.h | 24 +- repmgr-client.c | 124 ++++++-- repmgr-client.h | 42 +-- repmgr.conf.sample | 13 +- 15 files changed, 1024 insertions(+), 102 deletions(-) diff --git a/README.md b/README.md index 1075989a..7afdbdb2 100644 --- a/README.md +++ b/README.md @@ -299,3 +299,16 @@ Note that under some circumstances (e.g.
no replication cluster master could be located), it will not be possible to write an entry into the `repmgr.events` table, in which case executing a script via `event_notification_command` can serve as a fallback by generating some form of notification. + + +Diagnostics +----------- + + $ repmgr -f /etc/repmgr.conf node service --list-actions + Following commands would be executed for each action: + + start: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' start" + stop: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -D '/var/lib/pgsql/data' -m fast -W stop" + restart: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' restart" + reload: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' reload" + promote: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' promote" diff --git a/configfile.c b/configfile.c index 1a4f264d..01bcdb0b 100644 --- a/configfile.c +++ b/configfile.c @@ -209,7 +209,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->node_id = UNKNOWN_NODE_ID; memset(options->node_name, 0, sizeof(options->node_name)); memset(options->conninfo, 0, sizeof(options->conninfo)); - memset(options->pgdata, 0, sizeof(options->pgdata)); + memset(options->data_directory, 0, sizeof(options->data_directory)); memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); options->replication_type = REPLICATION_TYPE_PHYSICAL; @@ -228,6 +228,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->use_replication_slots = false; memset(options->rsync_options, 0, sizeof(options->rsync_options)); memset(options->ssh_options, 0, sizeof(options->ssh_options)); + strncpy(options->ssh_options, "-q", sizeof(options->ssh_options)); memset(options->replication_user, 0, sizeof(options->replication_user)); memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options)); 
memset(options->restore_command, 0, sizeof(options->restore_command)); @@ -342,8 +343,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * strncpy(options->node_name, value, MAXLEN); else if (strcmp(name, "conninfo") == 0) strncpy(options->conninfo, value, MAXLEN); - else if (strcmp(name, "pgdata") == 0) - strncpy(options->pgdata, value, MAXPGPATH); + else if (strcmp(name, "data_directory") == 0) + strncpy(options->data_directory, value, MAXPGPATH); else if (strcmp(name, "replication_user") == 0) { if (strlen(value) < NAMEDATALEN) @@ -557,9 +558,9 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * item_list_append(error_list, _("\"node_name\": required parameter was not found")); } - if (!strlen(options->pgdata)) + if (!strlen(options->data_directory)) { - item_list_append(error_list, _("\"pgdata\": required parameter was not found")); + item_list_append(error_list, _("\"data_directory\": required parameter was not found")); } if (!strlen(options->conninfo)) @@ -1008,10 +1009,10 @@ parse_event_notifications_list(t_configuration_options *options, const char *arg bool parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_options *backup_options, int server_version_num, ItemList *error_list) { - int options_len = strlen(pg_basebackup_options) + 1; - char *options_string = pg_malloc(options_len); + int options_len = 0; + char *options_string = NULL; + char *options_string_ptr = NULL; - char *options_string_ptr = options_string; /* * Add parsed options to this list, then copy to an array * to pass to getopt @@ -1054,6 +1055,10 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti if (!strlen(pg_basebackup_options)) return backup_options_ok; + options_len = strlen(pg_basebackup_options) + 1; + options_string = pg_malloc(options_len); + options_string_ptr = options_string; + if (server_version_num >= 100000) long_options = long_options_10; else @@ -1143,5 
+1148,8 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti backup_options_ok = false; } + pfree(options_string); + pfree(argv_array); + return backup_options_ok; } diff --git a/configfile.h b/configfile.h index 32f8484c..3059b7b9 100644 --- a/configfile.h +++ b/configfile.h @@ -53,8 +53,8 @@ typedef struct int node_id; char node_name[MAXLEN]; char conninfo[MAXLEN]; - char pgdata[MAXPGPATH]; char replication_user[NAMEDATALEN]; + char data_directory[MAXPGPATH]; char pg_bindir[MAXPGPATH]; int replication_type; diff --git a/controldata.c b/controldata.c index cc46b446..7c37e0e1 100644 --- a/controldata.c +++ b/controldata.c @@ -36,6 +36,27 @@ get_db_state(char *data_directory) return state; } + +XLogRecPtr +get_latest_checkpoint_location(char *data_directory) +{ + ControlFileInfo *control_file_info; + XLogRecPtr checkPoint; + + control_file_info = get_controlfile(data_directory); + + if (control_file_info->control_file_processed == false) + return InvalidXLogRecPtr; + + checkPoint = control_file_info->control_file->checkPoint; + + pfree(control_file_info->control_file); + pfree(control_file_info); + + return checkPoint; +} + + const char * describe_db_state(DBState state) { @@ -101,7 +122,6 @@ get_controlfile(char *DataDir) * against. 
However we're only interested in the first few fields, which * should be constant across supported versions * - * XXX double-check this */ return control_file_info; diff --git a/controldata.h b/controldata.h index 4b5a1a8b..655b283e 100644 --- a/controldata.h +++ b/controldata.h @@ -24,4 +24,7 @@ get_db_state(char *data_directory); extern const char * describe_db_state(DBState state); +extern XLogRecPtr +get_latest_checkpoint_location(char *data_directory); + #endif /* _CONTROLDATA_H_ */ diff --git a/dbutils.c b/dbutils.c index 0993496b..49d951f4 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1036,7 +1036,7 @@ _get_primary_connection(PGconn *conn, } /* find all registered nodes */ - log_info(_("retrieving node list")); + log_verbose(LOG_INFO, _("retrieving node list")); initPQExpBuffer(&query); appendPQExpBuffer(&query, @@ -1182,6 +1182,9 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info) PQExpBufferData query; PGresult *res; + if (server_version_num == UNKNOWN_SERVER_VERSION_NUM) + server_version_num = get_server_version(conn, NULL); + initPQExpBuffer(&query); appendPQExpBuffer( &query, @@ -1297,6 +1300,28 @@ get_repmgr_extension_status(PGconn *conn) return REPMGR_AVAILABLE; } +/* ========================= */ +/* node management functions */ +/* ========================= */ + +/* assumes superuser connection */ +void +checkpoint(PGconn *conn) +{ + PGresult *res; + + res = PQexec(conn, "CHECKPOINT"); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + log_error(_("unable to execute CHECKPOINT")); + log_detail("%s", PQerrorMessage(conn)); + } + + PQclear(res); + return; +} + /* ===================== */ /* Node record functions */ @@ -1819,7 +1844,7 @@ _create_update_node_record(PGconn *conn, char *action, t_node_info *node_info) termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + if (PQresultStatus(res) != PGRES_COMMAND_OK) { log_error(_("unable to %s node record:\n %s"), action, @@ -1854,7 +1879,7 @@ 
update_node_record_set_active(PGconn *conn, int this_node_id, bool active) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + if (PQresultStatus(res) != PGRES_COMMAND_OK) { log_error(_("unable to update node record:\n %s"), PQerrorMessage(conn)); @@ -2065,7 +2090,7 @@ delete_node_record(PGconn *conn, int node) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + if (PQresultStatus(res) != PGRES_COMMAND_OK) { log_error(_("unable to delete node record:\n %s"), PQerrorMessage(conn)); @@ -2478,7 +2503,7 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { /* we don't treat this as an error */ log_warning(_("unable to create event record:\n %s"), @@ -2717,7 +2742,7 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { appendPQExpBuffer(error_msg, _("unable to create slot '%s' on the upstream node: %s\n"), @@ -2750,7 +2775,7 @@ drop_replication_slot(PGconn *conn, char *slot_name) termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to drop replication slot \"%s\":\n %s"), slot_name, @@ -2786,7 +2811,7 @@ get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record) termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to query pg_replication_slots:\n %s"), PQerrorMessage(conn)); @@ -2991,7 +3016,7 @@ get_voting_status(PGconn *conn) res = PQexec(conn, "SELECT repmgr.get_voting_status()"); - if (!res || 
PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to query repmgr.get_voting_status():\n %s"), PQerrorMessage(conn)); @@ -3169,7 +3194,7 @@ notify_follow_primary(PGconn *conn, int primary_node_id) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"), PQerrorMessage(conn)); @@ -3228,7 +3253,7 @@ reset_voting_status(PGconn *conn) termPQExpBuffer(&query); // COMMAND_OK? - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to execute repmgr.reset_voting_status():\n %s"), PQerrorMessage(conn)); @@ -3448,7 +3473,7 @@ add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to add table 'repmgr.%s' to replication set '%s':\n %s"), tablename, @@ -3485,7 +3510,7 @@ bdr_node_exists(PGconn *conn, const char *node_name) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { node_exists = false; } @@ -3524,7 +3549,7 @@ get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { status = SLOT_UNKNOWN; } @@ -3596,7 +3621,7 @@ add_extension_tables_to_bdr_replication_set(PGconn *conn) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { // } @@ -3679,7 +3704,7 @@ get_bdr_node_record_by_name(PGconn *conn, const 
char *node_name, t_bdr_node_info res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to retrieve BDR node record for \"%s\":\n %s"), node_name, diff --git a/dbutils.h b/dbutils.h index fe914cd3..795497e2 100644 --- a/dbutils.h +++ b/dbutils.h @@ -326,8 +326,11 @@ bool get_replication_info(PGconn *conn, ReplInfo *replication_info); /* extension functions */ ExtensionStatus get_repmgr_extension_status(PGconn *conn); +/* node management functions */ +void checkpoint(PGconn *conn); + /* result functions */ -bool atobool(const char *value); +bool atobool(const char *value); /* node record functions */ t_server_type parse_node_type(const char *type); diff --git a/dirutil.c b/dirutil.c index 9f80ad38..6cccd721 100644 --- a/dirutil.c +++ b/dirutil.c @@ -216,21 +216,15 @@ mkdir_p(char *path, mode_t omode) bool is_pg_dir(char *path) { - const size_t buf_sz = 8192; - char dirpath[buf_sz]; + char dirpath[MAXPGPATH]; struct stat sb; - int r; /* test pgdata */ - snprintf(dirpath, buf_sz, "%s/PG_VERSION", path); + snprintf(dirpath, MAXPGPATH, "%s/PG_VERSION", path); if (stat(dirpath, &sb) == 0) return true; - /* test tablespace dir */ - sprintf(dirpath, "ls %s/PG_*/ -I*", path); - r = system(dirpath); - if (r == 0) - return true; + /* TODO: sanity check other files */ return false; } diff --git a/errcode.h b/errcode.h index d9b7d9a0..26e7273f 100644 --- a/errcode.h +++ b/errcode.h @@ -13,6 +13,7 @@ #define ERR_BAD_RSYNC 2 #define ERR_BAD_PIDFILE 3 #define ERR_NO_RESTART 4 +#define ERR_LOCAL_COMMAND 5 #define ERR_DB_CONN 6 #define ERR_DB_QUERY 7 #define ERR_PROMOTION_FAIL 8 diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 15a71730..f1b771af 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -10,6 +10,8 @@ #include #include "repmgr.h" +#include "controldata.h" +#include "dirutil.h" #include "repmgr-client-global.h" #include 
"repmgr-action-node.h" @@ -19,7 +21,8 @@ static void format_archive_dir(char *archive_dir); static t_server_action parse_server_action(const char *action); static void _do_node_service_check(void); -static void _do_node_service_list(t_server_action action); +static void _do_node_service_list_actions(t_server_action action); +static void _do_node_status_is_shutdown(void); void do_node_status(void) @@ -39,6 +42,12 @@ do_node_status(void) RecoveryType recovery_type; ReplInfo replication_info = T_REPLINFO_INTIALIZER; + if (runtime_options.is_shutdown == true) + { + return _do_node_status_is_shutdown(); + } + + if (strlen(config_file_options.conninfo)) conn = establish_db_connection(config_file_options.conninfo, true); else @@ -59,6 +68,7 @@ do_node_status(void) PQfinish(conn); exit(ERR_BAD_CONFIG); } + (void) get_server_version(conn, server_version); if (get_cluster_size(conn, cluster_size) == false) @@ -279,6 +289,92 @@ do_node_status(void) } +/* + * --status=(RUNNING|SHUTDOWN|UNKNOWN) + * --last-checkpoint=... 
+ */ + +static +void _do_node_status_is_shutdown(void) +{ + PGPing status; + PQExpBufferData output; + + bool is_shutdown = true; + DBState db_state; + XLogRecPtr checkPoint; + + initPQExpBuffer(&output); + + appendPQExpBuffer( + &output, + "--state="); + + /* sanity-check we're dealing with a PostgreSQL directory */ + if (is_pg_dir(config_file_options.data_directory) == false) + { + appendPQExpBuffer(&output, "UNKNOWN"); + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; + } + + + status = PQping(config_file_options.conninfo); + + switch (status) + { + case PQPING_OK: + appendPQExpBuffer(&output, "RUNNING"); + is_shutdown = false; + break; + case PQPING_REJECT: + appendPQExpBuffer(&output, "RUNNING"); + is_shutdown = false; + break; + case PQPING_NO_ATTEMPT: + case PQPING_NO_RESPONSE: + /* status not yet clear */ + break; + } + + /* ping was inconclusive: check what pg_controldata says (state is emitted only once) */ + + db_state = get_db_state(config_file_options.data_directory); + + if (is_shutdown == true && db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY) + { + appendPQExpBuffer(&output, "RUNNING"); + is_shutdown = false; + } + + + checkPoint = get_latest_checkpoint_location(config_file_options.data_directory); + + /* unable to read pg_control, don't know what's happening */ + if (is_shutdown == true && checkPoint == InvalidXLogRecPtr) + { + appendPQExpBuffer(&output, "UNKNOWN"); + is_shutdown = false; + } + + /* server is running or in an unknown state - just output --state */ + if (is_shutdown == false) + { + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; + } + + appendPQExpBuffer(&output, + "SHUTDOWN --last-checkpoint-lsn=%X/%X", + format_lsn(checkPoint)); + + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; +} + void do_node_check(void) @@ -289,10 +385,15 @@ do_node_check(void) // --action=...
// --check // --list -> list what would be executed for each action, filter to --action + +// --checkpoint must be run as superuser - check connection void do_node_service(void) { - t_server_action action; + t_server_action action = ACTION_UNKNOWN; + char data_dir[MAXPGPATH] = ""; + char command[MAXLEN] = ""; + PQExpBufferData output; action = parse_server_action(runtime_options.action); @@ -312,23 +413,66 @@ do_node_service(void) return _do_node_service_check(); } - if (runtime_options.list == true) + if (runtime_options.list_actions == true) { - return _do_node_service_list(action); + return _do_node_service_list_actions(action); } - // do we need data directory? - // - service command defined for action ? -> no - // -> yes - // - pgdata defined in config? OK - // - // - connection available? - // -> get data dir OK (superuser connection issue) + if (data_dir_required_for_action(action)) + { + get_node_data_directory(data_dir); + + if (data_dir[0] == '\0') + { + log_error(_("unable to determine data directory for action")); + exit(ERR_BAD_CONFIG); + } + } - // perform action... - // --dry-run: print only + if ((action == ACTION_STOP || action == ACTION_RESTART) && runtime_options.checkpoint == true) + { + if (runtime_options.dry_run == true) + { + log_info(_("a CHECKPOINT would be issued here")); + } + else + { + PGconn *conn; + + if (strlen(config_file_options.conninfo)) + conn = establish_db_connection(config_file_options.conninfo, true); + else + conn = establish_db_connection_by_params(&source_conninfo, true); + + log_notice(_("issuing CHECKPOINT")); + + // check superuser conn! 
+ checkpoint(conn); + + PQfinish(conn); + } + } + + get_server_action(action, command, data_dir); + + if (runtime_options.dry_run == true) + { + log_info(_("would execute server command \"%s\""), command); + return; + } + + log_notice(_("executing server command \"%s\""), command); + + initPQExpBuffer(&output); + + if (local_command(command, &output) == false) + { + exit(ERR_LOCAL_COMMAND); + } + + termPQExpBuffer(&output); } @@ -339,7 +483,7 @@ _do_node_service_check(void) static void -_do_node_service_list(t_server_action action) +_do_node_service_list_actions(t_server_action action) { char command[MAXLEN] = ""; @@ -644,6 +788,8 @@ do_node_restore_config(void) } + + static void format_archive_dir(char *archive_dir) { diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 7af83380..feb8b3c9 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -77,6 +77,7 @@ static void get_barman_property(char *dst, char *name, char *local_repmgr_direct static int get_tablespace_data_barman(char *, TablespaceDataList *); static char *make_barman_ssh_command(char *buf); +static NodeStatus parse_node_status_is_shutdown(const char *node_status_output, XLogRecPtr *checkPoint); /* * do_standby_clone() @@ -1305,10 +1306,23 @@ do_standby_follow(void) */ else { + if (config_file_options.data_directory[0] == '\0') + { + if (runtime_options.data_dir[0] == '\0') + { + log_error(_("-D/--pgdata required when providing connection parameters for \"standby follow\"")); + exit(ERR_BAD_CONFIG); + } + strncpy(data_dir, runtime_options.data_dir, MAXPGPATH); + } + else + { + strncpy(data_dir, config_file_options.data_directory, MAXPGPATH); + } + primary_conn = establish_db_connection_by_params(&source_conninfo, true); primary_id = get_primary_node_id(primary_conn); - strncpy(data_dir, runtime_options.data_dir, MAXPGPATH); } if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY) @@ -1530,10 +1544,478 @@ do_standby_follow(void) return; } + +/* + * Perform a switchover by: 
+ * - stopping current primary node + * - promoting this standby node to primary + * - forcing previous primary node to follow this node + * + * Caveats: + * - repmgrd must not be running, otherwise it may + * attempt a failover + * (TODO: find some way of notifying repmgrd of planned + * activity like this) + * - currently only set up for two-node operation; any other + * standbys will probably become downstream cascaded standbys + * of the old primary once it's restarted + * - as we're executing repmgr remotely (on the old primary), + * we'll need the location of its configuration file; this + * can be provided explicitly with -C/--remote-config-file, + * otherwise repmgr will look in default locations on the + * remote server + * + * TODO: + * - make connection test timeouts/intervals configurable (see below) + */ + + void do_standby_switchover(void) { - puts("not implemented"); + PGconn *local_conn; + PGconn *remote_conn; + + t_node_info local_node_record = T_NODE_INFO_INITIALIZER; + + + /* the remote server is the primary to be demoted */ + char remote_conninfo[MAXCONNINFO] = ""; + char remote_host[MAXLEN] = ""; + int remote_node_id; + t_node_info remote_node_record = T_NODE_INFO_INITIALIZER; + + RecordStatus record_status; + RecoveryType recovery_type; + PQExpBufferData remote_command_str; + PQExpBufferData command_output; + + int r, i; + + bool shutdown_success = false; + XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr; + ReplInfo replication_info = T_REPLINFO_INTIALIZER; + + /* + * SANITY CHECKS + * + * We'll be doing a bunch of operations on the remote server (primary + * to be demoted) - careful checks needed before proceeding.
+ */ + + local_conn = establish_db_connection(config_file_options.conninfo, true); + + record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record); + if (record_status != RECORD_FOUND) + { + log_error(_("unable to retrieve node record for node %i"), + config_file_options.node_id); + + PQfinish(local_conn); + exit(ERR_DB_QUERY); + } + + if (!is_streaming_replication(local_node_record.type)) + { + log_error(_("switchover can only performed with streaming replication")); + PQfinish(local_conn); + exit(ERR_BAD_CONFIG); + } + + if (runtime_options.dry_run == true) + { + log_notice(_("checking switchover on node \"%s\" (ID: %i) in --dry-run mode"), + local_node_record.node_name, + local_node_record.node_id); + } + else + { + log_notice(_("executing switchover on node \"%s\" (ID: %i)"), + local_node_record.node_name, + local_node_record.node_id); + } + + /* Check that this is a standby */ + recovery_type = get_recovery_type(local_conn); + if (recovery_type != RECTYPE_STANDBY) + { + log_error(_("switchover must be executed from the standby node to be promoted")); + if (recovery_type == RECTYPE_PRIMARY) + { + log_detail(_("this node (ID: %i) is the primary"), + local_node_record.node_id); + } + PQfinish(local_conn); + + exit(ERR_SWITCHOVER_FAIL); + } + + /* check remote server connection and retrieve its record */ + remote_conn = get_primary_connection(local_conn, &remote_node_id, remote_conninfo); + + if (PQstatus(remote_conn) != CONNECTION_OK) + { + log_error(_("unable to connect to current primary node")); + log_hint(_("check that the cluster is correctly configured and this standby is registered")); + PQfinish(local_conn); + exit(ERR_DB_CONN); + } + + record_status = get_node_record(remote_conn, remote_node_id, &remote_node_record); + + if (record_status != RECORD_FOUND) + { + log_error(_("unable to retrieve node record for node %i"), + remote_node_id); + + PQfinish(local_conn); + PQfinish(remote_conn); + + exit(ERR_DB_QUERY); + } + + /* 
+ * Check this standby is attached to the demotion candidate + * TODO: + * - check standby is attached to demotion candidate + * (compare primary_conninfo from recovery.conf) + */ + + if (local_node_record.upstream_node_id != remote_node_record.node_id) + { + log_error(_("local node %i is not a downstream of demotion candidate primary %i"), + local_node_record.node_id, + remote_node_record.node_id); + + PQfinish(local_conn); + PQfinish(remote_conn); + + exit(ERR_BAD_CONFIG); + } + + log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name); + + PQfinish(local_conn); + PQfinish(remote_conn); + + /* + * Check that we can connect by SSH to the remote (current primary) server + */ + get_conninfo_value(remote_conninfo, "host", remote_host); + + r = test_ssh_connection(remote_host, runtime_options.remote_user); + + if (r != 0) + { + log_error(_("unable to connect via SSH to host \"%s\", user \"%s\""), + remote_host, runtime_options.remote_user); + exit(ERR_BAD_CONFIG); + } + + + /* Determine the remote's configuration file location */ + /* -------------------------------------------------- */ + + /* Remote configuration file provided - check it exists */ + /* TODO have remote node verify config file "node status --config-file */ + if (runtime_options.remote_config_file[0]) + + { + log_verbose(LOG_INFO, _("looking for file \"%s\" on remote server \"%s\""), + runtime_options.remote_config_file, + remote_host); + + initPQExpBuffer(&remote_command_str); + appendPQExpBuffer(&remote_command_str, "ls "); + + appendShellString(&remote_command_str, runtime_options.remote_config_file); + appendPQExpBuffer(&remote_command_str, " >/dev/null 2>&1 && echo 1 || echo 0"); + + initPQExpBuffer(&command_output); + + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (*command_output.data == '0') + { + log_error(_("unable to find the specified
repmgr configuration file on remote server")); + log_detail(_("remote configuration file is \"%s\""), + runtime_options.remote_config_file); + exit(ERR_BAD_CONFIG); + } + + log_verbose(LOG_INFO, _("remote configuration file \"%s\" found on remote server"), + runtime_options.remote_config_file); + + termPQExpBuffer(&command_output); + } + /* + * No remote configuration file provided - check some default locations: + * - path of configuration file for this repmgr + * - /etc/repmgr.conf + */ + else + { + int i; + bool remote_config_file_found = false; + + const char *config_paths[] = { + runtime_options.config_file, + "/etc/repmgr.conf", + NULL + }; + + log_verbose(LOG_INFO, _("no remote configuration file provided - checking default locations")); + + for (i = 0; config_paths[i] && remote_config_file_found == false; ++i) + { + /* + * Don't attempt to check for an empty filename - this might be the case + * if no local configuration file was found. + */ + if (!strlen(config_paths[i])) + continue; + + log_verbose(LOG_INFO, _("checking \"%s\"\n"), config_paths[i]); + + initPQExpBuffer(&remote_command_str); + appendPQExpBuffer(&remote_command_str, "ls "); + + appendShellString(&remote_command_str, config_paths[i]); + appendPQExpBuffer(&remote_command_str, " >/dev/null 2>&1 && echo 1 || echo 0"); + + initPQExpBuffer(&command_output); + + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (*command_output.data == '1') + { + strncpy(runtime_options.remote_config_file, config_paths[i], MAXLEN); + log_verbose(LOG_INFO, _("configuration file \"%s\" found on remote server"), + runtime_options.remote_config_file); + remote_config_file_found = true; + } + + termPQExpBuffer(&command_output); + } + + if (remote_config_file_found == false) + { + log_error(_("no remote configuration file supplied or found in a default location - terminating")); + log_hint(_("specify the 
remote configuration file with -C/--remote-config-file")); + exit(ERR_BAD_CONFIG); + } + } + + + /* + * Sanity checks completed - prepare for the switchover + */ + + log_detail(_("local node \"%s\" (ID: %i) will be promoted to primary; " + "current primary \"%s\" (ID: %i) will be demoted to standby"), + local_node_record.node_name, + local_node_record.node_id, + remote_node_record.node_name, + remote_node_record.node_id); + + /* + * Stop the remote primary + * + * We'll issue the pg_ctl command but force it not to wait; we'll check + * the connection from here - and error out if no shutdown is detected + * after a certain time. + */ + + // TODO: check remote node for archive status etc. + + initPQExpBuffer(&remote_command_str); + initPQExpBuffer(&command_output); + + make_remote_repmgr_path(&remote_command_str); + + + if (runtime_options.dry_run == true) + { + appendPQExpBuffer(&remote_command_str, + "node service --terse -LERROR --list-actions --action=stop"); + + } + else + { + appendPQExpBuffer(&remote_command_str, + "node service --action=stop --checkpoint"); + } + + // XXX handle failure + + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + + termPQExpBuffer(&remote_command_str); + + + /* + * --dry-run ends here with display of command which would be used to + * shut down the remote server + */ + if (runtime_options.dry_run == true) + { + log_info(_("following shutdown command would be run on node \"%s\":\n \"%s\""), + remote_node_record.node_name, + command_output.data); + termPQExpBuffer(&command_output); + return; + } + + termPQExpBuffer(&command_output); + shutdown_success = false; + + /* loop for timeout waiting for current primary to stop */ + + for (i = 0; i < config_file_options.reconnect_attempts; i++) + { + /* Check whether primary is available */ + PGPing ping_res; + + log_info(_("checking primary status; %i of %i attempts"), + i + 1, config_file_options.reconnect_attempts); +
ping_res = PQping(remote_conninfo); + + + /* database server could not be contacted */ + if (ping_res == PQPING_NO_RESPONSE || ping_res == PQPING_NO_ATTEMPT) + { + bool command_success; + + /* + * remote server can't be contacted at protocol level - that + * doesn't necessarily mean it's shut down, so we'll ask + * its repmgr to check at data directory level, and if shut down + * also return the last checkpoint LSN. + */ + + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str); + appendPQExpBuffer(&remote_command_str, + "node status --is-shutdown"); + + initPQExpBuffer(&command_output); + + command_success = remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (command_success == true) + { + NodeStatus status = parse_node_status_is_shutdown(command_output.data, &remote_last_checkpoint_lsn); + + if (status == NODE_STATUS_DOWN && remote_last_checkpoint_lsn != InvalidXLogRecPtr) + { + shutdown_success = true; + log_notice(_("current primary has been shut down at location %X/%X"), + format_lsn(remote_last_checkpoint_lsn)); + termPQExpBuffer(&command_output); + + break; + } + } + + termPQExpBuffer(&command_output); + } + + /* XXX make configurable?
*/ + sleep(config_file_options.reconnect_interval); + /* no extra i++ here: the for statement already advances the attempt counter */ + } + + if (shutdown_success == false) + { + log_error(_("shutdown of the primary server could not be confirmed")); + log_hint(_("check the primary server status before performing any further actions")); + exit(ERR_SWITCHOVER_FAIL); + } + + + local_conn = establish_db_connection(config_file_options.conninfo, false); + + if (PQstatus(local_conn) != CONNECTION_OK) + { + log_error(_("unable to reestablish connection to local node \"%s\""), + local_node_record.node_name); + exit(ERR_DB_CONN); + } + + get_replication_info(local_conn, &replication_info); + + if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) + { + log_warning(_("local node \"%s\" is behind shutdown primary \"%s\""), + local_node_record.node_name, + remote_node_record.node_name); + log_detail(_("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X"), + format_lsn(replication_info.last_wal_receive_lsn), + format_lsn(remote_last_checkpoint_lsn)); + + if (runtime_options.always_promote == false) + { + log_notice(_("aborting switchover")); + log_hint(_("use --always-promote to force promotion of standby")); + PQfinish(local_conn); + exit(ERR_SWITCHOVER_FAIL); + } + } + + /* promote standby */ + + // XXX need stripped-down version which skips the sanity checks etc + do_standby_promote(); + + if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) + { + // if --force-rewind was supplied, do that now, otherwise exit + } + + /* + * Execute `repmgr standby follow` to create recovery.conf and start + * the remote server + * + * XXX replace with "node rejoin" + */ + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str); + appendPQExpBuffer(&remote_command_str, + " -d \\'%s\\' standby follow", + local_node_record.conninfo); + log_debug("executing:\n \"%s\"", remote_command_str.data); + (void)remote_command( + remote_host, + runtime_options.remote_user, +
remote_command_str.data, + NULL); + + termPQExpBuffer(&remote_command_str); + + return; } @@ -2708,6 +3190,7 @@ copy_configuration_files(void) log_notice(_("copying external configuration files from upstream node")); r = test_ssh_connection(host, runtime_options.remote_user); + if (r != 0) { log_error(_("remote host %s is not reachable via SSH - unable to copy external configuration files"), @@ -2876,3 +3359,126 @@ drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name) } } } + + + +static NodeStatus +parse_node_status_is_shutdown(const char *node_status_output, XLogRecPtr *checkPoint) +{ + int options_len = 0; + char *options_string = NULL; + char *options_string_ptr = NULL; + NodeStatus node_status = NODE_STATUS_UNKNOWN; + + /* + * Add parsed options to this list, then copy to an array + * to pass to getopt + */ + static ItemList option_argv = { NULL, NULL }; + + char *argv_item; + int c, argc_item = 1; + + char **argv_array; + ItemListCell *cell; + + int optindex = 0; + + /* We're only interested in these options */ + static struct option long_options[] = + { + {"last-checkpoint-lsn", required_argument, NULL, 'L'}, + {"state", required_argument, NULL, 'S'}, + {NULL, 0, NULL, 0} + }; + + /* Don't attempt to tokenise an empty string */ + if (!strlen(node_status_output)) + { + *checkPoint = InvalidXLogRecPtr; + return node_status; + } + + options_len = strlen(node_status_output) + 1; + options_string = pg_malloc(options_len); + options_string_ptr = options_string; + + /* Copy the string before operating on it with strtok() */ + strncpy(options_string, node_status_output, options_len); + + /* Extract arguments into a list and keep a count of the total */ + while ((argv_item = strtok(options_string_ptr, " ")) != NULL) + { + item_list_append(&option_argv, argv_item); + + argc_item++; + + if (options_string_ptr != NULL) + options_string_ptr = NULL; + } + + /* + * Array of argument values to pass to getopt_long - this will need to + * include an 
empty string as the first value (normally this would be + * the program name) + */ + argv_array = pg_malloc0(sizeof(char *) * (argc_item + 2)); + + /* Insert a blank dummy program name at the start of the array */ + argv_array[0] = pg_malloc0(1); + + c = 1; + + /* + * Copy the previously extracted arguments from our list to the array + */ + for (cell = option_argv.head; cell; cell = cell->next) + { + int argv_len = strlen(cell->string) + 1; + + argv_array[c] = pg_malloc0(argv_len); + + strncpy(argv_array[c], cell->string, argv_len); + + c++; + } + + argv_array[c] = NULL; + + /* Reset getopt's optind variable */ + optind = 0; + + /* Prevent getopt from emitting errors */ + opterr = 0; + + while ((c = getopt_long(argc_item, argv_array, "L:S:", long_options, + &optindex)) != -1) + { + switch (c) + { + /* --last-checkpoint-lsn */ + case 'L': + *checkPoint = parse_lsn(optarg); + break; + /* --state */ + case 'S': + { + if (strncmp(optarg, "RUNNING", MAXLEN) == 0) + { + node_status = NODE_STATUS_UP; + } + else if (strncmp(optarg, "SHUTDOWN", MAXLEN) == 0) + { + node_status = NODE_STATUS_DOWN; + } + else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0) + { + node_status = NODE_STATUS_UNKNOWN; + } + } + break; + } + } + + return node_status; +} diff --git a/repmgr-client-global.h b/repmgr-client-global.h index d7cfdbab..9ebe4ac5 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -73,10 +73,19 @@ typedef struct bool wait_register_sync; int wait_register_sync_seconds; + /* "standby switchover" options */ + char remote_config_file[MAXPGPATH]; + bool always_promote; + bool force_rewind; + + /* "node status" options */ + bool is_shutdown; + /* "node service" options */ char action[MAXLEN]; bool check; - bool list; + bool list_actions; + bool checkpoint; /* "cluster event" options */ bool all; @@ -99,15 +108,20 @@ typedef struct /* database connection options */ \ "", "", "", "", \ /* other connection options */ \ - "", "", \ + "", "", \ /* node options */ \ 
UNKNOWN_NODE_ID, "", "", \ /* "standby clone" options */ \ - false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", NO_UPSTREAM_NODE, false, "", false, \ + false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \ + NO_UPSTREAM_NODE, false, "", false, \ /* "standby register" options */ \ false, 0, \ + /* "standby switchover" options */ \ + "", false, false, \ + /* "node status" options */ \ + false, \ /* "node service" options */ \ - "", false, false, \ + "", false, false, false, \ /* "cluster event" options */ \ false, "", CLUSTER_EVENT_LIMIT, \ "/tmp" \ @@ -167,6 +181,8 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf); +extern void make_remote_repmgr_path(PQExpBufferData *outputbuf); + /* server control functions */ extern void get_server_action(t_server_action action, char *script, char *data_dir); extern bool data_dir_required_for_action(t_server_action action); diff --git a/repmgr-client.c b/repmgr-client.c index beb7a1a2..894a39ba 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -15,6 +15,7 @@ * STANDBY REGISTER * STANDBY UNREGISTER * STANDBY PROMOTE + * STANDBY SWITCHOVER * * BDR REGISTER * BDR UNREGISTER @@ -156,7 +157,7 @@ main(int argc, char **argv) strncpy(runtime_options.username, pw->pw_name, MAXLEN); } - while ((c = getopt_long(argc, argv, "?Vb:f:Fd:h:p:U:R:S:L:vtD:cr", long_options, + while ((c = getopt_long(argc, argv, "?Vb:f:Fd:h:p:U:R:S:L:vtD:crC:", long_options, &optindex)) != -1) { /* @@ -320,13 +321,15 @@ main(int argc, char **argv) { runtime_options.copy_external_config_files_destination = CONFIG_FILE_SAMEPATH; } - else if (strcmp(optarg, "pgdata") == 0) + /* allow "data_directory" as synonym for "pgdata" */ + else if (strcmp(optarg, "pgdata") == 0 || strcmp(optarg, "data_directory") == 0) { runtime_options.copy_external_config_files_destination = CONFIG_FILE_PGDATA; } else { - 
item_list_append(&cli_errors, _("value provided for '--copy-external-config-files' must be 'samepath' or 'pgdata'")); + item_list_append(&cli_errors, + _("value provided for \"--copy-external-config-files\" must be \"samepath\" or \"pgdata\"")); } } break; @@ -395,6 +398,28 @@ main(int argc, char **argv) } break; + /* "standby switchover" options * + * ---------------------------- */ + + case 'C': + strncpy(runtime_options.remote_config_file, optarg, MAXPGPATH); + break; + + case OPT_ALWAYS_PROMOTE: + runtime_options.always_promote = true; + break; + + case OPT_FORCE_REWIND: + runtime_options.force_rewind = true; + break; + + /* "node status" options * + * --------------------- */ + + case OPT_IS_SHUTDOWN: + runtime_options.is_shutdown = true; + break; + /* "node service" options * * ---------------------- */ @@ -403,14 +428,18 @@ main(int argc, char **argv) strncpy(runtime_options.action, optarg, MAXLEN); break; - case OPT_LIST: - runtime_options.list = true; + case OPT_LIST_ACTIONS: + runtime_options.list_actions = true; break; case OPT_CHECK: runtime_options.check = true; break; + case OPT_CHECKPOINT: + runtime_options.checkpoint = true; + break; + /* "cluster event" options * * ----------------------- */ @@ -1122,16 +1151,7 @@ check_cli_parameters(const int action) /* * if `repmgr standby follow` executed with host params, ensure data * directory was provided - * XXX not needed */ - if (runtime_options.host_param_provided == true) - { - if (runtime_options.data_dir[0] == '\0') - { - item_list_append_format(&cli_errors, - _("-D/--pgdata required when providing connection parameters for \"standby follow\"")); - } - } } break; @@ -1315,7 +1335,7 @@ check_cli_parameters(const int action) } /* repmgr node service --action */ - if (runtime_options.action) + if (runtime_options.action[0] != '\0') { switch (action) { @@ -1323,10 +1343,52 @@ check_cli_parameters(const int action) break; default: item_list_append_format(&cli_warnings, - _("--action not required when 
executing %s"), + _("--action will be ignored when executing %s"), action_name(action)); } } + + /* repmgr node status --is-shutdown */ + if (runtime_options.is_shutdown == true) + { + switch (action) + { + case NODE_STATUS: + break; + default: + item_list_append_format(&cli_warnings, + _("--is-shutdown will be ignored when executing %s"), + action_name(action)); + } + } + + if (runtime_options.always_promote == true) + { + switch (action) + { + case STANDBY_SWITCHOVER: + break; + default: + item_list_append_format(&cli_warnings, + _("--always-promote will be ignored when executing %s"), + action_name(action)); + } + } + + if ( runtime_options.force_rewind == true) + { + switch (action) + { + case STANDBY_SWITCHOVER: + break; + default: + item_list_append_format(&cli_warnings, + _("--force-rewind will be ignored when executing %s"), + action_name(action)); + } + } + + } @@ -1461,7 +1523,7 @@ do_help(void) puts(""); - printf(_("CLUSTER SHOW options:\n")); + printf(_("CLUSTER EVENT options:\n")); printf(_(" --limit maximum number of events to display (default: %i)\n"), CLUSTER_EVENT_LIMIT); printf(_(" --all display all events (overrides --limit)\n")); printf(_(" --event filter specific event\n")); @@ -1706,8 +1768,8 @@ test_ssh_connection(char *host, char *remote_user) /* - * Execute a command locally. If outputbuf == NULL, discard the - * output. + * Execute a command locally. 
"outputbuf" should either be an + * initialised PQexpbuffer, or NULL */ bool local_command(const char *command, PQExpBufferData *outputbuf) @@ -2496,7 +2558,7 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf appendPQExpBuffer(&ssh_host, "%s@", user); } - appendPQExpBuffer(&ssh_host, "%s",host); + appendPQExpBuffer(&ssh_host, "%s", host); maxlen_snprintf(ssh_command, "ssh -o Batchmode=yes %s %s %s", @@ -2506,7 +2568,7 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf termPQExpBuffer(&ssh_host); - log_debug("remote_command(): %s\n", ssh_command); + log_debug("remote_command():\n %s", ssh_command); fp = popen(ssh_command, "r"); @@ -2549,6 +2611,20 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf } +void +make_remote_repmgr_path(PQExpBufferData *output_buf) +{ + appendPQExpBuffer(output_buf, + "%s ", make_pg_path("repmgr")); + + if (runtime_options.remote_config_file[0] != '\0') + { + appendPQExpBuffer(output_buf, + "-f %s ", runtime_options.remote_config_file); + } +} + + /* ======================== */ /* server control functions */ /* ======================== */ @@ -2786,15 +2862,13 @@ data_dir_required_for_action(t_server_action action) void get_node_data_directory(char *data_dir_buf) { - PGconn *conn = NULL; - /* * the configuration file setting has priority, and will always be * set when a configuration file was provided */ - if (config_file_options.pgdata[0] != '\0') + if (config_file_options.data_directory[0] != '\0') { - strncpy(data_dir_buf, config_file_options.pgdata, MAXPGPATH); + strncpy(data_dir_buf, config_file_options.data_directory, MAXPGPATH); return; } diff --git a/repmgr-client.h b/repmgr-client.h index 22588a08..9bdd2daf 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -48,21 +48,23 @@ #define OPT_WITHOUT_BARMAN 12 #define OPT_NO_UPSTREAM_CONNECTION 13 #define OPT_REGISTER_WAIT 14 -#define OPT_CLUSTER 15 -#define OPT_LOG_TO_FILE 16 -#define 
OPT_UPSTREAM_CONNINFO 17 +#define OPT_LOG_TO_FILE 15 +#define OPT_UPSTREAM_CONNINFO 16 /* replaces --no-conninfo-password */ -#define OPT_USE_RECOVERY_CONNINFO_PASSWORD 18 -#define OPT_REPLICATION_USER 19 -#define OPT_EVENT 20 -#define OPT_LIMIT 21 -#define OPT_ALL 22 -#define OPT_DRY_RUN 23 -#define OPT_UPSTREAM_NODE_ID 24 -#define OPT_ACTION 25 +#define OPT_USE_RECOVERY_CONNINFO_PASSWORD 17 +#define OPT_REPLICATION_USER 18 +#define OPT_EVENT 19 +#define OPT_LIMIT 20 +#define OPT_ALL 21 +#define OPT_DRY_RUN 22 +#define OPT_UPSTREAM_NODE_ID 23 +#define OPT_ACTION 24 +#define OPT_LIST_ACTIONS 25 #define OPT_CHECK 26 -#define OPT_LIST 27 - +#define OPT_CHECKPOINT 27 +#define OPT_IS_SHUTDOWN 28 +#define OPT_ALWAYS_PROMOTE 29 +#define OPT_FORCE_REWIND 30 /* deprecated since 3.3 */ #define OPT_DATA_DIR 998 #define OPT_NO_CONNINFO_PASSWORD 999 @@ -97,7 +99,7 @@ static struct option long_options[] = /* logging options */ {"log-level", required_argument, NULL, 'L'}, {"log-to-file", no_argument, NULL, OPT_LOG_TO_FILE}, - {"terse", required_argument, NULL, 't'}, + {"terse", no_argument, NULL, 't'}, {"verbose", no_argument, NULL, 'v'}, /* output options */ @@ -118,10 +120,19 @@ static struct option long_options[] = /* "standby register" options */ {"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT}, +/* "standby switchover" options */ + {"remote-config-file", required_argument, NULL, 'C'}, + {"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE }, + {"force-rewind", no_argument, NULL, OPT_FORCE_REWIND }, + +/* "node status" options */ + {"is-shutdown", no_argument, NULL, OPT_IS_SHUTDOWN }, + /* "node service" options */ {"action", required_argument, NULL, OPT_ACTION}, {"check", no_argument, NULL, OPT_CHECK}, - {"list", no_argument, NULL, OPT_LIST}, + {"list-actions", no_argument, NULL, OPT_LIST_ACTIONS}, + {"checkpoint", no_argument, NULL, OPT_CHECKPOINT}, /* "cluster event" options */ {"all", no_argument, NULL, OPT_ALL }, @@ -139,7 +150,6 @@ static struct option 
long_options[] = /* not yet handled */ {"keep-history", required_argument, NULL, 'k'}, {"mode", required_argument, NULL, 'm'}, - {"remote-config-file", required_argument, NULL, 'C'}, {"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG}, {"pg_rewind", optional_argument, NULL, OPT_PG_REWIND}, {"pwprompt", optional_argument, NULL, OPT_PWPROMPT}, diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 7482d1d1..bd5801a5 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -35,7 +35,10 @@ # connection attempt is abandoned; for details see: # https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT -#pgdata # The node's data directory +#data_directory # The node's data directory. This is needed by repmgr + # when performing operations when the PostgreSQL instance + # is not running and there's no other way of determining + # the data directory. # ============================================================================= @@ -139,10 +142,10 @@ # rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\"" # ssh_options=-o "StrictHostKeyChecking no" -pg_ctl_options='' # Options to append to "pg_ctl" -pg_basebackup_options='' # Options to append to "pg_basebackup" -rsync_options='' # Options to append to "rsync" -ssh_options='' # Options to append to "ssh" +#pg_ctl_options='' # Options to append to "pg_ctl" +#pg_basebackup_options='' # Options to append to "pg_basebackup" +#rsync_options='' # Options to append to "rsync" +ssh_options='-q' # Options to append to "ssh"