diff --git a/README.md b/README.md index 1075989a..7afdbdb2 100644 --- a/README.md +++ b/README.md @@ -299,3 +299,16 @@ Note that under some circumstances (e.g. no replication cluster master could be located), it will not be possible to write an entry into the `repmgr.events` table, in which case executing a script via `event_notification_command` can serve as a fallback by generating some form of notification. + + +Diagnostics +----------- + + $ repmgr -f /etc/repmgr.conf node service --list-actions + Following commands would be executed for each action: + + start: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' start" + stop: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -D '/var/lib/pgsql/data' -m fast -W stop" + restart: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' restart" + reload: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' reload" + promote: "/usr/bin/pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/pgsql/data' promote" diff --git a/configfile.c b/configfile.c index 1a4f264d..01bcdb0b 100644 --- a/configfile.c +++ b/configfile.c @@ -209,7 +209,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->node_id = UNKNOWN_NODE_ID; memset(options->node_name, 0, sizeof(options->node_name)); memset(options->conninfo, 0, sizeof(options->conninfo)); - memset(options->pgdata, 0, sizeof(options->pgdata)); + memset(options->data_directory, 0, sizeof(options->data_directory)); memset(options->pg_bindir, 0, sizeof(options->pg_bindir)); options->replication_type = REPLICATION_TYPE_PHYSICAL; @@ -228,6 +228,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * options->use_replication_slots = false; memset(options->rsync_options, 0, sizeof(options->rsync_options)); memset(options->ssh_options, 0, sizeof(options->ssh_options)); + strncpy(options->ssh_options, "-q", 
sizeof(options->ssh_options)); memset(options->replication_user, 0, sizeof(options->replication_user)); memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options)); memset(options->restore_command, 0, sizeof(options->restore_command)); @@ -342,8 +343,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * strncpy(options->node_name, value, MAXLEN); else if (strcmp(name, "conninfo") == 0) strncpy(options->conninfo, value, MAXLEN); - else if (strcmp(name, "pgdata") == 0) - strncpy(options->pgdata, value, MAXPGPATH); + else if (strcmp(name, "data_directory") == 0) + strncpy(options->data_directory, value, MAXPGPATH); else if (strcmp(name, "replication_user") == 0) { if (strlen(value) < NAMEDATALEN) @@ -557,9 +558,9 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * item_list_append(error_list, _("\"node_name\": required parameter was not found")); } - if (!strlen(options->pgdata)) + if (!strlen(options->data_directory)) { - item_list_append(error_list, _("\"pgdata\": required parameter was not found")); + item_list_append(error_list, _("\"data_directory\": required parameter was not found")); } if (!strlen(options->conninfo)) @@ -1008,10 +1009,10 @@ parse_event_notifications_list(t_configuration_options *options, const char *arg bool parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_options *backup_options, int server_version_num, ItemList *error_list) { - int options_len = strlen(pg_basebackup_options) + 1; - char *options_string = pg_malloc(options_len); + int options_len = 0; + char *options_string = NULL; + char *options_string_ptr = NULL; - char *options_string_ptr = options_string; /* * Add parsed options to this list, then copy to an array * to pass to getopt @@ -1054,6 +1055,10 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti if (!strlen(pg_basebackup_options)) return backup_options_ok; + options_len = 
strlen(pg_basebackup_options) + 1; + options_string = pg_malloc(options_len); + options_string_ptr = options_string; + if (server_version_num >= 100000) long_options = long_options_10; else @@ -1143,5 +1148,8 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti backup_options_ok = false; } + pfree(options_string); + pfree(argv_array); + return backup_options_ok; } diff --git a/configfile.h b/configfile.h index 32f8484c..3059b7b9 100644 --- a/configfile.h +++ b/configfile.h @@ -53,8 +53,8 @@ typedef struct int node_id; char node_name[MAXLEN]; char conninfo[MAXLEN]; - char pgdata[MAXPGPATH]; char replication_user[NAMEDATALEN]; + char data_directory[MAXPGPATH]; char pg_bindir[MAXPGPATH]; int replication_type; diff --git a/controldata.c b/controldata.c index cc46b446..7c37e0e1 100644 --- a/controldata.c +++ b/controldata.c @@ -36,6 +36,27 @@ get_db_state(char *data_directory) return state; } + +XLogRecPtr +get_latest_checkpoint_location(char *data_directory) +{ + ControlFileInfo *control_file_info; + XLogRecPtr checkPoint; + + control_file_info = get_controlfile(data_directory); + + if (control_file_info->control_file_processed == false) + return InvalidXLogRecPtr; + + checkPoint = control_file_info->control_file->checkPoint; + + pfree(control_file_info->control_file); + pfree(control_file_info); + + return checkPoint; +} + + const char * describe_db_state(DBState state) { @@ -101,7 +122,6 @@ get_controlfile(char *DataDir) * against. 
However we're only interested in the first few fields, which * should be constant across supported versions * - * XXX double-check this */ return control_file_info; diff --git a/controldata.h b/controldata.h index 4b5a1a8b..655b283e 100644 --- a/controldata.h +++ b/controldata.h @@ -24,4 +24,7 @@ get_db_state(char *data_directory); extern const char * describe_db_state(DBState state); +extern XLogRecPtr +get_latest_checkpoint_location(char *data_directory); + #endif /* _CONTROLDATA_H_ */ diff --git a/dbutils.c b/dbutils.c index 0993496b..49d951f4 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1036,7 +1036,7 @@ _get_primary_connection(PGconn *conn, } /* find all registered nodes */ - log_info(_("retrieving node list")); + log_verbose(LOG_INFO, _("retrieving node list")); initPQExpBuffer(&query); appendPQExpBuffer(&query, @@ -1182,6 +1182,9 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info) PQExpBufferData query; PGresult *res; + if (server_version_num == UNKNOWN_SERVER_VERSION_NUM) + server_version_num = get_server_version(conn, NULL); + initPQExpBuffer(&query); appendPQExpBuffer( &query, @@ -1297,6 +1300,28 @@ get_repmgr_extension_status(PGconn *conn) return REPMGR_AVAILABLE; } +/* ========================= */ +/* node management functions */ +/* ========================= */ + +/* assumes superuser connection */ +void +checkpoint(PGconn *conn) +{ + PGresult *res; + + res = PQexec(conn, "CHECKPOINT"); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + log_error(_("unable to execute CHECKPOINT")); + log_detail("%s", PQerrorMessage(conn)); + } + + PQclear(res); + return; +} + /* ===================== */ /* Node record functions */ @@ -1819,7 +1844,7 @@ _create_update_node_record(PGconn *conn, char *action, t_node_info *node_info) termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + if (PQresultStatus(res) != PGRES_COMMAND_OK) { log_error(_("unable to %s node record:\n %s"), action, @@ -1854,7 +1879,7 @@ 
update_node_record_set_active(PGconn *conn, int this_node_id, bool active) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + if (PQresultStatus(res) != PGRES_COMMAND_OK) { log_error(_("unable to update node record:\n %s"), PQerrorMessage(conn)); @@ -2065,7 +2090,7 @@ delete_node_record(PGconn *conn, int node) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + if (PQresultStatus(res) != PGRES_COMMAND_OK) { log_error(_("unable to delete node record:\n %s"), PQerrorMessage(conn)); @@ -2478,7 +2503,7 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { /* we don't treat this as an error */ log_warning(_("unable to create event record:\n %s"), @@ -2717,7 +2742,7 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { appendPQExpBuffer(error_msg, _("unable to create slot '%s' on the upstream node: %s\n"), @@ -2750,7 +2775,7 @@ drop_replication_slot(PGconn *conn, char *slot_name) termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to drop replication slot \"%s\":\n %s"), slot_name, @@ -2786,7 +2811,7 @@ get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record) termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to query pg_replication_slots:\n %s"), PQerrorMessage(conn)); @@ -2991,7 +3016,7 @@ get_voting_status(PGconn *conn) res = PQexec(conn, "SELECT repmgr.get_voting_status()"); - if (!res || 
PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to query repmgr.get_voting_status():\n %s"), PQerrorMessage(conn)); @@ -3169,7 +3194,7 @@ notify_follow_primary(PGconn *conn, int primary_node_id) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"), PQerrorMessage(conn)); @@ -3228,7 +3253,7 @@ reset_voting_status(PGconn *conn) termPQExpBuffer(&query); // COMMAND_OK? - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to execute repmgr.reset_voting_status():\n %s"), PQerrorMessage(conn)); @@ -3448,7 +3473,7 @@ add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to add table 'repmgr.%s' to replication set '%s':\n %s"), tablename, @@ -3485,7 +3510,7 @@ bdr_node_exists(PGconn *conn, const char *node_name) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { node_exists = false; } @@ -3524,7 +3549,7 @@ get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { status = SLOT_UNKNOWN; } @@ -3596,7 +3621,7 @@ add_extension_tables_to_bdr_replication_set(PGconn *conn) res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { // } @@ -3679,7 +3704,7 @@ get_bdr_node_record_by_name(PGconn *conn, const 
char *node_name, t_bdr_node_info res = PQexec(conn, query.data); termPQExpBuffer(&query); - if (!res || PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to retrieve BDR node record for \"%s\":\n %s"), node_name, diff --git a/dbutils.h b/dbutils.h index fe914cd3..795497e2 100644 --- a/dbutils.h +++ b/dbutils.h @@ -326,8 +326,11 @@ bool get_replication_info(PGconn *conn, ReplInfo *replication_info); /* extension functions */ ExtensionStatus get_repmgr_extension_status(PGconn *conn); +/* node management functions */ +void checkpoint(PGconn *conn); + /* result functions */ -bool atobool(const char *value); +bool atobool(const char *value); /* node record functions */ t_server_type parse_node_type(const char *type); diff --git a/dirutil.c b/dirutil.c index 9f80ad38..6cccd721 100644 --- a/dirutil.c +++ b/dirutil.c @@ -216,21 +216,15 @@ mkdir_p(char *path, mode_t omode) bool is_pg_dir(char *path) { - const size_t buf_sz = 8192; - char dirpath[buf_sz]; + char dirpath[MAXPGPATH]; struct stat sb; - int r; /* test pgdata */ - snprintf(dirpath, buf_sz, "%s/PG_VERSION", path); + snprintf(dirpath, MAXPGPATH, "%s/PG_VERSION", path); if (stat(dirpath, &sb) == 0) return true; - /* test tablespace dir */ - sprintf(dirpath, "ls %s/PG_*/ -I*", path); - r = system(dirpath); - if (r == 0) - return true; + /* TODO: sanity check other files */ return false; } diff --git a/errcode.h b/errcode.h index d9b7d9a0..26e7273f 100644 --- a/errcode.h +++ b/errcode.h @@ -13,6 +13,7 @@ #define ERR_BAD_RSYNC 2 #define ERR_BAD_PIDFILE 3 #define ERR_NO_RESTART 4 +#define ERR_LOCAL_COMMAND 5 #define ERR_DB_CONN 6 #define ERR_DB_QUERY 7 #define ERR_PROMOTION_FAIL 8 diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 15a71730..f1b771af 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -10,6 +10,8 @@ #include #include "repmgr.h" +#include "controldata.h" +#include "dirutil.h" #include "repmgr-client-global.h" #include 
"repmgr-action-node.h" @@ -19,7 +21,8 @@ static void format_archive_dir(char *archive_dir); static t_server_action parse_server_action(const char *action); static void _do_node_service_check(void); -static void _do_node_service_list(t_server_action action); +static void _do_node_service_list_actions(t_server_action action); +static void _do_node_status_is_shutdown(void); void do_node_status(void) @@ -39,6 +42,12 @@ do_node_status(void) RecoveryType recovery_type; ReplInfo replication_info = T_REPLINFO_INTIALIZER; + if (runtime_options.is_shutdown == true) + { + return _do_node_status_is_shutdown(); + } + + if (strlen(config_file_options.conninfo)) conn = establish_db_connection(config_file_options.conninfo, true); else @@ -59,6 +68,7 @@ do_node_status(void) PQfinish(conn); exit(ERR_BAD_CONFIG); } + (void) get_server_version(conn, server_version); if (get_cluster_size(conn, cluster_size) == false) @@ -279,6 +289,92 @@ do_node_status(void) } +/* + * --status=(RUNNING|SHUTDOWN|UNKNOWN) + * --last-checkpoint=... 
+ */ + + static +void _do_node_status_is_shutdown(void) +{ + PGPing status; + PQExpBufferData output; + + bool is_shutdown = true; + DBState db_state; + XLogRecPtr checkPoint; + + initPQExpBuffer(&output); + + appendPQExpBuffer( + &output, + "--state="); + + /* sanity-check we're dealing with a PostgreSQL directory */ + if (is_pg_dir(config_file_options.data_directory) == false) + { + appendPQExpBuffer(&output, "UNKNOWN"); + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; + } + + + status = PQping(config_file_options.conninfo); + + switch (status) + { + case PQPING_OK: + appendPQExpBuffer(&output, "RUNNING"); + is_shutdown = false; + break; + case PQPING_REJECT: + appendPQExpBuffer(&output, "RUNNING"); + is_shutdown = false; + break; + case PQPING_NO_ATTEMPT: + case PQPING_NO_RESPONSE: + /* status not yet clear */ + break; + } + + /* check what pg_controldata says, unless a state was already emitted above */ + + db_state = get_db_state(config_file_options.data_directory); + + if (is_shutdown == true && db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY) + { + appendPQExpBuffer(&output, "RUNNING"); + is_shutdown = false; + } + + + checkPoint = get_latest_checkpoint_location(config_file_options.data_directory); + + /* unable to read pg_control, don't know what's happening (only if no state emitted yet) */ + if (is_shutdown == true && checkPoint == InvalidXLogRecPtr) + { + appendPQExpBuffer(&output, "UNKNOWN"); + is_shutdown = false; + } + + /* server is running in some state - just output --state */ + if (is_shutdown == false) + { + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; + } + + appendPQExpBuffer(&output, + "SHUTDOWN --last-checkpoint-lsn=%X/%X", + format_lsn(checkPoint)); + + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; +} + void do_node_check(void) @@ -289,10 +385,15 @@ do_node_check(void) // --action=... 
// --check // --list -> list what would be executed for each action, filter to --action + +// --checkpoint must be run as superuser - check connection void do_node_service(void) { - t_server_action action; + t_server_action action = ACTION_UNKNOWN; + char data_dir[MAXPGPATH] = ""; + char command[MAXLEN] = ""; + PQExpBufferData output; action = parse_server_action(runtime_options.action); @@ -312,23 +413,66 @@ do_node_service(void) return _do_node_service_check(); } - if (runtime_options.list == true) + if (runtime_options.list_actions == true) { - return _do_node_service_list(action); + return _do_node_service_list_actions(action); } - // do we need data directory? - // - service command defined for action ? -> no - // -> yes - // - pgdata defined in config? OK - // - // - connection available? - // -> get data dir OK (superuser connection issue) + if (data_dir_required_for_action(action)) + { + get_node_data_directory(data_dir); + + if (data_dir[0] == '\0') + { + log_error(_("unable to determine data directory for action")); + exit(ERR_BAD_CONFIG); + } + } - // perform action... - // --dry-run: print only + if ((action == ACTION_STOP || action == ACTION_RESTART) && runtime_options.checkpoint == true) + { + if (runtime_options.dry_run == true) + { + log_info(_("a CHECKPOINT would be issued here")); + } + else + { + PGconn *conn; + + if (strlen(config_file_options.conninfo)) + conn = establish_db_connection(config_file_options.conninfo, true); + else + conn = establish_db_connection_by_params(&source_conninfo, true); + + log_notice(_("issuing CHECKPOINT")); + + // check superuser conn! 
+ checkpoint(conn); + + PQfinish(conn); + } + } + + get_server_action(action, command, data_dir); + + if (runtime_options.dry_run == true) + { + log_info(_("would execute server command \"%s\""), command); + return; + } + + log_notice(_("executing server command \"%s\""), command); + + initPQExpBuffer(&output); + + if (local_command(command, &output) == false) + { + exit(ERR_LOCAL_COMMAND); + } + + termPQExpBuffer(&output); } @@ -339,7 +483,7 @@ _do_node_service_check(void) static void -_do_node_service_list(t_server_action action) +_do_node_service_list_actions(t_server_action action) { char command[MAXLEN] = ""; @@ -644,6 +788,8 @@ do_node_restore_config(void) } + + static void format_archive_dir(char *archive_dir) { diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 7af83380..feb8b3c9 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -77,6 +77,7 @@ static void get_barman_property(char *dst, char *name, char *local_repmgr_direct static int get_tablespace_data_barman(char *, TablespaceDataList *); static char *make_barman_ssh_command(char *buf); +static NodeStatus parse_node_status_is_shutdown(const char *node_status_output, XLogRecPtr *checkPoint); /* * do_standby_clone() @@ -1305,10 +1306,23 @@ do_standby_follow(void) */ else { + if (config_file_options.data_directory[0] == '\0') + { + if (runtime_options.data_dir[0] == '\0') + { + log_error(_("-D/--pgdata required when providing connection parameters for \"standby follow\"")); + exit(ERR_BAD_CONFIG); + } + strncpy(data_dir, runtime_options.data_dir, MAXPGPATH); + } + else + { + strncpy(data_dir, config_file_options.data_directory, MAXPGPATH); + } + primary_conn = establish_db_connection_by_params(&source_conninfo, true); primary_id = get_primary_node_id(primary_conn); - strncpy(data_dir, runtime_options.data_dir, MAXPGPATH); } if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY) @@ -1530,10 +1544,478 @@ do_standby_follow(void) return; } + +/* + * Perform a switchover by: 
+ * - stopping current primary node + * - promoting this standby node to primary + * - forcing previous primary node to follow this node + * + * Caveats: + * - repmgrd must not be running, otherwise it may + * attempt a failover + * (TODO: find some way of notifying repmgrd of planned + * activity like this) + * - currently only set up for two-node operation; any other + * standbys will probably become downstream cascaded standbys + * of the old primary once it's restarted + * - as we're executing repmgr remotely (on the old primary), + * we'll need the location of its configuration file; this + * can be provided explicitly with -C/--remote-config-file, + * otherwise repmgr will look in default locations on the + * remote server + * + * TODO: + * - make connection test timeouts/intervals configurable (see below) + */ + + void do_standby_switchover(void) { - puts("not implemented"); + PGconn *local_conn; + PGconn *remote_conn; + + t_node_info local_node_record = T_NODE_INFO_INITIALIZER; + + + /* the remote server is the primary to be demoted */ + char remote_conninfo[MAXCONNINFO] = ""; + char remote_host[MAXLEN] = ""; + int remote_node_id; + t_node_info remote_node_record = T_NODE_INFO_INITIALIZER; + + RecordStatus record_status; + RecoveryType recovery_type; + PQExpBufferData remote_command_str; + PQExpBufferData command_output; + + int r, i; + + bool shutdown_success = false; + XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr; + ReplInfo replication_info = T_REPLINFO_INTIALIZER; + + /* + * SANITY CHECKS + * + * We'll be doing a bunch of operations on the remote server (primary + * to be demoted) - careful checks needed before proceeding. 
+ */ + + local_conn = establish_db_connection(config_file_options.conninfo, true); + + record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record); + if (record_status != RECORD_FOUND) + { + log_error(_("unable to retrieve node record for node %i"), + config_file_options.node_id); + + PQfinish(local_conn); + exit(ERR_DB_QUERY); + } + + if (!is_streaming_replication(local_node_record.type)) + { + log_error(_("switchover can only performed with streaming replication")); + PQfinish(local_conn); + exit(ERR_BAD_CONFIG); + } + + if (runtime_options.dry_run == true) + { + log_notice(_("checking switchover on node \"%s\" (ID: %i) in --dry-run mode"), + local_node_record.node_name, + local_node_record.node_id); + } + else + { + log_notice(_("executing switchover on node \"%s\" (ID: %i)"), + local_node_record.node_name, + local_node_record.node_id); + } + + /* Check that this is a standby */ + recovery_type = get_recovery_type(local_conn); + if (recovery_type != RECTYPE_STANDBY) + { + log_error(_("switchover must be executed from the standby node to be promoted")); + if (recovery_type == RECTYPE_PRIMARY) + { + log_detail(_("this node (ID: %i) is the primary"), + local_node_record.node_id); + } + PQfinish(local_conn); + + exit(ERR_SWITCHOVER_FAIL); + } + + /* check remote server connection and retrieve its record */ + remote_conn = get_primary_connection(local_conn, &remote_node_id, remote_conninfo); + + if (PQstatus(remote_conn) != CONNECTION_OK) + { + log_error(_("unable to connect to current primary node")); + log_hint(_("check that the cluster is correctly configured and this standby is registered")); + PQfinish(local_conn); + exit(ERR_DB_CONN); + } + + record_status = get_node_record(remote_conn, remote_node_id, &remote_node_record); + + if (record_status != RECORD_FOUND) + { + log_error(_("unable to retrieve node record for node %i"), + remote_node_id); + + PQfinish(local_conn); + PQfinish(remote_conn); + + exit(ERR_DB_QUERY); + } + + /* 
+ * Check this standby is attached to the demotion candidate + * TODO: + * - check standby is attached to demotion candidate + * (compare primary_conninfo from recovery.conf) + */ + + if (local_node_record.upstream_node_id != remote_node_record.node_id) + { + log_error(_("local node %i is not a downstream of demotion candidate primary %i"), + local_node_record.node_id, + remote_node_record.node_id); + + PQfinish(local_conn); + PQfinish(remote_conn); + + exit(ERR_BAD_CONFIG); + } + + log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name); + + PQfinish(local_conn); + PQfinish(remote_conn); + + /* + * Check that we can connect by SSH to the remote (current primary) server + */ + get_conninfo_value(remote_conninfo, "host", remote_host); + + r = test_ssh_connection(remote_host, runtime_options.remote_user); + + if (r != 0) + { + log_error(_("unable to connect via SSH to host \"%s\", user \"%s\""), + remote_host, runtime_options.remote_user); + exit(ERR_BAD_CONFIG); + } + + + /* Determine the remote's configuration file location */ + /* -------------------------------------------------- */ + + /* Remote configuration file provided - check it exists */ + /* TODO have remote node verify config file "node status --config-file */ + if (runtime_options.remote_config_file[0]) + + { + log_verbose(LOG_INFO, _("looking for file \"%s\" on remote server \"%s\""), + runtime_options.remote_config_file, + remote_host); + + initPQExpBuffer(&remote_command_str); + appendPQExpBuffer(&remote_command_str, "ls "); + + appendShellString(&remote_command_str, runtime_options.remote_config_file); + appendPQExpBuffer(&remote_command_str, " >/dev/null 2>&1 && echo 1 || echo 0"); + + initPQExpBuffer(&command_output); + + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (*command_output.data == '0') + { + log_error(_("unable to find the specified 
repmgr configuration file on remote server")); + log_detail(_("remote configuration file is \"%s\""), + runtime_options.remote_config_file); + exit(ERR_BAD_CONFIG); + } + + log_verbose(LOG_INFO, _("remote configuration file \"%s\" found on remote server"), + runtime_options.remote_config_file); + + termPQExpBuffer(&command_output); + } + /* + * No remote configuration file provided - check some default locations: + * - path of configuration file for this repmgr + * - /etc/repmgr.conf + */ + else + { + int i; + bool remote_config_file_found = false; + + const char *config_paths[] = { + runtime_options.config_file, + "/etc/repmgr.conf", + NULL + }; + + log_verbose(LOG_INFO, _("no remote configuration file provided - checking default locations")); + + for (i = 0; config_paths[i] && remote_config_file_found == false; ++i) + { + /* + * Don't attempt to check for an empty filename - this might be the case + * if no local configuration file was found. + */ + if (!strlen(config_paths[i])) + continue; + + log_verbose(LOG_INFO, _("checking \"%s\"\n"), config_paths[i]); + + initPQExpBuffer(&remote_command_str); + appendPQExpBuffer(&remote_command_str, "ls "); + + appendShellString(&remote_command_str, config_paths[i]); + appendPQExpBuffer(&remote_command_str, " >/dev/null 2>&1 && echo 1 || echo 0"); + + initPQExpBuffer(&command_output); + + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (*command_output.data == '1') + { + strncpy(runtime_options.remote_config_file, config_paths[i], MAXLEN); + log_verbose(LOG_INFO, _("configuration file \"%s\" found on remote server"), + runtime_options.remote_config_file); + remote_config_file_found = true; + } + + termPQExpBuffer(&command_output); + } + + if (remote_config_file_found == false) + { + log_error(_("no remote configuration file supplied or found in a default location - terminating")); + log_hint(_("specify the 
remote configuration file with -C/--remote-config-file")); + exit(ERR_BAD_CONFIG); + } + } + + + /* + * Sanity checks completed - prepare for the switchover + */ + + log_detail(_("local node \"%s\" (ID: %i) will be promoted to primary; " + "current primary \"%s\" (ID: %i) will be demoted to standby"), + local_node_record.node_name, + local_node_record.node_id, + remote_node_record.node_name, + remote_node_record.node_id); + + /* + * Stop the remote primary + * + * We'll issue the pg_ctl command but not force it not to wait; we'll check + * the connection from here - and error out if no shutdown is detected + * after a certain time. + */ + + // TODO: check remote node for archive status etc. + + initPQExpBuffer(&remote_command_str); + initPQExpBuffer(&command_output); + + make_remote_repmgr_path(&remote_command_str); + + + if (runtime_options.dry_run == true) + { + appendPQExpBuffer(&remote_command_str, + "node service --terse -LERROR --list-actions --action=stop"); + + } + else + { + appendPQExpBuffer(&remote_command_str, + "node service --action=stop --checkpoint"); + } + + // XXX handle failure + + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + + termPQExpBuffer(&remote_command_str); + + + /* + * --dry-run ends here with display of command which would be used to + * shut down the remote server + */ + if (runtime_options.dry_run == true) + { + log_info(_("following shutdown command would be run on node \"%s\":\n \"%s\""), + remote_node_record.node_name, + command_output.data); + termPQExpBuffer(&command_output); + return; + } + + termPQExpBuffer(&command_output); + shutdown_success = false; + + /* loop for timeout waiting for current primary to stop */ + + for (i = 0; i < config_file_options.reconnect_attempts; i++) + { + /* Check whether primary is available */ + PGPing ping_res; + + log_info(_("checking primary status; %i of %i attempts"), + i + 1, config_file_options.reconnect_attempts); + 
ping_res = PQping(remote_conninfo); + + + /* database server could not be contacted (no response, or no connection attempt was possible) */ + if (ping_res == PQPING_NO_RESPONSE || ping_res == PQPING_NO_ATTEMPT) + { + bool command_success; + + /* + * remote server can't be contacted at protocol level - that + * doesn't necessarily mean it's shut down, so we'll ask + * its repmgr to check at data directory level, and if shut down + * also return the last checkpoint LSN. + */ + + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str); + appendPQExpBuffer(&remote_command_str, + "node status --is-shutdown"); + + initPQExpBuffer(&command_output); + + command_success = remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (command_success == true) + { + NodeStatus status = parse_node_status_is_shutdown(command_output.data, &remote_last_checkpoint_lsn); + + if (status == NODE_STATUS_DOWN && remote_last_checkpoint_lsn != InvalidXLogRecPtr) + { + shutdown_success = true; + log_notice(_("current primary has been shut down at location %X/%X"), + format_lsn(remote_last_checkpoint_lsn)); + termPQExpBuffer(&command_output); + + break; + } + } + + termPQExpBuffer(&command_output); + } + + /* XXX make configurable? 
*/ + sleep(config_file_options.reconnect_interval); + /* "i" is advanced by the for statement; no extra increment here */ + } + + if (shutdown_success == false) + { + log_error(_("shutdown of the primary server could not be confirmed")); + log_hint(_("check the primary server status before performing any further actions")); + exit(ERR_SWITCHOVER_FAIL); + } + + + local_conn = establish_db_connection(config_file_options.conninfo, false); + + if (PQstatus(local_conn) != CONNECTION_OK) + { + log_error(_("unable to reestablish connection to local node \"%s\""), + local_node_record.node_name); + exit(ERR_DB_CONN); + } + + get_replication_info(local_conn, &replication_info); + + if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) + { + log_warning(_("local node \"%s\" is behind shutdown primary \"%s\""), + local_node_record.node_name, + remote_node_record.node_name); + log_detail(_("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X"), + format_lsn(replication_info.last_wal_receive_lsn), + format_lsn(remote_last_checkpoint_lsn)); + + if (runtime_options.always_promote == false) + { + log_notice(_("aborting switchover")); + log_hint(_("use --always-promote to force promotion of standby")); + PQfinish(local_conn); + exit(ERR_SWITCHOVER_FAIL); + } + } + + /* promote standby */ + + // XXX need stripped-down version which skips the sanity checks etc + do_standby_promote(); + + if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) + { + // if --force-rewind was supplied, do that now, otherwise exit + } + + /* + * Execute `repmgr standby follow` to create recovery.conf and start + * the remote server + * + * XXX replace with "node rejoin" + */ + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str); + appendPQExpBuffer(&remote_command_str, + " -d \\'%s\\' standby follow", + local_node_record.conninfo); + log_debug("executing:\n \"%s\"", remote_command_str.data); + (void)remote_command( + remote_host, + runtime_options.remote_user, + 
remote_command_str.data,
+					   NULL);
+
+	termPQExpBuffer(&remote_command_str);
+
+	return;
 }
@@ -2708,6 +3190,7 @@ copy_configuration_files(void)
 	log_notice(_("copying external configuration files from upstream node"));
 
 	r = test_ssh_connection(host, runtime_options.remote_user);
+
 	if (r != 0)
 	{
 		log_error(_("remote host %s is not reachable via SSH - unable to copy external configuration files"),
@@ -2876,3 +3359,123 @@ drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
 		}
 	}
 }
+
+
+
+/*
+ * parse_node_status_is_shutdown()
+ *
+ * Parse the output of "repmgr node status --is-shutdown" as executed on
+ * a remote node. The output is handled as a getopt-style argument string
+ * carrying "--state" (RUNNING, SHUTDOWN or UNKNOWN) and
+ * "--last-checkpoint-lsn".
+ *
+ * Returns the reported node state, or NODE_STATUS_UNKNOWN if no usable
+ * "--state" value was found. "checkPoint" is always written: it receives
+ * the reported checkpoint LSN, or InvalidXLogRecPtr if none was present.
+ */
+static NodeStatus
+parse_node_status_is_shutdown(const char *node_status_output, XLogRecPtr *checkPoint)
+{
+	NodeStatus	node_status = NODE_STATUS_UNKNOWN;
+
+	size_t		options_len = 0;
+	char	   *options_string = NULL;
+	char	   *argv_item = NULL;
+
+	/* getopt_long() expects the program name in the first array element */
+	char		dummy_progname[] = "";
+	char	  **argv_array = NULL;
+	int			argc_item = 1;
+
+	int			c;
+	int			optindex = 0;
+
+	/* We're only interested in these options */
+	static struct option long_options[] =
+	{
+		{"last-checkpoint-lsn", required_argument, NULL, 'L'},
+		{"state", required_argument, NULL, 'S'},
+		{NULL, 0, NULL, 0}
+	};
+
+	/* Make sure the caller never sees a stale or uninitialised LSN */
+	*checkPoint = InvalidXLogRecPtr;
+
+	/* Don't attempt to tokenise a missing or empty string */
+	if (node_status_output == NULL || node_status_output[0] == '\0')
+		return node_status;
+
+	/* strtok() modifies its input, so operate on a private copy */
+	options_len = strlen(node_status_output) + 1;
+	options_string = pg_malloc(options_len);
+	memcpy(options_string, node_status_output, options_len);
+
+	/*
+	 * Array of argument values to pass to getopt_long(). Tokens are
+	 * separated by at least one delimiter, so the copy can hold at most
+	 * (options_len / 2) of them; the two extra slots are for the dummy
+	 * program name and the terminating NULL (already set by pg_malloc0()).
+	 */
+	argv_array = pg_malloc0(sizeof(char *) * (options_len / 2 + 2));
+	argv_array[0] = dummy_progname;
+
+	/*
+	 * Split the copy in place so the array elements point into
+	 * "options_string". The argument list is local to this call: the
+	 * function may be invoked repeatedly while waiting for the primary to
+	 * shut down, and arguments from an earlier call must not be parsed again.
+	 * Newlines count as separators so a trailing newline in the captured
+	 * output does not become part of the last value.
+	 */
+	for (argv_item = strtok(options_string, " \t\r\n");
+		 argv_item != NULL;
+		 argv_item = strtok(NULL, " \t\r\n"))
+	{
+		argv_array[argc_item++] = argv_item;
+	}
+
+	/* Reset getopt's optind variable */
+	optind = 0;
+
+	/* Prevent getopt from emitting errors */
+	opterr = 0;
+
+	while ((c = getopt_long(argc_item, argv_array, "L:S:", long_options,
+							&optindex)) != -1)
+	{
+		switch (c)
+		{
+			/* --last-checkpoint-lsn */
+			case 'L':
+				*checkPoint = parse_lsn(optarg);
+				break;
+			/* --state */
+			case 'S':
+				{
+					if (strncmp(optarg, "RUNNING", MAXLEN) == 0)
+					{
+						node_status = NODE_STATUS_UP;
+					}
+					else if (strncmp(optarg, "SHUTDOWN", MAXLEN) == 0)
+					{
+						node_status = NODE_STATUS_DOWN;
+					}
+					else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0)
+					{
+						node_status = NODE_STATUS_UNKNOWN;
+					}
+				}
+				break;
+			/* anything else is of no interest */
+			default:
+				break;
+		}
+	}
+
+	/* "optarg" pointed into "options_string", so only release it now */
+	pg_free(argv_array);
+	pg_free(options_string);
+
+	return node_status;
+}
diff --git a/repmgr-client-global.h b/repmgr-client-global.h
index d7cfdbab..9ebe4ac5 100644
--- a/repmgr-client-global.h
+++ b/repmgr-client-global.h
@@ -73,10 +73,19 @@ typedef struct
 	bool		wait_register_sync;
 	int			wait_register_sync_seconds;
 
+	/* "standby switchover" options */
+	char		remote_config_file[MAXPGPATH];
+	bool		always_promote;
+	bool		force_rewind;
+
+	/* "node status" options */
+	bool		is_shutdown;
+
 	/* "node service" options */
 	char		action[MAXLEN];
 	bool		check;
-	bool		list;
+	bool		list_actions;
+	bool		checkpoint;
 
 	/* "cluster event" options */
 	bool		all;
@@ -99,15 +108,20 @@ typedef struct
 		/* database connection options */ \
 		"", "", "", "", \
 		/* other connection options */ \
-		"", "", \
+		"", "", \
 		/* node options */ \
UNKNOWN_NODE_ID, "", "", \ /* "standby clone" options */ \ - false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", NO_UPSTREAM_NODE, false, "", false, \ + false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \ + NO_UPSTREAM_NODE, false, "", false, \ /* "standby register" options */ \ false, 0, \ + /* "standby switchover" options */ \ + "", false, false, \ + /* "node status" options */ \ + false, \ /* "node service" options */ \ - "", false, false, \ + "", false, false, false, \ /* "cluster event" options */ \ false, "", CLUSTER_EVENT_LIMIT, \ "/tmp" \ @@ -167,6 +181,8 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf); +extern void make_remote_repmgr_path(PQExpBufferData *outputbuf); + /* server control functions */ extern void get_server_action(t_server_action action, char *script, char *data_dir); extern bool data_dir_required_for_action(t_server_action action); diff --git a/repmgr-client.c b/repmgr-client.c index beb7a1a2..894a39ba 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -15,6 +15,7 @@ * STANDBY REGISTER * STANDBY UNREGISTER * STANDBY PROMOTE + * STANDBY SWITCHOVER * * BDR REGISTER * BDR UNREGISTER @@ -156,7 +157,7 @@ main(int argc, char **argv) strncpy(runtime_options.username, pw->pw_name, MAXLEN); } - while ((c = getopt_long(argc, argv, "?Vb:f:Fd:h:p:U:R:S:L:vtD:cr", long_options, + while ((c = getopt_long(argc, argv, "?Vb:f:Fd:h:p:U:R:S:L:vtD:crC:", long_options, &optindex)) != -1) { /* @@ -320,13 +321,15 @@ main(int argc, char **argv) { runtime_options.copy_external_config_files_destination = CONFIG_FILE_SAMEPATH; } - else if (strcmp(optarg, "pgdata") == 0) + /* allow "data_directory" as synonym for "pgdata" */ + else if (strcmp(optarg, "pgdata") == 0 || strcmp(optarg, "data_directory") == 0) { runtime_options.copy_external_config_files_destination = CONFIG_FILE_PGDATA; } else { - 
item_list_append(&cli_errors, _("value provided for '--copy-external-config-files' must be 'samepath' or 'pgdata'")); + item_list_append(&cli_errors, + _("value provided for \"--copy-external-config-files\" must be \"samepath\" or \"pgdata\"")); } } break; @@ -395,6 +398,28 @@ main(int argc, char **argv) } break; + /* "standby switchover" options * + * ---------------------------- */ + + case 'C': + strncpy(runtime_options.remote_config_file, optarg, MAXPGPATH); + break; + + case OPT_ALWAYS_PROMOTE: + runtime_options.always_promote = true; + break; + + case OPT_FORCE_REWIND: + runtime_options.force_rewind = true; + break; + + /* "node status" options * + * --------------------- */ + + case OPT_IS_SHUTDOWN: + runtime_options.is_shutdown = true; + break; + /* "node service" options * * ---------------------- */ @@ -403,14 +428,18 @@ main(int argc, char **argv) strncpy(runtime_options.action, optarg, MAXLEN); break; - case OPT_LIST: - runtime_options.list = true; + case OPT_LIST_ACTIONS: + runtime_options.list_actions = true; break; case OPT_CHECK: runtime_options.check = true; break; + case OPT_CHECKPOINT: + runtime_options.checkpoint = true; + break; + /* "cluster event" options * * ----------------------- */ @@ -1122,16 +1151,7 @@ check_cli_parameters(const int action) /* * if `repmgr standby follow` executed with host params, ensure data * directory was provided - * XXX not needed */ - if (runtime_options.host_param_provided == true) - { - if (runtime_options.data_dir[0] == '\0') - { - item_list_append_format(&cli_errors, - _("-D/--pgdata required when providing connection parameters for \"standby follow\"")); - } - } } break; @@ -1315,7 +1335,7 @@ check_cli_parameters(const int action) } /* repmgr node service --action */ - if (runtime_options.action) + if (runtime_options.action[0] != '\0') { switch (action) { @@ -1323,10 +1343,52 @@ check_cli_parameters(const int action) break; default: item_list_append_format(&cli_warnings, - _("--action not required when 
executing %s"), + _("--action will be ignored when executing %s"), action_name(action)); } } + + /* repmgr node status --is-shutdown */ + if (runtime_options.is_shutdown == true) + { + switch (action) + { + case NODE_STATUS: + break; + default: + item_list_append_format(&cli_warnings, + _("--is-shutdown will be ignored when executing %s"), + action_name(action)); + } + } + + if (runtime_options.always_promote == true) + { + switch (action) + { + case STANDBY_SWITCHOVER: + break; + default: + item_list_append_format(&cli_warnings, + _("--always-promote will be ignored when executing %s"), + action_name(action)); + } + } + + if ( runtime_options.force_rewind == true) + { + switch (action) + { + case STANDBY_SWITCHOVER: + break; + default: + item_list_append_format(&cli_warnings, + _("--force-rewind will be ignored when executing %s"), + action_name(action)); + } + } + + } @@ -1461,7 +1523,7 @@ do_help(void) puts(""); - printf(_("CLUSTER SHOW options:\n")); + printf(_("CLUSTER EVENT options:\n")); printf(_(" --limit maximum number of events to display (default: %i)\n"), CLUSTER_EVENT_LIMIT); printf(_(" --all display all events (overrides --limit)\n")); printf(_(" --event filter specific event\n")); @@ -1706,8 +1768,8 @@ test_ssh_connection(char *host, char *remote_user) /* - * Execute a command locally. If outputbuf == NULL, discard the - * output. + * Execute a command locally. 
"outputbuf" should either be an + * initialised PQexpbuffer, or NULL */ bool local_command(const char *command, PQExpBufferData *outputbuf) @@ -2496,7 +2558,7 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf appendPQExpBuffer(&ssh_host, "%s@", user); } - appendPQExpBuffer(&ssh_host, "%s",host); + appendPQExpBuffer(&ssh_host, "%s", host); maxlen_snprintf(ssh_command, "ssh -o Batchmode=yes %s %s %s", @@ -2506,7 +2568,7 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf termPQExpBuffer(&ssh_host); - log_debug("remote_command(): %s\n", ssh_command); + log_debug("remote_command():\n %s", ssh_command); fp = popen(ssh_command, "r"); @@ -2549,6 +2611,20 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf } +void +make_remote_repmgr_path(PQExpBufferData *output_buf) +{ + appendPQExpBuffer(output_buf, + "%s ", make_pg_path("repmgr")); + + if (runtime_options.remote_config_file[0] != '\0') + { + appendPQExpBuffer(output_buf, + "-f %s ", runtime_options.remote_config_file); + } +} + + /* ======================== */ /* server control functions */ /* ======================== */ @@ -2786,15 +2862,13 @@ data_dir_required_for_action(t_server_action action) void get_node_data_directory(char *data_dir_buf) { - PGconn *conn = NULL; - /* * the configuration file setting has priority, and will always be * set when a configuration file was provided */ - if (config_file_options.pgdata[0] != '\0') + if (config_file_options.data_directory[0] != '\0') { - strncpy(data_dir_buf, config_file_options.pgdata, MAXPGPATH); + strncpy(data_dir_buf, config_file_options.data_directory, MAXPGPATH); return; } diff --git a/repmgr-client.h b/repmgr-client.h index 22588a08..9bdd2daf 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -48,21 +48,23 @@ #define OPT_WITHOUT_BARMAN 12 #define OPT_NO_UPSTREAM_CONNECTION 13 #define OPT_REGISTER_WAIT 14 -#define OPT_CLUSTER 15 -#define OPT_LOG_TO_FILE 16 -#define 
OPT_UPSTREAM_CONNINFO 17 +#define OPT_LOG_TO_FILE 15 +#define OPT_UPSTREAM_CONNINFO 16 /* replaces --no-conninfo-password */ -#define OPT_USE_RECOVERY_CONNINFO_PASSWORD 18 -#define OPT_REPLICATION_USER 19 -#define OPT_EVENT 20 -#define OPT_LIMIT 21 -#define OPT_ALL 22 -#define OPT_DRY_RUN 23 -#define OPT_UPSTREAM_NODE_ID 24 -#define OPT_ACTION 25 +#define OPT_USE_RECOVERY_CONNINFO_PASSWORD 17 +#define OPT_REPLICATION_USER 18 +#define OPT_EVENT 19 +#define OPT_LIMIT 20 +#define OPT_ALL 21 +#define OPT_DRY_RUN 22 +#define OPT_UPSTREAM_NODE_ID 23 +#define OPT_ACTION 24 +#define OPT_LIST_ACTIONS 25 #define OPT_CHECK 26 -#define OPT_LIST 27 - +#define OPT_CHECKPOINT 27 +#define OPT_IS_SHUTDOWN 28 +#define OPT_ALWAYS_PROMOTE 29 +#define OPT_FORCE_REWIND 30 /* deprecated since 3.3 */ #define OPT_DATA_DIR 998 #define OPT_NO_CONNINFO_PASSWORD 999 @@ -97,7 +99,7 @@ static struct option long_options[] = /* logging options */ {"log-level", required_argument, NULL, 'L'}, {"log-to-file", no_argument, NULL, OPT_LOG_TO_FILE}, - {"terse", required_argument, NULL, 't'}, + {"terse", no_argument, NULL, 't'}, {"verbose", no_argument, NULL, 'v'}, /* output options */ @@ -118,10 +120,19 @@ static struct option long_options[] = /* "standby register" options */ {"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT}, +/* "standby switchover" options */ + {"remote-config-file", required_argument, NULL, 'C'}, + {"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE }, + {"force-rewind", no_argument, NULL, OPT_FORCE_REWIND }, + +/* "node status" options */ + {"is-shutdown", no_argument, NULL, OPT_IS_SHUTDOWN }, + /* "node service" options */ {"action", required_argument, NULL, OPT_ACTION}, {"check", no_argument, NULL, OPT_CHECK}, - {"list", no_argument, NULL, OPT_LIST}, + {"list-actions", no_argument, NULL, OPT_LIST_ACTIONS}, + {"checkpoint", no_argument, NULL, OPT_CHECKPOINT}, /* "cluster event" options */ {"all", no_argument, NULL, OPT_ALL }, @@ -139,7 +150,6 @@ static struct option 
long_options[] = /* not yet handled */ {"keep-history", required_argument, NULL, 'k'}, {"mode", required_argument, NULL, 'm'}, - {"remote-config-file", required_argument, NULL, 'C'}, {"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG}, {"pg_rewind", optional_argument, NULL, OPT_PG_REWIND}, {"pwprompt", optional_argument, NULL, OPT_PWPROMPT}, diff --git a/repmgr.conf.sample b/repmgr.conf.sample index 7482d1d1..bd5801a5 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -35,7 +35,10 @@ # connection attempt is abandoned; for details see: # https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT -#pgdata # The node's data directory +#data_directory # The node's data directory. This is needed by repmgr + # when performing operations when the PostgreSQL instance + # is not running and there's no other way of determining + # the data directory. # ============================================================================= @@ -139,10 +142,10 @@ # rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\"" # ssh_options=-o "StrictHostKeyChecking no" -pg_ctl_options='' # Options to append to "pg_ctl" -pg_basebackup_options='' # Options to append to "pg_basebackup" -rsync_options='' # Options to append to "rsync" -ssh_options='' # Options to append to "ssh" +#pg_ctl_options='' # Options to append to "pg_ctl" +#pg_basebackup_options='' # Options to append to "pg_basebackup" +#rsync_options='' # Options to append to "rsync" +ssh_options='-q' # Options to append to "ssh"