From 32b81e7d49dedbecdaca17705f7b8c41f371c01b Mon Sep 17 00:00:00 2001
From: Ian Barwick
Date: Thu, 24 Jan 2019 18:42:08 +0900
Subject: [PATCH] "daemon start": initial implementation

---
 configfile.c           |  56 ++++++++++++++---------
 configfile.h           |  18 +++++---
 dbutils.c              |  32 +++++++++++++
 dbutils.h              |   1 +
 errcode.h              |   1 +
 repmgr-action-daemon.c |  99 ++++++++++++++++++++++++++++++++++++++++
 repmgr-action-daemon.h |   2 +
 repmgr-client-global.h |   4 ++
 repmgr-client.c        | 104 +++++++++++++++++++++++++++++++++++++++++-
 repmgr-client.h        |   2 +
 repmgr.conf.sample     |  13 +++++-
 11 files changed, 302 insertions(+), 30 deletions(-)

diff --git a/configfile.c b/configfile.c
index 93f9cd80..ed5f9ab3 100644
--- a/configfile.c
+++ b/configfile.c
@@ -371,17 +371,24 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	options->bdr_local_monitoring_only = false;
 	options->bdr_recovery_timeout = DEFAULT_BDR_RECOVERY_TIMEOUT;
 
-	/*-----------------
-	 * service settings
-	 *-----------------
+	/*-------------------------
+	 * service command settings
+	 *-------------------------
 	 */
 	memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
-	memset(options->service_stop_command, 0, sizeof(options->service_stop_command));
 	memset(options->service_start_command, 0, sizeof(options->service_start_command));
+	memset(options->service_stop_command, 0, sizeof(options->service_stop_command));
 	memset(options->service_restart_command, 0, sizeof(options->service_restart_command));
 	memset(options->service_reload_command, 0, sizeof(options->service_reload_command));
 	memset(options->service_promote_command, 0, sizeof(options->service_promote_command));
 
+	/*---------------------------------
+	 * repmgrd service command settings
+	 *---------------------------------
+	 */
+	memset(options->repmgrd_service_start_command, 0, sizeof(options->repmgrd_service_start_command));
+	memset(options->repmgrd_service_stop_command, 0, sizeof(options->repmgrd_service_stop_command));
+
 	/*----------------------------
 	 * event notification settings
 	 *----------------------------
@@ -585,11 +592,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	else if (strcmp(name, "priority") == 0)
 		options->priority = repmgr_atoi(value, name, error_list, 0);
 	else if (strcmp(name, "location") == 0)
-		strncpy(options->location, value, MAXLEN);
+		strncpy(options->location, value, sizeof(options->location));
 	else if (strcmp(name, "promote_command") == 0)
-		strncpy(options->promote_command, value, MAXLEN);
+		strncpy(options->promote_command, value, sizeof(options->promote_command));
 	else if (strcmp(name, "follow_command") == 0)
-		strncpy(options->follow_command, value, MAXLEN);
+		strncpy(options->follow_command, value, sizeof(options->follow_command));
 	else if (strcmp(name, "reconnect_attempts") == 0)
 		options->reconnect_attempts = repmgr_atoi(value, name, error_list, 0);
 	else if (strcmp(name, "reconnect_interval") == 0)
@@ -621,41 +628,48 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 
 	/* service settings */
 	else if (strcmp(name, "pg_ctl_options") == 0)
-		strncpy(options->pg_ctl_options, value, MAXLEN);
-	else if (strcmp(name, "service_stop_command") == 0)
-		strncpy(options->service_stop_command, value, MAXLEN);
+		strncpy(options->pg_ctl_options, value, sizeof(options->pg_ctl_options));
 	else if (strcmp(name, "service_start_command") == 0)
-		strncpy(options->service_start_command, value, MAXLEN);
+		strncpy(options->service_start_command, value, sizeof(options->service_start_command));
+	else if (strcmp(name, "service_stop_command") == 0)
+		strncpy(options->service_stop_command, value, sizeof(options->service_stop_command));
 	else if (strcmp(name, "service_restart_command") == 0)
-		strncpy(options->service_restart_command, value, MAXLEN);
+		strncpy(options->service_restart_command, value, sizeof(options->service_restart_command));
 	else if (strcmp(name, "service_reload_command") == 0)
-		strncpy(options->service_reload_command, value, MAXLEN);
+		strncpy(options->service_reload_command, value, sizeof(options->service_reload_command));
 	else if (strcmp(name, "service_promote_command") == 0)
-		strncpy(options->service_promote_command, value, MAXLEN);
+		strncpy(options->service_promote_command, value, sizeof(options->service_promote_command));
+
+	/* repmgrd service settings */
+	else if (strcmp(name, "repmgrd_service_start_command") == 0)
+		strncpy(options->repmgrd_service_start_command, value, sizeof(options->repmgrd_service_start_command));
+	else if (strcmp(name, "repmgrd_service_stop_command") == 0)
+		strncpy(options->repmgrd_service_stop_command, value, sizeof(options->repmgrd_service_stop_command));
+
 
 	/* event notification settings */
 	else if (strcmp(name, "event_notification_command") == 0)
-		strncpy(options->event_notification_command, value, MAXLEN);
+		strncpy(options->event_notification_command, value, sizeof(options->event_notification_command));
 	else if (strcmp(name, "event_notifications") == 0)
 	{
 		/* store unparsed value for comparison when reloading config */
-		strncpy(options->event_notifications_orig, value, MAXLEN);
+		strncpy(options->event_notifications_orig, value, sizeof(options->event_notifications_orig));
 		parse_event_notifications_list(options, value);
 	}
 
 	/* barman settings */
 	else if (strcmp(name, "barman_host") == 0)
-		strncpy(options->barman_host, value, MAXLEN);
+		strncpy(options->barman_host, value, sizeof(options->barman_host));
 	else if (strcmp(name, "barman_server") == 0)
-		strncpy(options->barman_server, value, MAXLEN);
+		strncpy(options->barman_server, value, sizeof(options->barman_server));
 	else if (strcmp(name, "barman_config") == 0)
-		strncpy(options->barman_config, value, MAXLEN);
+		strncpy(options->barman_config, value, sizeof(options->barman_config));
 
 	/* rsync/ssh settings */
 	else if (strcmp(name, "rsync_options") == 0)
-		strncpy(options->rsync_options, value, MAXLEN);
+		strncpy(options->rsync_options, value, sizeof(options->rsync_options));
 	else if (strcmp(name, "ssh_options") == 0)
-		strncpy(options->ssh_options, value, MAXLEN);
+		strncpy(options->ssh_options, value, sizeof(options->ssh_options));
 
 	/* undocumented settings for testing */
 	else if (strcmp(name, "promote_delay") == 0)
diff --git a/configfile.h b/configfile.h
index 2c2d54d9..a7591a83 100644
--- a/configfile.h
+++ b/configfile.h
@@ -141,14 +141,18 @@ typedef struct
 
 	/* service settings */
 	char		pg_ctl_options[MAXLEN];
-	char		service_stop_command[MAXLEN];
-	char		service_start_command[MAXLEN];
-	char		service_restart_command[MAXLEN];
-	char		service_reload_command[MAXLEN];
-	char		service_promote_command[MAXLEN];
+	char		service_start_command[MAXPGPATH];
+	char		service_stop_command[MAXPGPATH];
+	char		service_restart_command[MAXPGPATH];
+	char		service_reload_command[MAXPGPATH];
+	char		service_promote_command[MAXPGPATH];
+
+	/* repmgrd service settings */
+	char		repmgrd_service_start_command[MAXPGPATH];
+	char		repmgrd_service_stop_command[MAXPGPATH];
 
 	/* event notification settings */
-	char		event_notification_command[MAXLEN];
+	char		event_notification_command[MAXPGPATH];
 	char		event_notifications_orig[MAXLEN];
 	EventNotificationList event_notifications;
 
@@ -205,6 +209,8 @@ typedef struct
 		false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
 		/* service settings */ \
 		"", "", "", "", "", "", \
+		/* repmgrd service settings */ \
+		"", "", \
 		/* event notification settings */ \
 		"", "", { NULL, NULL }, \
 		/* barman settings */ \
diff --git a/dbutils.c b/dbutils.c
index 16f989f6..12a99613 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -1746,6 +1746,38 @@ repmgrd_get_local_node_id(PGconn *conn)
 }
 
 
+/*
+ * Check whether "repmgr.get_local_node_id()" returns a usable value.
+ *
+ * Returns false if the query fails or the function returns NULL,
+ * otherwise true.
+ */
+bool
+repmgrd_check_local_node_id(PGconn *conn)
+{
+	PGresult   *res = NULL;
+	bool		node_id_settable = true;
+	const char *sqlquery = "SELECT repmgr.get_local_node_id()";
+
+	res = PQexec(conn, sqlquery);
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		log_db_error(conn, sqlquery, _("repmgrd_check_local_node_id(): unable to execute query"));
+		/* no row to inspect, so don't fall through to PQgetisnull() */
+		node_id_settable = false;
+	}
+	else if (PQgetisnull(res, 0, 0))
+	{
+		node_id_settable = false;
+	}
+
+	PQclear(res);
+
+	return node_id_settable;
+}
+
+
 /*
  * Function that checks if the primary is in exclusive backup mode.
  * We'll use this when executing an action can conflict with an exclusive
diff --git a/dbutils.h b/dbutils.h
index ff22dd4e..57fcd4bf 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -431,6 +431,7 @@ TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
 /* repmgrd shared memory functions */
 bool		repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
 int			repmgrd_get_local_node_id(PGconn *conn);
+bool		repmgrd_check_local_node_id(PGconn *conn);
 BackupState server_in_exclusive_backup_mode(PGconn *conn);
 void		repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile);
 pid_t		repmgrd_get_pid(PGconn *conn);
diff --git a/errcode.h b/errcode.h
index 9483bffd..fa7e2804 100644
--- a/errcode.h
+++ b/errcode.h
@@ -48,5 +48,6 @@
 #define ERR_REJOIN_FAIL 24
 #define ERR_NODE_STATUS 25
 #define ERR_REPMGRD_PAUSE 26
+#define ERR_REPMGRD_SERVICE 27
 
 #endif							/* _ERRCODE_H_ */
diff --git a/repmgr-action-daemon.c b/repmgr-action-daemon.c
index bfa30181..16c30404 100644
--- a/repmgr-action-daemon.c
+++ b/repmgr-action-daemon.c
@@ -384,6 +384,96 @@ fetch_node_records(PGconn *conn, NodeInfoList *node_list)
 }
 
 
+/*
+ * Start repmgrd on the local node.
+ *
+ * Uses "repmgrd_service_start_command" if configured, otherwise a command
+ * line generated by make_repmgrd_path(). In dry-run mode the command is
+ * displayed but not executed.
+ */
+void
+do_daemon_start(void)
+{
+	PGconn	   *conn = NULL;
+	PQExpBufferData repmgrd_command;
+	PQExpBufferData output_buf;
+	bool		success;
+
+	/*
+	 * if local connection available, check if repmgr.so is installed, and
+	 * whether repmgrd is running
+	 */
+	log_verbose(LOG_INFO, _("connecting to local node"));
+
+	if (strlen(config_file_options.conninfo))
+		conn = establish_db_connection(config_file_options.conninfo, false);
+	else
+		conn = establish_db_connection_by_params(&source_conninfo, false);
+
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		log_warning(_("unable to connect to local node"));
+	}
+	else
+	{
+		check_shared_library(conn);
+
+		if (is_repmgrd_running(conn) == true)
+		{
+			log_error(_("repmgrd appears to be running already"));
+			PQfinish(conn);
+			exit(ERR_REPMGRD_SERVICE);
+		}
+	}
+
+	/* connection was only needed for the checks above */
+	PQfinish(conn);
+
+	initPQExpBuffer(&repmgrd_command);
+
+	if (config_file_options.repmgrd_service_start_command[0] != '\0')
+	{
+		appendPQExpBufferStr(&repmgrd_command,
+							 config_file_options.repmgrd_service_start_command);
+	}
+	else
+	{
+		make_repmgrd_path(&repmgrd_command);
+	}
+
+	if (runtime_options.dry_run == true)
+	{
+		log_info(_("prerequisites for starting repmgrd met"));
+		log_detail("%s", repmgrd_command.data);
+		termPQExpBuffer(&repmgrd_command);
+		exit(SUCCESS);
+	}
+
+	log_debug("repmgrd start command: '%s'", repmgrd_command.data);
+
+	initPQExpBuffer(&output_buf);
+
+	success = local_command(repmgrd_command.data, &output_buf);
+	termPQExpBuffer(&repmgrd_command);
+
+	if (success == false)
+	{
+		log_error(_("unable to start repmgrd"));
+		if (output_buf.data[0] != '\0')
+			log_detail("%s", output_buf.data);
+		termPQExpBuffer(&output_buf);
+		exit(ERR_REPMGRD_SERVICE);
+	}
+
+	termPQExpBuffer(&output_buf);
+}
+
+
+/* Stop repmgrd on the local node (not yet implemented) */
+void do_daemon_stop(void)
+{
+}
+
 void do_daemon_help(void)
 {
 	print_help_header();
@@ -392,6 +482,8 @@ void do_daemon_help(void)
 	printf(_(" %s [OPTIONS] daemon status\n"), progname());
 	printf(_(" %s [OPTIONS] daemon pause\n"), progname());
 	printf(_(" %s [OPTIONS] daemon unpause\n"), progname());
+	printf(_(" %s [OPTIONS] daemon start\n"), progname());
+	printf(_(" %s [OPTIONS] daemon stop\n"), progname());
 	puts("");
 
 	printf(_("DAEMON STATUS\n"));
@@ -416,6 +508,13 @@ void do_daemon_help(void)
 	printf(_(" --dry-run check if nodes are reachable but don't unpause repmgrd\n"));
 	puts("");
 
+	printf(_("DAEMON START\n"));
+	puts("");
+	puts("XXX");
+
+	printf(_("DAEMON STOP\n"));
+	puts("");
+	puts("XXX");
 	puts("");
 }
 
diff --git a/repmgr-action-daemon.h b/repmgr-action-daemon.h
index 4b39e403..223ab863 100644
--- a/repmgr-action-daemon.h
+++ b/repmgr-action-daemon.h
@@ -23,6 +23,8 @@
 extern void do_daemon_status(void);
 extern void do_daemon_pause(void);
 extern void do_daemon_unpause(void);
+extern void do_daemon_start(void);
+extern void do_daemon_stop(void);
 extern void do_daemon_help(void);
 
 #endif
diff --git a/repmgr-client-global.h b/repmgr-client-global.h
index caaf70fd..8146dcbe 100644
--- a/repmgr-client-global.h
+++ b/repmgr-client-global.h
@@ -239,6 +239,8 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc
 
 extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf);
 extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record);
+extern void make_repmgrd_path(PQExpBufferData *output_buf);
+
 
 /* display functions */
 extern void print_help_header(void);
@@ -254,5 +256,7 @@ extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBuf
 extern void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
 
 extern bool check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin);
+extern void check_shared_library(PGconn *conn);
+extern bool is_repmgrd_running(PGconn *conn);
 
 #endif							/* _REPMGR_CLIENT_GLOBAL_H_ */
diff --git a/repmgr-client.c b/repmgr-client.c
index 545221b4..f2302321 100644
--- a/repmgr-client.c
+++ b/repmgr-client.c
@@ -35,7 +35,8 @@
  * DAEMON STATUS
  * DAEMON PAUSE
  * DAEMON UNPAUSE
- *
+ * DAEMON START
+ * DAEMON STOP
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -935,6 +936,10 @@ main(int argc, char **argv)
 				action = DAEMON_PAUSE;
 			else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
 				action = DAEMON_UNPAUSE;
+			else if (strcasecmp(repmgr_action, "START") == 0)
+				action = DAEMON_START;
+			else if (strcasecmp(repmgr_action, "STOP") == 0)
+				action = DAEMON_STOP;
 		}
 		else
 		{
@@ -1344,6 +1349,12 @@ main(int argc, char **argv)
 		case DAEMON_UNPAUSE:
 			do_daemon_unpause();
 			break;
+		case DAEMON_START:
+			do_daemon_start();
+			break;
+		case DAEMON_STOP:
+			do_daemon_stop();
+			break;
 
 		default:
 			/* An action will have been determined by this point */
@@ -1834,6 +1845,8 @@ check_cli_parameters(const int action)
 				case NODE_SERVICE:
 				case DAEMON_PAUSE:
 				case DAEMON_UNPAUSE:
+				case DAEMON_START:
+				case DAEMON_STOP:
 					break;
 				default:
 					item_list_append_format(&cli_warnings,
@@ -1920,7 +1933,10 @@ action_name(const int action)
 			return "DAEMON PAUSE";
 		case DAEMON_UNPAUSE:
 			return "DAEMON UNPAUSE";
-
+		case DAEMON_START:
+			return "DAEMON START";
+		case DAEMON_STOP:
+			return "DAEMON STOP";
 	}
 
 	return "UNKNOWN ACTION";
@@ -2731,6 +2747,39 @@ make_remote_repmgr_path(PQExpBufferData *output_buf, t_node_info *remote_node_re
 }
 
 
+/*
+ * Append a repmgrd command line ("repmgrd -f <config_file_path> ") to
+ * "output_buf", prefixed with "repmgr_bindir" if set, otherwise with
+ * "pg_bindir" if set (appended as-is; presumably ends with '/' - verify).
+ */
+void
+make_repmgrd_path(PQExpBufferData *output_buf)
+{
+	if (config_file_options.repmgr_bindir[0] != '\0')
+	{
+		int			len = strlen(config_file_options.repmgr_bindir);
+
+		appendPQExpBufferStr(output_buf,
+							 config_file_options.repmgr_bindir);
+
+		/* Add trailing slash */
+		if (config_file_options.repmgr_bindir[len - 1] != '/')
+		{
+			appendPQExpBufferChar(output_buf, '/');
+		}
+	}
+	else if (pg_bindir[0] != '\0')
+	{
+		appendPQExpBufferStr(output_buf,
+							 pg_bindir);
+	}
+
+	appendPQExpBuffer(output_buf,
+					  "repmgrd -f %s ",
+					  config_file_path);
+}
+
+
 /* ======================== */
 /* server control functions */
 /* ======================== */
@@ -3358,3 +3407,54 @@ check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *fo
 	PQfinish(follow_target_repl_conn);
 	return success;
 }
+
+
+/*
+ * Simple check to see if "shared_preload_libraries" includes "repmgr".
+ * Parsing "shared_preload_libraries" is non-trivial, as it's potentially
+ * a comma-separated list, and worse may not be readable by the repmgr
+ * user.
+ *
+ * Instead, we check if a function which should return a value returns
+ * NULL; this indicates the shared library is not installed.
+ */
+void
+check_shared_library(PGconn *conn)
+{
+	bool		ok = repmgrd_check_local_node_id(conn);
+
+	if (ok == true)
+		return;
+
+	log_error(_("repmgrd not configured for this node"));
+	log_hint(_("ensure \"shared_preload_libraries\" includes \"repmgr\" and restart PostgreSQL"));
+	PQfinish(conn);
+	exit(ERR_BAD_CONFIG);
+}
+
+
+/*
+ * Check whether the process whose PID is returned by repmgrd_get_pid()
+ * currently exists, by sending it signal 0.
+ *
+ * NOTE: kill() also returns -1 when the process exists but may not be
+ * signalled by this user (EPERM); that case is reported as "not running".
+ */
+bool
+is_repmgrd_running(PGconn *conn)
+{
+	pid_t		pid;
+	bool		is_running = false;
+
+	pid = repmgrd_get_pid(conn);
+
+	if (pid != UNKNOWN_PID)
+	{
+		if (kill(pid, 0) != -1)
+		{
+			is_running = true;
+		}
+	}
+
+	return is_running;
+}
diff --git a/repmgr-client.h b/repmgr-client.h
index 5d47f79c..b819ad2a 100644
--- a/repmgr-client.h
+++ b/repmgr-client.h
@@ -48,6 +48,8 @@
 #define DAEMON_STATUS 22
 #define DAEMON_PAUSE 23
 #define DAEMON_UNPAUSE 24
+#define DAEMON_START 25
+#define DAEMON_STOP 26
 
 /* command line options without short versions */
 #define OPT_HELP 1001
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index 1d758057..5541e94a 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -322,6 +322,12 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
 # repmgr provides options to override the default pg_ctl commands
 # used to stop, start, restart, reload and promote the PostgreSQL cluster
 #
+# These options are useful when PostgreSQL has been installed from a package
+# which provides OS-level service commands. In environments using an init system
+# such as systemd, which keeps track of the state of various services, it is
+# essential that the service commands are correctly configured and pg_ctl is
+# not executed directly.
+#
 # NOTE: These commands must be runnable on remote nodes as well for switchover
 # to function correctly.
 #
@@ -343,7 +349,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
 #
 # Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
 #
-# For more details, see: https://repmgr.org/docs/4.1/configuration-service-commands.html
+# For more details, see: https://repmgr.org/docs/current/configuration-service-commands.html
 
 #service_start_command = ''
 #service_stop_command = ''
@@ -355,6 +361,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
 # for "promote_command"; do not use "repmgr standby promote"
 # (or a script which executes "repmgr standby promote") here.
 
+# Used by "repmgr daemon (start|stop)" to control repmgrd
+#
+#repmgrd_service_start_command = ''
+#repmgrd_service_stop_command = ''
+
 #------------------------------------------------------------------------------
 # Status check thresholds
 #------------------------------------------------------------------------------