/* * repmgr.c - Command interpreter for the repmgr * * * This module is a command-line utility to easily setup a cluster of * hot standby servers for an HA environment * * Commands implemented are. * MASTER REGISTER, STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW, * STANDBY PROMOTE * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #include "repmgr.h" #include #include #include #include #include "check_dir.h" #include "strutil.h" #define RECOVERY_FILE "recovery.conf" #define RECOVERY_DONE_FILE "recovery.done" #define NO_ACTION 0 /* Not a real action, just to initialize */ #define MASTER_REGISTER 1 #define STANDBY_REGISTER 2 #define STANDBY_CLONE 3 #define STANDBY_PROMOTE 4 #define STANDBY_FOLLOW 5 static void help(const char *progname); static bool create_recovery_file(const char *data_dir, char *master_conninfo); static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory); static bool check_parameters_for_action(const int action); static void do_master_register(void); static void do_standby_register(void); static void do_standby_clone(void); static void do_standby_promote(void); static void do_standby_follow(void); const char *progname; const char *keywords[6]; const char *values[6]; const char *dbname = NULL; char *host = NULL; char *username = NULL; char *dest_dir = NULL; char *config_file = NULL; char *remote_user = NULL; char *wal_keep_segments = NULL; bool verbose = false; bool force = false; int numport = 0; char *masterport = NULL; char *server_mode = NULL; char *server_cmd = NULL; repmgr_config config = {}; int main(int argc, char **argv) { static struct option long_options[] = { {"dbname", required_argument, NULL, 'd'}, {"host", required_argument, NULL, 'h'}, {"port", required_argument, NULL, 'p'}, {"username", required_argument, NULL, 'U'}, {"dest-dir", required_argument, NULL, 'D'}, {"config-file", required_argument, NULL, 'f'}, {"remote-user", required_argument, NULL, 'R'}, {"wal-keep-segments", required_argument, NULL, 'w'}, {"force", no_argument, NULL, 'F'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} }; int optindex; int c; int action = NO_ACTION; progname = get_progname(argv[0]); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s (PostgreSQL) " PG_VERSION "\n", progname); exit(0); } } while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, &optindex)) != -1) { switch (c) { case 'd': dbname = optarg; break; case 'h': host = optarg; break; case 'p': masterport = optarg; break; case 'U': username = optarg; break; case 'D': dest_dir = optarg; break; case 'f': config_file = optarg; break; case 'R': remote_user = optarg; break; case 'w': wal_keep_segments = optarg; break; case 'F': force = true; break; case 'v': verbose = true; break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } } /* * Now we need to obtain the action, this comes in one of these forms: * MASTER REGISTER | * STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]} * * the node part is optional, if we receive it then we shouldn't * have received a -h option */ if (optind < argc) { server_mode = argv[optind++]; if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 0) { fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } } if (optind < argc) { server_cmd = argv[optind++]; if (strcasecmp(server_cmd, "REGISTER") == 0) { /* * we don't use this info in any other place so i will * just execute the compare again instead of having an * additional variable to hold a value that we will use * no more */ if (strcasecmp(server_mode, "MASTER") == 0) action = MASTER_REGISTER; else if (strcasecmp(server_mode, "STANDBY") == 0) action = STANDBY_REGISTER; } else if (strcasecmp(server_cmd, "CLONE") == 0) action = STANDBY_CLONE; else if (strcasecmp(server_cmd, "PROMOTE") == 0) action = STANDBY_PROMOTE; else if (strcasecmp(server_cmd, "FOLLOW") == 0) action = STANDBY_FOLLOW; else { fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } } /* For some actions we still can receive a last argument */ if (action == STANDBY_CLONE) { if (optind < argc) { if (host != NULL) { fprintf(stderr, _("Conflicting parameters you can't use -h while providing a node separately. Try \"%s --help\" for more information.\n"), progname); exit(1); } host = argv[optind++]; } } switch (optind < argc) { case 0: break; default: fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"), progname, argv[optind + 1]); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } if (!check_parameters_for_action(action)) exit(1); if (config_file == NULL) { const int buf_sz = 3 + sizeof(CONFIG_FILE); config_file = malloc(buf_sz); xsnprintf(config_file, buf_sz, "./%s", CONFIG_FILE); } if (wal_keep_segments == NULL) { wal_keep_segments = malloc(5); strcpy(wal_keep_segments, "5000"); } if (dbname == NULL) { if (getenv("PGDATABASE")) dbname = getenv("PGDATABASE"); else if (getenv("PGUSER")) dbname = getenv("PGUSER"); else dbname = "postgres"; } /* * Read the configuration file: repmgr.conf, but only if we're not doing a * STANDBY CLONE action: it is not necessary to have the configuration file * in that case. */ if (action != STANDBY_CLONE) { parse_config(config_file, &config); if (config.node == -1) { fprintf(stderr, "Node information is missing. " "Check the configuration file.\n"); exit(1); } } keywords[2] = "user"; values[2] = username; keywords[3] = "dbname"; values[3] = dbname; keywords[4] = "application_name"; values[4] = (char *) progname; keywords[5] = NULL; values[5] = NULL; switch (action) { case MASTER_REGISTER: do_master_register(); break; case STANDBY_REGISTER: do_standby_register(); break; case STANDBY_CLONE: do_standby_clone(); break; case STANDBY_PROMOTE: do_standby_promote(); break; case STANDBY_FOLLOW: do_standby_follow(); break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } return 0; } static void do_master_register(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; char schema_str[MAXLEN]; char schema_quoted[MAXLEN]; bool schema_exists = false; char master_version[MAXVERSIONSTR]; conn = establishDBConnection(config.conninfo, true); /* master should be v9 or better */ pg_version(conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are a master */ if (is_standby(conn)) { fprintf(stderr, "repmgr: This node should be a master\n"); PQfinish(conn); return; } /* Assemble the unquoted schema name */ maxlen_snprintf(schema_str, "repmgr_%s", config.cluster_name); { char *identifier = PQescapeIdentifier(conn, schema_str, strlen(schema_str)); maxlen_snprintf(schema_quoted, "%s", identifier); PQfreemem(identifier); } /* Check if there is a schema for this cluster */ sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace " "WHERE nspname = '%s'", schema_str); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about schemas: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } if (PQntuples(res) > 0) /* schema exists */ { if (!force) /* and we are not forcing so error */ { fprintf(stderr, "Schema %s already exists.", schema_quoted); PQclear(res); PQfinish(conn); return; } schema_exists = true; } PQclear(res); if (!schema_exists) { /* ok, create the schema */ sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", schema_quoted); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the schema %s: %s\n", schema_quoted, PQerrorMessage(conn)); PQfinish(conn); return; } /* ... the tables */ sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( " " id integer primary key, " " cluster text not null, " " conninfo text not null)", schema_quoted); if (!PQexec(conn, sqlquery)) { fprintf(stderr, config.cluster_name, PQerrorMessage(conn)); PQfinish(conn); return; } sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( " " primary_node INTEGER NOT NULL, " " standby_node INTEGER NOT NULL, " " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " " last_wal_primary_location TEXT NOT NULL, " " last_wal_standby_location TEXT NOT NULL, " " replication_lag BIGINT NOT NULL, " " apply_lag BIGINT NOT NULL) ", schema_quoted); } if (!PQexec(conn, sqlquery)) { fprintf(stderr, config.cluster_name, PQerrorMessage(conn)); PQfinish(conn); return; } /* and the view */ sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS " " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " " ORDER BY last_monitor_time desc) " " FROM %s.repl_monitor) " " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " " FROM monitor_info a " " WHERE row_number = 1", schema_quoted, schema_quoted); if (!PQexec(conn, sqlquery)) { fprintf(stderr, config.cluster_name, PQerrorMessage(conn)); PQfinish(conn); return; } else { PGconn *master_conn; int id; /* Ensure there isn't any other master already registered */ master_conn = getMasterConnection(conn, config.node, config.cluster_name, &id, NULL); if (master_conn != NULL) { PQfinish(master_conn); fprintf(stderr, "There is a master already in this cluster"); return; } } /* Now register the master */ if (force) { sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes WHERE id = %d", schema_quoted, config.node); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot delete node details, %s\n", PQerrorMessage(conn)); PQfinish(conn); return; } } sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " "VALUES (%d, '%s', '%s')", schema_quoted, config.node, config.cluster_name, config.conninfo); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", PQerrorMessage(conn)); PQfinish(conn); return; } PQfinish(conn); return; } static void do_standby_register(void) { PGconn *conn; PGconn *master_conn; int master_id; PGresult *res; char sqlquery[QUERY_STR_LEN]; char schema_str[MAXLEN]; char schema_quoted[MAXLEN]; char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; conn = establishDBConnection(config.conninfo, true); /* XXX: A lot of copied code from do_master_register! Refactor */ /* should be v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are a standby */ if (!is_standby(conn)) { fprintf(stderr, "repmgr: This node should be a standby\n"); PQfinish(conn); return; } /* Assemble the unquoted schema name */ maxlen_snprintf(schema_str, "repmgr_%s", config.cluster_name); { char *identifier = PQescapeIdentifier(conn, schema_str, strlen(schema_str)); maxlen_snprintf(schema_quoted, "%s", identifier); PQfreemem(identifier); } /* Check if there is a schema for this cluster */ sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace " " WHERE nspname = '%s'", schema_str); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } /* schema doesn't exist */ if (PQntuples(res) == 0) { fprintf(stderr, "Schema %s doesn't exist.\n", schema_quoted); PQclear(res); PQfinish(conn); return; } PQclear(res); /* check if there is a master in this cluster */ master_conn = getMasterConnection(conn, config.node, config.cluster_name, &master_id, NULL); if (!master_conn) return; /* master should be v9 or better */ pg_version(master_conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* master and standby version should match */ if (strcmp(master_version, standby_version) != 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), progname, master_version, standby_version); return; } /* Now register the standby */ if (force) { sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " " WHERE id = %d", schema_quoted, config.node); if (!PQexec(master_conn, sqlquery)) { fprintf(stderr, "Cannot delete node details, %s\n", PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); return; } } sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " "VALUES (%d, '%s', '%s')", schema_quoted, config.node, config.cluster_name, config.conninfo); if (!PQexec(master_conn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); return; } PQfinish(master_conn); PQfinish(conn); return; } static void do_standby_clone(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; int r = 0; int i; bool pg_dir = false; char master_data_directory[MAXLEN]; char master_config_file[MAXLEN]; char master_hba_file[MAXLEN]; char master_ident_file[MAXLEN]; char master_control_file[MAXLEN]; char local_control_file[MAXLEN]; char *first_wal_segment = NULL; const char *last_wal_segment = NULL; char master_version[MAXVERSIONSTR]; /* if dest_dir hasn't been provided, initialize to current directory */ if (dest_dir == NULL) { dest_dir = malloc(5); strcpy(dest_dir, "."); } /* Check this directory could be used as a PGDATA dir */ switch (check_dir(dest_dir)) { case 0: /* dest_dir not there, must create it */ if (verbose) printf(_("creating directory %s ... "), dest_dir); fflush(stdout); if (!create_directory(dest_dir)) { fprintf(stderr, _("%s: couldn't create directory %s ... "), progname, dest_dir); return; } break; case 1: /* Present but empty, fix permissions and use it */ if (verbose) printf(_("fixing permissions on existing directory %s ... "), dest_dir); fflush(stdout); if (!set_directory_permissions(dest_dir)) { fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), progname, dest_dir, strerror(errno)); return; } break; case 2: /* Present and not empty */ fprintf(stderr, _("%s: directory \"%s\" exists but is not empty\n"), progname, dest_dir); pg_dir = is_pg_dir(dest_dir); if (pg_dir && !force) { fprintf(stderr, _("\nThis looks like a PostgreSQL directroy.\n" "If you are sure you want to clone here, " "please check there is no PostgreSQL server " "running and use the --force option\n")); return; } else if (pg_dir && force) { /* Let it continue */ break; } else return; default: /* Trouble accessing directory */ fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), progname, dest_dir, strerror(errno)); } /* Connection parameters for master only */ keywords[0] = "host"; values[0] = host; keywords[1] = "port"; values[1] = masterport; /* We need to connect to check configuration and start a backup */ conn = PQconnectdbParams(keywords, values, true); if (!conn) { fprintf(stderr, _("%s: could not connect to master\n"), progname); return; } /* primary should be v9 or better */ pg_version(conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are cloning a primary node */ if (is_standby(conn)) { PQfinish(conn); fprintf(stderr, "\nThe command should clone a primary node\n"); return; } /* And check if it is well configured */ if (!guc_setted(conn, "wal_level", "=", "hot_standby")) { PQfinish(conn); fprintf(stderr, _("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); return; } if (!guc_setted(conn, "wal_keep_segments", ">=", wal_keep_segments)) { PQfinish(conn); fprintf(stderr, _("%s needs parameter 'wal_keep_segments' to be set to %s or greater\n"), progname, wal_keep_segments); return; } if (!guc_setted(conn, "archive_mode", "=", "on")) { PQfinish(conn); fprintf(stderr, _("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname); return; } if (verbose) printf(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); /* * Check if the tablespace locations exists and that we can write to them. */ sqlquery_snprintf(sqlquery, "SELECT spclocation " " FROM pg_tablespace " "WHERE spcname NOT IN ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } for (i = 0; i < PQntuples(res); i++) { char *tblspc_dir = NULL; strcpy(tblspc_dir, PQgetvalue(res, i, 0)); /* Check this directory could be used as a PGDATA dir */ switch (check_dir(tblspc_dir)) { case 0: /* tblspc_dir not there, must create it */ if (verbose) printf(_("creating directory \"%s\"... "), tblspc_dir); fflush(stdout); if (!create_directory(tblspc_dir)) { fprintf(stderr, _("%s: couldn't create directory \"%s\"... "), progname, tblspc_dir); PQclear(res); PQfinish(conn); return; } break; case 1: /* Present but empty, fix permissions and use it */ if (verbose) printf(_("fixing permissions on existing directory \"%s\"... "), tblspc_dir); fflush(stdout); if (!set_directory_permissions(tblspc_dir)) { fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); return; } break; case 2: /* Present and not empty */ if (!force) { fprintf( stderr, _("%s: directory \"%s\" exists but is not empty\n"), progname, tblspc_dir); PQclear(res); PQfinish(conn); return; } default: /* Trouble accessing directory */ fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); return; } } fprintf(stderr, "Starting backup...\n"); /* Get the data directory full path and the configuration files location */ sqlquery_snprintf( sqlquery, "SELECT name, setting " " FROM pg_settings " " WHERE name IN ('data_directory', 'config_file', 'hba_file', " " 'ident_file')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about data directory and configuration files: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } for (i = 0; i < PQntuples(res); i++) { if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0) strcpy(master_data_directory, PQgetvalue(res, i, 1)); else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0) strcpy(master_config_file, PQgetvalue(res, i, 1)); else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0) strcpy(master_hba_file, PQgetvalue(res, i, 1)); else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0) strcpy(master_ident_file, PQgetvalue(res, i, 1)); else fprintf(stderr, _("uknown parameter: %s"), PQgetvalue(res, i, 0)); } PQclear(res); /* * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files */ sqlquery_snprintf( sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't start backup: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } if (verbose) { char *first_wal_seg_pq = PQgetvalue(res, 0, 0); size_t buf_sz = strlen(first_wal_seg_pq); first_wal_segment = malloc(buf_sz + 1); xsnprintf(first_wal_segment, buf_sz + 1, "%s", first_wal_seg_pq); } PQclear(res); /* * 1) first move global/pg_control * * 2) then move data_directory ommiting the files we have already moved and * pg_xlog content * * 3) finally We need to backup configuration files (that could be on other * directories, debian like systems likes to do that), so look at * config_file, hba_file and ident_file but we can omit * external_pid_file ;) * * On error we need to return but before that execute pg_stop_backup() */ /* need to create the global sub directory */ maxlen_snprintf(master_control_file, "%s/global/pg_control", master_data_directory); maxlen_snprintf(local_control_file, "%s/global", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s ... "), progname, dest_dir); goto stop_backup; } r = copy_remote_files(host, remote_user, master_control_file, local_control_file, false); if (r != 0) goto stop_backup; r = copy_remote_files(host, remote_user, master_data_directory, dest_dir, true); if (r != 0) goto stop_backup; /* * Copy tablespace locations, i'm doing this separately because i couldn't * find and appropiate rsync option but besides we could someday make all * these rsync happen concurrently */ sqlquery_snprintf(sqlquery, "SELECT spclocation " " FROM pg_tablespace " " WHERE spcname NOT IN ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); goto stop_backup; } for (i = 0; i < PQntuples(res); i++) { r = copy_remote_files(host, remote_user, PQgetvalue(res, i, 0), PQgetvalue(res, i, 0), true); if (r != 0) goto stop_backup; } r = copy_remote_files(host, remote_user, master_config_file, dest_dir, false); if (r != 0) goto stop_backup; r = copy_remote_files(host, remote_user, master_hba_file, dest_dir, false); if (r != 0) goto stop_backup; r = copy_remote_files(host, remote_user, master_ident_file, dest_dir, false); if (r != 0) goto stop_backup; stop_backup: /* inform the master that we have finished the backup */ conn = PQconnectdbParams(keywords, values, true); if (!conn) { fprintf(stderr, _("%s: could not connect to master\n"), progname); return; } fprintf(stderr, "Finishing backup...\n"); sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } last_wal_segment = PQgetvalue(res, 0, 0); if (verbose) { printf( _("%s requires primary to keep WAL files %s until at least %s\n"), progname, first_wal_segment, last_wal_segment); /* * Only free the first_wal_segment since it was copied out of the * pqresult. */ free(first_wal_segment); first_wal_segment = NULL; } PQclear(res); PQfinish(conn); /* Now, if the rsync failed then exit */ if (r != 0) return; /* * We need to create the pg_xlog sub directory too, I'm reusing a variable * here. */ maxlen_snprintf(local_control_file, "%s/pg_xlog", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s, you will need to do it manually...\n"), progname, dest_dir); } /* Finally, write the recovery.conf file */ create_recovery_file(dest_dir, NULL); /* * We don't start the service because we still may want to move the * directory */ return; } static void do_standby_promote(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; char script[MAXLEN]; PGconn *old_master_conn; int old_master_id; int r; char data_dir[MAXLEN]; char recovery_file_path[MAXLEN]; char recovery_done_path[MAXLEN]; char standby_version[MAXVERSIONSTR]; /* We need to connect to check configuration */ conn = establishDBConnection(config.conninfo, true); /* we need v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are in a standby node */ if (!is_standby(conn)) { fprintf(stderr, "repmgr: The command should be executed in a standby node\n"); return; } /* we also need to check if there isn't any master already */ old_master_conn = getMasterConnection(conn, config.node, config.cluster_name, &old_master_id, NULL); if (old_master_conn != NULL) { PQfinish(old_master_conn); fprintf(stderr, "There is a master already in this cluster"); return; } if (verbose) printf(_("\n%s: Promoting standby...\n"), progname); /* Get the data directory full path and the last subdirectory */ sqlquery_snprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } strcpy(data_dir, PQgetvalue(res, 0, 0)); PQclear(res); PQfinish(conn); maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); maxlen_snprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); rename(recovery_file_path, recovery_done_path); /* We assume the pg_ctl script is in the PATH */ maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { fprintf(stderr, "Can't restart service\n"); return; } /* reconnect to check we got promoted */ /* * XXX i'm removing this because it gives an annoying message saying * couldn't connect but is just the server starting up * conn = establishDBConnection(config.conninfo, true); * if (is_standby(conn)) * fprintf(stderr, "\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); * else * fprintf(stderr, "\n%s: you should REINDEX any hash indexes you have.\n", progname); * PQfinish(conn); */ return; } static void do_standby_follow(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; char script[MAXLEN]; char master_conninfo[MAXLEN]; PGconn *master_conn; int master_id; int r; char data_dir[MAXLEN]; char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; /* We need to connect to check configuration */ conn = establishDBConnection(config.conninfo, true); /* Check we are in a standby node */ if (!is_standby(conn)) { fprintf(stderr, "\n%s: The command should be executed in a standby node\n", progname); return; } /* should be v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname); return; } /* we also need to check if there is any master in the cluster */ master_conn = getMasterConnection(conn, config.node, config.cluster_name, &master_id, (char *) &master_conninfo); if (master_conn == NULL) { PQfinish(conn); fprintf(stderr, "There isn't a master to follow in this cluster"); return; } /* Check we are going to point to a master */ if (is_standby(master_conn)) { PQfinish(conn); fprintf(stderr, "%s: The node to follow should be a master\n", progname); return; } /* should be v9 or better */ pg_version(master_conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* master and standby version should match */ if (strcmp(master_version, standby_version) != 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), progname, master_version, standby_version); return; } /* * set the host and masterport variables with the master ones * before closing the connection because we will need them to * recreate the recovery.conf file */ /* * Copy the hostname to the 'host' global variable from the master * connection. */ { char *pqhost = PQhost(master_conn); const int host_buf_sz = strlen(pqhost); host = malloc(host_buf_sz + 1); xsnprintf(host, host_buf_sz, "%s", pqhost); } masterport = malloc(10); strcpy(masterport, PQport(master_conn)); PQfinish(master_conn); if (verbose) printf(_("\n%s: Changing standby's master...\n"), progname); /* Get the data directory full path */ sqlquery_snprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } strcpy(data_dir, PQgetvalue(res, 0, 0)); PQclear(res); PQfinish(conn); /* write the recovery.conf file */ if (!create_recovery_file(data_dir, master_conninfo)) return; /* Finally, restart the service */ /* We assume the pg_ctl script is in the PATH */ maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { fprintf(stderr, "Can't restart service\n"); return; } return; } static void help(const char *progname) { printf(_("\n%s: Replicator manager \n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTIONS] master {register}\n"), progname); printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), progname); printf(_("\nGeneral options:\n")); printf(_(" --help show this help, then exit\n")); printf(_(" --version output version information, then exit\n")); printf(_(" --verbose output verbose activity information\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=DBNAME database to connect to\n")); printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); printf(_(" -p, --port=PORT database server port\n")); printf(_(" -U, --username=USERNAME database user name to connect as\n")); printf(_("\nConfiguration options:\n")); printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); printf(_(" -f, --config_file=PATH path to the configuration file\n")); printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); printf(_(" -F, --force force potentially dangerous operations to happen\n")); printf(_("\n%s performs some tasks like clone a node, promote it "), progname); printf(_("or making follow another node and then exits.\n")); printf(_("COMMANDS:\n")); printf(_(" master register - registers the master in a cluster\n")); printf(_(" standby register - registers a standby in a cluster\n")); printf(_(" standby clone [node] - allows creation of a new standby\n")); printf(_(" standby promote - allows manual promotion of a specific standby into a ")); printf(_("new master in the event of a failover\n")); printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); } /* * Creates a recovery file for a standby. * * Writes master_conninfo to recovery.conf if is non-NULL */ static bool create_recovery_file(const char *data_dir, char *master_conninfo) { FILE *recovery_file; char recovery_file_path[MAXLEN]; char line[MAXLEN]; maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); recovery_file = fopen(recovery_file_path, "w"); if (recovery_file == NULL) { fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n"); return false; } maxlen_snprintf(line, "standby_mode = 'on'\n"); if (fputs(line, recovery_file) == EOF) { fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); fclose(recovery_file); return false; } /* * Template a password into the connection string in recovery.conf. * Sometimes this is passed by the user explicitly, and otherwise we try to * get it into th environment * * XXX: This is pretty dirty, at least push this up to the caller rather * than hitting environment variables at this level. */ if (master_conninfo == NULL) { char *password = getenv("PGPASSWORD"); if (password == NULL) { fprintf(stderr, _("%s: Panic! PGPASSWORD not set, how can we get here?\n"), progname); exit(255); } maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s password=%s'\n", host, ((masterport==NULL) ? "5432" : masterport), password); } else maxlen_snprintf(line, "primary_conninfo = '%s'\n", master_conninfo); if (fputs(line, recovery_file) == EOF) { fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n"); fclose(recovery_file); return false; } /*FreeFile(recovery_file);*/ fclose(recovery_file); return true; } static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory) { char script[MAXLEN]; char options[MAXLEN]; char host_string[MAXLEN]; int r; if (strnlen(config.rsync_options, MAXLEN) == 0) maxlen_snprintf( options, "%s", "--archive --checksum --compress --progress --rsh=ssh"); else maxlen_snprintf(options, "%s", config.rsync_options); if (force) strcat(options, " --delete"); if (remote_user == NULL) { maxlen_snprintf(host_string, "%s", host); } else { maxlen_snprintf(host_string,"%s@%s",remote_user,host); } if (is_directory) { strcat(options, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); maxlen_snprintf(script, "rsync %s %s:%s/* %s", options, host_string, remote_path, local_path); } else { maxlen_snprintf(script, "rsync %s %s:%s %s/.", options, host_string, remote_path, local_path); } if (verbose) printf("rsync command line: '%s'\n",script); r = system(script); if (r != 0) fprintf(stderr, _("Can't rsync from remote file or directory (%s:%s)\n"), host_string, remote_path); return r; } /* * Tries to avoid useless or conflicting parameters */ static bool check_parameters_for_action(const int action) { bool ok = true; switch (action) { case MASTER_REGISTER: /* * To register a master we only need the repmgr.conf * all other parameters are at least useless and could be * confusing so reject them */ if ((host != NULL) || (masterport != NULL) || (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a MASTER REGISTER command."); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for MASTER REGISTER command"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } break; case STANDBY_REGISTER: /* * To register a standby we only need the repmgr.conf * we don't need connection parameters to the master * because we can detect the master in repl_nodes */ if ((host != NULL) || (masterport != NULL) || (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY REGISTER command."); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY REGISTER command"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } break; case STANDBY_PROMOTE: /* * To promote a standby we only need the repmgr.conf * we don't want connection parameters to the master * because we will try to detect the master in repl_nodes * if we can't find it then the promote action will be cancelled */ if ((host != NULL) || (masterport != NULL) || (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY PROMOTE command."); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY PROMOTE command"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } break; case STANDBY_FOLLOW: /* * To make a standby follow a master we only need the repmgr.conf * we don't want connection parameters to the new master * because we will try to detect the master in repl_nodes * if we can't find it then the follow action will be cancelled */ if ((host != NULL) || (masterport != NULL) || (username != NULL) || (dbname != NULL)) { fprintf(stderr, "\nYou can't use connection parameters to the master when issuing a STANDBY FOLLOW command."); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } if (dest_dir != NULL) { fprintf(stderr, "\nYou don't need a destination directory for STANDBY FOLLOW command"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } break; case STANDBY_CLONE: /* * To clone a master into a standby we need connection parameters * repmgr.conf is useless because we don't have a server running * in the standby */ if (config_file != NULL) { fprintf(stderr, "\nYou need to use connection parameters to the master when issuing a STANDBY CLONE command."); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); ok = false; } break; } return ok; }