mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 17:06:29 +00:00
Add docs, fix Makefile and fix some bugs and typos
This commit is contained in:
243
repmgr.c
243
repmgr.c
@@ -17,35 +17,19 @@
|
||||
#define RECOVERY_DONE_FILE "recovery.done"
|
||||
|
||||
|
||||
void checkNodeConfiguration(char *conninfo);
|
||||
void getPrimaryConnection(void);
|
||||
|
||||
|
||||
void help(void);
|
||||
void do_standby_clone(char *conninfo);
|
||||
void do_standby_clone(char *master);
|
||||
void do_standby_promote(void);
|
||||
void do_standby_follow(char *master);
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
char myClusterName[MAXLEN];
|
||||
int myLocalId = -1;
|
||||
char myConninfo[MAXLEN];
|
||||
|
||||
if (argc != 2)
|
||||
if (argc != 3 && argc != 4)
|
||||
help();
|
||||
|
||||
/*
|
||||
* Read the configuration file: repmgr.conf
|
||||
*/
|
||||
parse_config(myClusterName, &myLocalId, myConninfo);
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* XXX should we check the master prerequisites? */
|
||||
|
||||
|
||||
@@ -54,11 +38,35 @@ main(int argc, char **argv)
|
||||
if (strcasecmp(argv[1], "STANDBY") == 0)
|
||||
{
|
||||
if (strcasecmp(argv[2], "CLONE") == 0)
|
||||
do_standby_clone(myConninfo);
|
||||
{
|
||||
/*
|
||||
* For STANDBY CLONE we should receive the hostname or ip
|
||||
* of the node being cloned, it should be the third argument
|
||||
*/
|
||||
if (argc == 3)
|
||||
help();
|
||||
|
||||
do_standby_clone(argv[3]);
|
||||
}
|
||||
else if (strcasecmp(argv[2], "PROMOTE") == 0)
|
||||
{
|
||||
/*
|
||||
* For STANDBY PROMOTE we don't need any arguments
|
||||
*/
|
||||
if (argc == 4)
|
||||
help();
|
||||
do_standby_promote();
|
||||
}
|
||||
else if (strcasecmp(argv[2], "FOLLOW") == 0)
|
||||
do_standby_follow();
|
||||
{
|
||||
/*
|
||||
* For STANDBY FOLLOW we should receive the hostname or ip
|
||||
* of the new master to follow, it should be the third argument
|
||||
*/
|
||||
if (argc == 3)
|
||||
help();
|
||||
do_standby_follow(argv[3]);
|
||||
}
|
||||
else
|
||||
help();
|
||||
}
|
||||
@@ -70,26 +78,36 @@ main(int argc, char **argv)
|
||||
|
||||
|
||||
void
|
||||
do_standby_clone(char *conninfo)
|
||||
do_standby_clone(char *master)
|
||||
{
|
||||
PGconn *conn;
|
||||
PGresult *res;
|
||||
char sqlquery[8192];
|
||||
char script[8192];
|
||||
|
||||
char master_conninfo[MAXLEN];
|
||||
|
||||
int r;
|
||||
char data_dir_full_path[MAXLEN];
|
||||
char *current_dir;
|
||||
char data_dir[MAXLEN];
|
||||
char recovery_file_path[MAXLEN];
|
||||
FILE *recovery_file;
|
||||
|
||||
char line[MAXLEN];
|
||||
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
sprintf(master_conninfo, "host=%s", master);
|
||||
|
||||
fprintf(stderr, "Starting backup...");
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(master_conninfo, true);
|
||||
|
||||
/* Check we are cloning a primary node */
|
||||
if (is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The command should clone a primary node\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Starting backup...\n");
|
||||
|
||||
/* Get the data directory full path and the last subdirectory */
|
||||
sprintf(sqlquery, "SELECT setting, "
|
||||
@@ -98,7 +116,7 @@ do_standby_clone(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -111,7 +129,7 @@ do_standby_clone(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't start backup: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't start backup: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -120,24 +138,27 @@ do_standby_clone(char *conninfo)
|
||||
PQfinish(conn);
|
||||
|
||||
/* rsync data directory to current location */
|
||||
sprintf(script, "rsync -r %s .", data_dir_full_path);
|
||||
sprintf(script, "rsync -r %s:%s .", master, data_dir_full_path);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
fprintf(stderr, "Can't rsync data directory");
|
||||
return;
|
||||
fprintf(stderr, "Can't rsync data directory\n");
|
||||
/*
|
||||
* we need to return, but before that we let the pg_stop_backup()
|
||||
* happen
|
||||
*/
|
||||
}
|
||||
|
||||
/* inform the master that we have finished the backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
conn = establishDBConnection(master_conninfo, true);
|
||||
|
||||
fprintf(stderr, "Finishing backup...");
|
||||
fprintf(stderr, "Finishing backup...\n");
|
||||
|
||||
sprintf(sqlquery, "SELECT pg_stop_backup()");
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't stop backup: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -145,35 +166,32 @@ do_standby_clone(char *conninfo)
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
|
||||
/* Now, if the rsync failed then exit */
|
||||
if (r != 0)
|
||||
return;
|
||||
|
||||
/* Finally, write the recovery.conf file */
|
||||
getcwd(current_dir, MAXLEN);
|
||||
strcpy(recovery_file_path, current_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcat(recovery_file_path, data_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcat(recovery_file_path, RECOVERY_FILE);
|
||||
free(current_dir);
|
||||
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
|
||||
|
||||
recovery_file = fopen(recovery_file_path, "w");
|
||||
if (recovery_file == NULL)
|
||||
{
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually");
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n");
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "standby_mode = on\n");
|
||||
sprintf(line, "standby_mode = 'on'\n");
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "primary_conninfo = ");
|
||||
strcat(line, conninfo);
|
||||
sprintf(line, "primary_conninfo = '%s'\n", master_conninfo);
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
@@ -181,26 +199,49 @@ do_standby_clone(char *conninfo)
|
||||
/*FreeFile(recovery_file);*/
|
||||
fclose(recovery_file);
|
||||
|
||||
/* We don't start the service because we still may want to move the directory */
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_standby_promote(char *conninfo)
|
||||
do_standby_promote(void)
|
||||
{
|
||||
char myClusterName[MAXLEN];
|
||||
int myLocalId = -1;
|
||||
char myConninfo[MAXLEN];
|
||||
|
||||
PGconn *conn;
|
||||
PGresult *res;
|
||||
char sqlquery[8192];
|
||||
char script[8192];
|
||||
|
||||
int r;
|
||||
char data_dir[MAXLEN];
|
||||
char recovery_file_path[MAXLEN];
|
||||
char recovery_done_path[MAXLEN];
|
||||
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
/*
|
||||
* Read the configuration file: repmgr.conf
|
||||
*/
|
||||
parse_config(myClusterName, &myLocalId, myConninfo);
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Promoting standby...");
|
||||
conn = establishDBConnection(myConninfo, true);
|
||||
|
||||
/* Check we are in a standby node */
|
||||
if (!is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The command should be executed in a standby node\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Promoting standby...\n");
|
||||
|
||||
/* Get the data directory full path and the last subdirectory */
|
||||
sprintf(sqlquery, "SELECT setting "
|
||||
@@ -208,7 +249,7 @@ do_standby_promote(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -217,20 +258,16 @@ do_standby_promote(char *conninfo)
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
|
||||
strcpy(recovery_file_path, data_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcpy(recovery_file_path, RECOVERY_FILE);
|
||||
|
||||
strcpy(recovery_done_path, data_dir);
|
||||
strcat(recovery_done_path, "/");
|
||||
strcpy(recovery_done_path, RECOVERY_DONE);
|
||||
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
|
||||
sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE);
|
||||
rename(recovery_file_path, recovery_done_path);
|
||||
|
||||
sprintf(script, "pg_ctl -D %s restart", data_dir);
|
||||
/* We assume the pg_ctl script is in the PATH */
|
||||
sprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
fprintf(stderr, "Can't restart service");
|
||||
fprintf(stderr, "Can't restart service\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -239,22 +276,57 @@ do_standby_promote(char *conninfo)
|
||||
|
||||
|
||||
void
|
||||
do_standby_follow(char *conninfo)
|
||||
do_standby_follow(char *master)
|
||||
{
|
||||
char myClusterName[MAXLEN];
|
||||
int myLocalId = -1;
|
||||
char myConninfo[MAXLEN];
|
||||
|
||||
PGconn *conn;
|
||||
PGresult *res;
|
||||
char sqlquery[8192];
|
||||
char script[8192];
|
||||
|
||||
char master_conninfo[MAXLEN];
|
||||
|
||||
int r;
|
||||
char data_dir[MAXLEN];
|
||||
char recovery_file_path[MAXLEN];
|
||||
FILE *recovery_file;
|
||||
|
||||
char line[MAXLEN];
|
||||
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
/*
|
||||
* Read the configuration file: repmgr.conf
|
||||
*/
|
||||
parse_config(myClusterName, &myLocalId, myConninfo);
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Changing standby's primary...");
|
||||
sprintf(master_conninfo, "host=%s", master);
|
||||
conn = establishDBConnection(master_conninfo, true);
|
||||
|
||||
/* Check we are going to point to a primary */
|
||||
if (is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The should follow to a primary node\n");
|
||||
return;
|
||||
}
|
||||
PQfinish(conn);
|
||||
|
||||
conn = establishDBConnection(myConninfo, true);
|
||||
/* Check we are in a standby node */
|
||||
if (!is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The command should be executed in a standby node\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Changing standby's primary...\n");
|
||||
|
||||
/* Get the data directory full path and the last subdirectory */
|
||||
sprintf(sqlquery, "SELECT setting "
|
||||
@@ -262,7 +334,7 @@ do_standby_follow(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -272,30 +344,27 @@ do_standby_follow(char *conninfo)
|
||||
PQfinish(conn);
|
||||
|
||||
/* Finally, write the recovery.conf file */
|
||||
strcpy(recovery_file_path, data_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcat(recovery_file_path, RECOVERY_FILE);
|
||||
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
|
||||
|
||||
recovery_file = fopen(recovery_file_path, "w");
|
||||
if (recovery_file == NULL)
|
||||
{
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually");
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n");
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "standby_mode = on\n");
|
||||
sprintf(line, "standby_mode = 'on'\n");
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "primary_conninfo = ");
|
||||
strcat(line, conninfo);
|
||||
sprintf(line, "primary_conninfo = '%s'\n", master_conninfo);
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
@@ -303,6 +372,16 @@ do_standby_follow(char *conninfo)
|
||||
/*FreeFile(recovery_file);*/
|
||||
fclose(recovery_file);
|
||||
|
||||
/* Finally, restart the service */
|
||||
/* We assume the pg_ctl script is in the PATH */
|
||||
sprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
fprintf(stderr, "Can't restart service\n");
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -310,11 +389,13 @@ do_standby_follow(char *conninfo)
|
||||
void
|
||||
help(void)
|
||||
{
|
||||
fprintf(stderr, "repmgr: command program that performs tasks and then exits.\n"
|
||||
fprintf(stderr, "repmgr: Replicator manager \n"
|
||||
"This command program performs some tasks like clone a node, promote it "
|
||||
"or making follow another node and then exits.\n"
|
||||
"COMMANDS:\n"
|
||||
"standby clone - allows creation of a new standby\n"
|
||||
"standby promote - allows manual promotion of a specific standby into a "
|
||||
"new master in the event of a failover\n"
|
||||
"standby follow - allows the standby to re-point itself to a new master");
|
||||
"standby clone [node] - allows creation of a new standby\n"
|
||||
"standby promote - allows manual promotion of a specific standby into a "
|
||||
"new master in the event of a failover\n"
|
||||
"standby follow [node] - allows the standby to re-point itself to a new master\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user