Add docs, fix Makefile and fix some bugs and typos

This commit is contained in:
postgres
2010-09-17 07:14:52 -05:00
parent 707c501b24
commit 93417715d4
7 changed files with 392 additions and 139 deletions

243
repmgr.c
View File

@@ -17,35 +17,19 @@
#define RECOVERY_DONE_FILE "recovery.done"
void checkNodeConfiguration(char *conninfo);
void getPrimaryConnection(void);
void help(void);
void do_standby_clone(char *conninfo);
void do_standby_clone(char *master);
void do_standby_promote(void);
void do_standby_follow(char *master);
int
main(int argc, char **argv)
{
char myClusterName[MAXLEN];
int myLocalId = -1;
char myConninfo[MAXLEN];
if (argc != 2)
if (argc != 3 && argc != 4)
help();
/*
* Read the configuration file: repmgr.conf
*/
parse_config(myClusterName, &myLocalId, myConninfo);
if (myLocalId == -1)
{
fprintf(stderr, "Node information is missing. "
"Check the configuration file.");
exit(1);
}
/* XXX should we check the master prerequisites? */
@@ -54,11 +38,35 @@ main(int argc, char **argv)
if (strcasecmp(argv[1], "STANDBY") == 0)
{
if (strcasecmp(argv[2], "CLONE") == 0)
do_standby_clone(myConninfo);
{
/*
* For STANDBY CLONE we should receive the hostname or ip
* of the node being cloned, it should be the third argument
*/
if (argc == 3)
help();
do_standby_clone(argv[3]);
}
else if (strcasecmp(argv[2], "PROMOTE") == 0)
{
/*
* For STANDBY PROMOTE we don't need any arguments
*/
if (argc == 4)
help();
do_standby_promote();
}
else if (strcasecmp(argv[2], "FOLLOW") == 0)
do_standby_follow();
{
/*
* For STANDBY FOLLOW we should receive the hostname or IP
* of the new master to follow; it should be the third argument
*/
if (argc == 3)
help();
do_standby_follow(argv[3]);
}
else
help();
}
@@ -70,26 +78,36 @@ main(int argc, char **argv)
void
do_standby_clone(char *conninfo)
do_standby_clone(char *master)
{
PGconn *conn;
PGresult *res;
char sqlquery[8192];
char script[8192];
char master_conninfo[MAXLEN];
int r;
char data_dir_full_path[MAXLEN];
char *current_dir;
char data_dir[MAXLEN];
char recovery_file_path[MAXLEN];
FILE *recovery_file;
char line[MAXLEN];
/* inform the master we will start a backup */
conn = establishDBConnection(conninfo, true);
sprintf(master_conninfo, "host=%s", master);
fprintf(stderr, "Starting backup...");
/* inform the master we will start a backup */
conn = establishDBConnection(master_conninfo, true);
/* Check we are cloning a primary node */
if (is_standby(conn))
{
fprintf(stderr, "repmgr: The command should clone a primary node\n");
return;
}
fprintf(stderr, "Starting backup...\n");
/* Get the data directory full path and the last subdirectory */
sprintf(sqlquery, "SELECT setting, "
@@ -98,7 +116,7 @@ do_standby_clone(char *conninfo)
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
return;
@@ -111,7 +129,7 @@ do_standby_clone(char *conninfo)
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't start backup: %s", PQerrorMessage(conn));
fprintf(stderr, "Can't start backup: %s\n", PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
return;
@@ -120,24 +138,27 @@ do_standby_clone(char *conninfo)
PQfinish(conn);
/* rsync data directory to current location */
sprintf(script, "rsync -r %s .", data_dir_full_path);
sprintf(script, "rsync -r %s:%s .", master, data_dir_full_path);
r = system(script);
if (r != 0)
{
fprintf(stderr, "Can't rsync data directory");
return;
fprintf(stderr, "Can't rsync data directory\n");
/*
* We need to return, but first we let the pg_stop_backup() call
* below run; the rsync result is checked again after it.
*/
}
/* inform the master that we have finished the backup */
conn = establishDBConnection(conninfo, true);
conn = establishDBConnection(master_conninfo, true);
fprintf(stderr, "Finishing backup...");
fprintf(stderr, "Finishing backup...\n");
sprintf(sqlquery, "SELECT pg_stop_backup()");
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't stop backup: %s", PQerrorMessage(conn));
fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
return;
@@ -145,35 +166,32 @@ do_standby_clone(char *conninfo)
PQclear(res);
PQfinish(conn);
/* Now, if the rsync failed then exit */
if (r != 0)
return;
/* Finally, write the recovery.conf file */
getcwd(current_dir, MAXLEN);
strcpy(recovery_file_path, current_dir);
strcat(recovery_file_path, "/");
strcat(recovery_file_path, data_dir);
strcat(recovery_file_path, "/");
strcat(recovery_file_path, RECOVERY_FILE);
free(current_dir);
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
recovery_file = fopen(recovery_file_path, "w");
if (recovery_file == NULL)
{
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually");
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n");
return;
}
strcpy(line, "standby_mode = on\n");
sprintf(line, "standby_mode = 'on'\n");
if (fputs(line, recovery_file) == EOF)
{
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
fclose(recovery_file);
return;
}
strcpy(line, "primary_conninfo = ");
strcat(line, conninfo);
sprintf(line, "primary_conninfo = '%s'\n", master_conninfo);
if (fputs(line, recovery_file) == EOF)
{
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
fclose(recovery_file);
return;
}
@@ -181,26 +199,49 @@ do_standby_clone(char *conninfo)
/*FreeFile(recovery_file);*/
fclose(recovery_file);
/* We don't start the service because we still may want to move the directory */
return;
}
void
do_standby_promote(char *conninfo)
do_standby_promote(void)
{
char myClusterName[MAXLEN];
int myLocalId = -1;
char myConninfo[MAXLEN];
PGconn *conn;
PGresult *res;
char sqlquery[8192];
char script[8192];
int r;
char data_dir[MAXLEN];
char recovery_file_path[MAXLEN];
char recovery_done_path[MAXLEN];
/* inform the master we will start a backup */
conn = establishDBConnection(conninfo, true);
/*
* Read the configuration file: repmgr.conf
*/
parse_config(myClusterName, &myLocalId, myConninfo);
if (myLocalId == -1)
{
fprintf(stderr, "Node information is missing. "
"Check the configuration file.\n");
exit(1);
}
fprintf(stderr, "Promoting standby...");
conn = establishDBConnection(myConninfo, true);
/* Check we are in a standby node */
if (!is_standby(conn))
{
fprintf(stderr, "repmgr: The command should be executed in a standby node\n");
return;
}
fprintf(stderr, "Promoting standby...\n");
/* Get the data directory full path and the last subdirectory */
sprintf(sqlquery, "SELECT setting "
@@ -208,7 +249,7 @@ do_standby_promote(char *conninfo)
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
return;
@@ -217,20 +258,16 @@ do_standby_promote(char *conninfo)
PQclear(res);
PQfinish(conn);
strcpy(recovery_file_path, data_dir);
strcat(recovery_file_path, "/");
strcpy(recovery_file_path, RECOVERY_FILE);
strcpy(recovery_done_path, data_dir);
strcat(recovery_done_path, "/");
strcpy(recovery_done_path, RECOVERY_DONE);
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE);
rename(recovery_file_path, recovery_done_path);
sprintf(script, "pg_ctl -D %s restart", data_dir);
/* We assume the pg_ctl script is in the PATH */
sprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
r = system(script);
if (r != 0)
{
fprintf(stderr, "Can't restart service");
fprintf(stderr, "Can't restart service\n");
return;
}
@@ -239,22 +276,57 @@ do_standby_promote(char *conninfo)
void
do_standby_follow(char *conninfo)
do_standby_follow(char *master)
{
char myClusterName[MAXLEN];
int myLocalId = -1;
char myConninfo[MAXLEN];
PGconn *conn;
PGresult *res;
char sqlquery[8192];
char script[8192];
char master_conninfo[MAXLEN];
int r;
char data_dir[MAXLEN];
char recovery_file_path[MAXLEN];
FILE *recovery_file;
char line[MAXLEN];
/* inform the master we will start a backup */
conn = establishDBConnection(conninfo, true);
/*
* Read the configuration file: repmgr.conf
*/
parse_config(myClusterName, &myLocalId, myConninfo);
if (myLocalId == -1)
{
fprintf(stderr, "Node information is missing. "
"Check the configuration file.\n");
exit(1);
}
fprintf(stderr, "Changing standby's primary...");
sprintf(master_conninfo, "host=%s", master);
conn = establishDBConnection(master_conninfo, true);
/* Check we are going to point to a primary */
if (is_standby(conn))
{
fprintf(stderr, "repmgr: The should follow to a primary node\n");
return;
}
PQfinish(conn);
conn = establishDBConnection(myConninfo, true);
/* Check we are in a standby node */
if (!is_standby(conn))
{
fprintf(stderr, "repmgr: The command should be executed in a standby node\n");
return;
}
fprintf(stderr, "Changing standby's primary...\n");
/* Get the data directory full path and the last subdirectory */
sprintf(sqlquery, "SELECT setting "
@@ -262,7 +334,7 @@ do_standby_follow(char *conninfo)
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
PQclear(res);
PQfinish(conn);
return;
@@ -272,30 +344,27 @@ do_standby_follow(char *conninfo)
PQfinish(conn);
/* Finally, write the recovery.conf file */
strcpy(recovery_file_path, data_dir);
strcat(recovery_file_path, "/");
strcat(recovery_file_path, RECOVERY_FILE);
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
recovery_file = fopen(recovery_file_path, "w");
if (recovery_file == NULL)
{
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually");
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n");
return;
}
strcpy(line, "standby_mode = on\n");
sprintf(line, "standby_mode = 'on'\n");
if (fputs(line, recovery_file) == EOF)
{
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
fclose(recovery_file);
return;
}
strcpy(line, "primary_conninfo = ");
strcat(line, conninfo);
sprintf(line, "primary_conninfo = '%s'\n", master_conninfo);
if (fputs(line, recovery_file) == EOF)
{
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
fclose(recovery_file);
return;
}
@@ -303,6 +372,16 @@ do_standby_follow(char *conninfo)
/*FreeFile(recovery_file);*/
fclose(recovery_file);
/* Finally, restart the service */
/* We assume the pg_ctl script is in the PATH */
sprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
r = system(script);
if (r != 0)
{
fprintf(stderr, "Can't restart service\n");
return;
}
return;
}
@@ -310,11 +389,13 @@ do_standby_follow(char *conninfo)
/*
* help
*
* Print the usage text (program description and the list of STANDBY
* commands) to stderr, then terminate the process with exit status 1.
* This function does not return to its caller.
*/
void
help(void)
{
fprintf(stderr, "repmgr: command program that performs tasks and then exits.\n"
fprintf(stderr, "repmgr: Replicator manager \n"
"This command program performs some tasks like clone a node, promote it "
"or making follow another node and then exits.\n"
"COMMANDS:\n"
"standby clone - allows creation of a new standby\n"
"standby promote - allows manual promotion of a specific standby into a "
"new master in the event of a failover\n"
"standby follow - allows the standby to re-point itself to a new master");
"standby clone [node] - allows creation of a new standby\n"
"standby promote - allows manual promotion of a specific standby into a "
"new master in the event of a failover\n"
"standby follow [node] - allows the standby to re-point itself to a new master\n");
/* Usage is only shown on invalid invocation, so exit with a failure status */
exit(1);
}