mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 23:26:30 +00:00
Add docs, fix Makefile and fix some bugs and typos
This commit is contained in:
107
INSTALL
107
INSTALL
@@ -1,32 +1,97 @@
|
||||
Repmgr
|
||||
--------
|
||||
repmgr
|
||||
======
|
||||
|
||||
This module is for monitor a cluster, to install follow next steps
|
||||
To install repmgr and repmgrd, follow these steps:
|
||||
|
||||
On Primary:
|
||||
|
||||
1) Create tables for repmgr configuration and monitor (see repmgr.sql)
|
||||
2) Edit repmgr.conf
|
||||
|
||||
On Standby:
|
||||
|
||||
1) Start replica
|
||||
2) Edit repmgr.conf
|
||||
1) Extract the distribution tar file into the contrib directory of the PostgreSQL distribution sources
|
||||
2) Check your primary server is correctly configured
|
||||
3) Write a suitable repmgr.conf for the node
|
||||
4) Build repmgr programs
|
||||
5) Set up trusted copy between postgres accounts (this is only useful for the STANDBY CLONE case)
|
||||
|
||||
|
||||
|
||||
Installing xlog_replay_time module
|
||||
---------------------------------------
|
||||
cd repmgr/sql_utils
|
||||
make USE_PGXS=1
|
||||
make install USE_PGXS=1
|
||||
Extract the distribution tar file
|
||||
=================================
|
||||
cp repmgr-1.0.tar.gz ${postgresql_sources}/contrib/.
|
||||
cd ${postgresql_sources}/contrib
|
||||
tar xvzf repmgr-1.0.tar.gz
|
||||
|
||||
|
||||
repmgr.conf
|
||||
------------
|
||||
Check your primary server configuration
|
||||
=======================================
|
||||
|
||||
The configuration file has 3 parameters
|
||||
PostgreSQL should have been previously built and installed on the system
|
||||
|
||||
postgresql.conf
|
||||
---------------
|
||||
listen_addresses='*'
|
||||
wal_level = 'hot_standby'
|
||||
archive_mode = on
|
||||
archive_command = 'cd .' # we can also use exit 0, or anything else that does nothing
|
||||
max_wal_senders = 10
|
||||
wal_keep_segments = 5000 # 80 GB required on pg_xlog
|
||||
hot_standby = on
|
||||
|
||||
Also you need to add the machines that will participate in the cluster in pg_hba.conf.
|
||||
ie:
|
||||
host all all 10.8.0.0/24 trust
|
||||
host replication all 10.8.0.0/24 trust
|
||||
|
||||
|
||||
Write a suitable repmgr.conf
|
||||
============================
|
||||
|
||||
This should be placed in the same place as the programs, we will keep it in repmgr-1.0;
|
||||
and should have these three parameters:
|
||||
|
||||
1) cluster: A string (single quoted) that identifies the cluster we are on
|
||||
2) node: An integer that identifies our node in the cluster
|
||||
3) conninfo: A string (single quoted) that tells us how to connect to this node
|
||||
|
||||
|
||||
Build repmgr programs
|
||||
=====================
|
||||
|
||||
make repmgr
|
||||
make repmgrd
|
||||
|
||||
make install (this will put the binaries on the same location as your postgres binaries)
|
||||
|
||||
|
||||
Set up trusted copy between postgres accounts
|
||||
---------------------------------------------
|
||||
|
||||
Initial copy between nodes uses the rsync program running over ssh. For this to work, the postgres accounts on each system need to be able to access files on their partner node without a password.
|
||||
|
||||
First generate an ssh key, using an empty passphrase, and copy the resulting keys and a matching authorization file to a privileged user on the other system::
|
||||
|
||||
[postgres@db1]$ ssh-keygen -t rsa
|
||||
Generating public/private rsa key pair.
|
||||
Enter file in which to save the key (/var/lib/pgsql/.ssh/id_rsa):
|
||||
Enter passphrase (empty for no passphrase):
|
||||
Enter same passphrase again:
|
||||
Your identification has been saved in /var/lib/pgsql/.ssh/id_rsa.
|
||||
Your public key has been saved in /var/lib/pgsql/.ssh/id_rsa.pub.
|
||||
The key fingerprint is:
|
||||
aa:bb:cc:dd:ee:ff:aa:11:22:33:44:55:66:77:88:99 postgres@db1.domain.com
|
||||
[postgres@db1]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
|
||||
[postgres@db1]$ chmod go-rwx ~/.ssh/*
|
||||
[postgres@db1]$ cd ~/.ssh
|
||||
[postgres@db1]$ scp id_rsa.pub id_rsa authorized_keys user@db2:
|
||||
|
||||
Login as that user on the other system, and install the files into the postgres user's account::
|
||||
|
||||
[user@db2 ~]$ sudo chown postgres.postgres authorized_keys id_rsa.pub id_rsa
|
||||
[user@db2 ~]$ sudo mkdir -p ~postgres/.ssh
|
||||
[user@db2 ~]$ sudo chown postgres.postgres ~postgres/.ssh
|
||||
[user@db2 ~]$ sudo mv authorized_keys id_rsa.pub id_rsa ~postgres/.ssh
|
||||
[user@db2 ~]$ sudo chmod -R go-rwx ~postgres/.ssh
|
||||
|
||||
Now test that ssh in both directions works (you may have to accept some new known hosts in the process)
|
||||
|
||||
|
||||
repmgr daemon
|
||||
=============
|
||||
|
||||
To use repmgrd (the repmgr daemon) to monitor standbys, so we know which ones are closest to the primary, you need to execute the repmgr.sql script in the postgres database.
|
||||
You also need to add a row for every node in the repl_nodes table
|
||||
|
||||
19
Makefile
19
Makefile
@@ -3,12 +3,18 @@
|
||||
# Copyright (c) 2ndQuadrant, 2010
|
||||
#
|
||||
|
||||
PROGRAM = repmgrd
|
||||
OBJS = config.o dbutils.o repmgrd.o
|
||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o
|
||||
repmgr_OBJS = dbutils.o config.o repmgr.o
|
||||
|
||||
PG_CPPFLAGS = -I$(libpq_srcdir)
|
||||
PG_LIBS = $(libpq_pgport)
|
||||
|
||||
repmgrd: $(repmgrd_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
|
||||
|
||||
repmgr: $(repmgr_OBJS)
|
||||
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
||||
|
||||
ifdef USE_PGXS
|
||||
PGXS := $(shell pg_config --pgxs)
|
||||
include $(PGXS)
|
||||
@@ -18,3 +24,12 @@ top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
install:
|
||||
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)'
|
||||
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)'
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
||||
rm -f repmgrd
|
||||
rm -f repmgr
|
||||
|
||||
87
README
Normal file
87
README
Normal file
@@ -0,0 +1,87 @@
|
||||
=====================================================
|
||||
repmgr: Replication Manager for PostgreSQL's clusters
|
||||
=====================================================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
PostgreSQL 9.0 allows us to have replicated hot standby servers
|
||||
which we can query and/or use for high availability.
|
||||
|
||||
While the main components of the feature are included with
|
||||
PostgreSQL, the user is expected to manage the high availability
|
||||
part of it.
|
||||
|
||||
repmgr allows you to monitor and manage your replicated PostgreSQL
|
||||
databases as a single cluster.
|
||||
|
||||
repmgr works in two components:
|
||||
* repmgr: command program that performs tasks and then exits
|
||||
* repmgrd: management and monitoring daemon that watches cluster
|
||||
|
||||
|
||||
COMMANDS
|
||||
========
|
||||
|
||||
* standby clone [node to be cloned]
|
||||
|
||||
Backs up the data directory of the primary via rsync and creates the recovery file
|
||||
we need to start a new hot standby server.
|
||||
It doesn't need the repmgr.conf so it can be executed anywhere in the new node.
|
||||
So you can change to the directory where you want your new data directory and execute:
|
||||
|
||||
./repmgr standby clone 10.68.1.161
|
||||
|
||||
That will make a backup of the primary; then you only need to execute:
|
||||
|
||||
pg_ctl -D /your_data_directory_path start
|
||||
|
||||
|
||||
* standby promote
|
||||
|
||||
Allows manual promotion of a specific standby into a new primary in the event of a failover
|
||||
This needs to be executed on the standby, in the directory that contains repmgr.conf.
|
||||
Doesn't need any additional arguments:
|
||||
|
||||
./repmgr standby promote
|
||||
|
||||
That will restart your standby postgresql service
|
||||
|
||||
|
||||
* standby follow [node to be followed]
|
||||
|
||||
Allows the standby to re-point itself to a new primary indicated as a parameter.
|
||||
This needs to be executed on the standby, in the directory that contains repmgr.conf.
|
||||
|
||||
./repmgr standby follow 10.68.1.162
|
||||
|
||||
|
||||
PRE-REQUISITES
|
||||
==============
|
||||
|
||||
Primary must be configured with
|
||||
|
||||
postgresql.conf
|
||||
---------------
|
||||
listen_addresses='*'
|
||||
wal_level = 'hot_standby'
|
||||
archive_mode = on
|
||||
archive_command = 'cd .'
|
||||
max_wal_senders = 10
|
||||
wal_keep_segments = 5000 # 80 GB required on pg_xlog
|
||||
hot_standby = on
|
||||
|
||||
Also you need to add the machines that will participate in the cluster in pg_hba.conf.
|
||||
ie:
|
||||
host all all 10.8.0.0/24 trust
|
||||
host replication all 10.8.0.0/24 trust
|
||||
|
||||
|
||||
Configuration file
|
||||
==================
|
||||
|
||||
repmgr.conf: This is looked for in the directory where repmgrd or repmgr exists
|
||||
The configuration file should have 3 lines:
|
||||
cluster : the name of this cluster
|
||||
node : specify the number of this node inside the cluster
|
||||
conninfo: specify how we can connect to this node's PostgreSQL service
|
||||
26
dbutils.c
26
dbutils.c
@@ -30,3 +30,29 @@ establishDBConnection(const char *conninfo, const bool exit_on_error)
|
||||
|
||||
return conn;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool
|
||||
is_standby(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
bool result;
|
||||
|
||||
res = PQexec(conn, "SELECT pg_is_in_recovery()");
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||
result = false;
|
||||
else
|
||||
result = true;
|
||||
|
||||
PQclear(res);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -5,3 +5,4 @@
|
||||
*/
|
||||
|
||||
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
|
||||
bool is_standby(PGconn *conn);
|
||||
|
||||
243
repmgr.c
243
repmgr.c
@@ -17,35 +17,19 @@
|
||||
#define RECOVERY_DONE_FILE "recovery.done"
|
||||
|
||||
|
||||
void checkNodeConfiguration(char *conninfo);
|
||||
void getPrimaryConnection(void);
|
||||
|
||||
|
||||
void help(void);
|
||||
void do_standby_clone(char *conninfo);
|
||||
void do_standby_clone(char *master);
|
||||
void do_standby_promote(void);
|
||||
void do_standby_follow(char *master);
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
char myClusterName[MAXLEN];
|
||||
int myLocalId = -1;
|
||||
char myConninfo[MAXLEN];
|
||||
|
||||
if (argc != 2)
|
||||
if (argc != 3 && argc != 4)
|
||||
help();
|
||||
|
||||
/*
|
||||
* Read the configuration file: repmgr.conf
|
||||
*/
|
||||
parse_config(myClusterName, &myLocalId, myConninfo);
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* XXX should we check the master pre requisites? */
|
||||
|
||||
|
||||
@@ -54,11 +38,35 @@ main(int argc, char **argv)
|
||||
if (strcasecmp(argv[1], "STANDBY") == 0)
|
||||
{
|
||||
if (strcasecmp(argv[2], "CLONE") == 0)
|
||||
do_standby_clone(myConninfo);
|
||||
{
|
||||
/*
|
||||
* For STANDBY CLONE we should receive the hostname or ip
|
||||
* of the node being cloned, it should be the third argument
|
||||
*/
|
||||
if (argc == 3)
|
||||
help();
|
||||
|
||||
do_standby_clone(argv[3]);
|
||||
}
|
||||
else if (strcasecmp(argv[2], "PROMOTE") == 0)
|
||||
{
|
||||
/*
|
||||
* For STANDBY PROMOTE we doesn't need any arguments
|
||||
*/
|
||||
if (argc == 4)
|
||||
help();
|
||||
do_standby_promote();
|
||||
}
|
||||
else if (strcasecmp(argv[2], "FOLLOW") == 0)
|
||||
do_standby_follow();
|
||||
{
|
||||
/*
|
||||
* For STANDBY FOLLOW we should receive the hostname or ip
|
||||
* of the node being cloned, it should be the third argument
|
||||
*/
|
||||
if (argc == 3)
|
||||
help();
|
||||
do_standby_follow(argv[3]);
|
||||
}
|
||||
else
|
||||
help();
|
||||
}
|
||||
@@ -70,26 +78,36 @@ main(int argc, char **argv)
|
||||
|
||||
|
||||
void
|
||||
do_standby_clone(char *conninfo)
|
||||
do_standby_clone(char *master)
|
||||
{
|
||||
PGconn *conn;
|
||||
PGresult *res;
|
||||
char sqlquery[8192];
|
||||
char script[8192];
|
||||
|
||||
char master_conninfo[MAXLEN];
|
||||
|
||||
int r;
|
||||
char data_dir_full_path[MAXLEN];
|
||||
char *current_dir;
|
||||
char data_dir[MAXLEN];
|
||||
char recovery_file_path[MAXLEN];
|
||||
FILE *recovery_file;
|
||||
|
||||
char line[MAXLEN];
|
||||
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
sprintf(master_conninfo, "host=%s", master);
|
||||
|
||||
fprintf(stderr, "Starting backup...");
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(master_conninfo, true);
|
||||
|
||||
/* Check we are cloning a primary node */
|
||||
if (is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The command should clone a primary node\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Starting backup...\n");
|
||||
|
||||
/* Get the data directory full path and the last subdirectory */
|
||||
sprintf(sqlquery, "SELECT setting, "
|
||||
@@ -98,7 +116,7 @@ do_standby_clone(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -111,7 +129,7 @@ do_standby_clone(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't start backup: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't start backup: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -120,24 +138,27 @@ do_standby_clone(char *conninfo)
|
||||
PQfinish(conn);
|
||||
|
||||
/* rsync data directory to current location */
|
||||
sprintf(script, "rsync -r %s .", data_dir_full_path);
|
||||
sprintf(script, "rsync -r %s:%s .", master, data_dir_full_path);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
fprintf(stderr, "Can't rsync data directory");
|
||||
return;
|
||||
fprintf(stderr, "Can't rsync data directory\n");
|
||||
/*
|
||||
* we need to return but before that i will let the pg_stop_backup()
|
||||
* happen
|
||||
*/
|
||||
}
|
||||
|
||||
/* inform the master that we have finished the backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
conn = establishDBConnection(master_conninfo, true);
|
||||
|
||||
fprintf(stderr, "Finishing backup...");
|
||||
fprintf(stderr, "Finishing backup...\n");
|
||||
|
||||
sprintf(sqlquery, "SELECT pg_stop_backup()");
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't stop backup: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -145,35 +166,32 @@ do_standby_clone(char *conninfo)
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
|
||||
/* Now, if the rsync failed then exit */
|
||||
if (r != 0)
|
||||
return;
|
||||
|
||||
/* Finally, write the recovery.conf file */
|
||||
getcwd(current_dir, MAXLEN);
|
||||
strcpy(recovery_file_path, current_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcat(recovery_file_path, data_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcat(recovery_file_path, RECOVERY_FILE);
|
||||
free(current_dir);
|
||||
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
|
||||
|
||||
recovery_file = fopen(recovery_file_path, "w");
|
||||
if (recovery_file == NULL)
|
||||
{
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually");
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n");
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "standby_mode = on\n");
|
||||
sprintf(line, "standby_mode = 'on'\n");
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "primary_conninfo = ");
|
||||
strcat(line, conninfo);
|
||||
sprintf(line, "primary_conninfo = '%s'\n", master_conninfo);
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
@@ -181,26 +199,49 @@ do_standby_clone(char *conninfo)
|
||||
/*FreeFile(recovery_file);*/
|
||||
fclose(recovery_file);
|
||||
|
||||
/* We don't start the service because we still may want to move the directory */
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_standby_promote(char *conninfo)
|
||||
do_standby_promote(void)
|
||||
{
|
||||
char myClusterName[MAXLEN];
|
||||
int myLocalId = -1;
|
||||
char myConninfo[MAXLEN];
|
||||
|
||||
PGconn *conn;
|
||||
PGresult *res;
|
||||
char sqlquery[8192];
|
||||
char script[8192];
|
||||
|
||||
int r;
|
||||
char data_dir[MAXLEN];
|
||||
char recovery_file_path[MAXLEN];
|
||||
char recovery_done_path[MAXLEN];
|
||||
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
/*
|
||||
* Read the configuration file: repmgr.conf
|
||||
*/
|
||||
parse_config(myClusterName, &myLocalId, myConninfo);
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Promoting standby...");
|
||||
conn = establishDBConnection(myConninfo, true);
|
||||
|
||||
/* Check we are in a standby node */
|
||||
if (!is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The command should be executed in a standby node\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Promoting standby...\n");
|
||||
|
||||
/* Get the data directory full path and the last subdirectory */
|
||||
sprintf(sqlquery, "SELECT setting "
|
||||
@@ -208,7 +249,7 @@ do_standby_promote(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -217,20 +258,16 @@ do_standby_promote(char *conninfo)
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
|
||||
strcpy(recovery_file_path, data_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcpy(recovery_file_path, RECOVERY_FILE);
|
||||
|
||||
strcpy(recovery_done_path, data_dir);
|
||||
strcat(recovery_done_path, "/");
|
||||
strcpy(recovery_done_path, RECOVERY_DONE);
|
||||
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
|
||||
sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE);
|
||||
rename(recovery_file_path, recovery_done_path);
|
||||
|
||||
sprintf(script, "pg_ctl -D %s restart", data_dir);
|
||||
/* We assume the pg_ctl script is in the PATH */
|
||||
sprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
fprintf(stderr, "Can't restart service");
|
||||
fprintf(stderr, "Can't restart service\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -239,22 +276,57 @@ do_standby_promote(char *conninfo)
|
||||
|
||||
|
||||
void
|
||||
do_standby_follow(char *conninfo)
|
||||
do_standby_follow(char *master)
|
||||
{
|
||||
char myClusterName[MAXLEN];
|
||||
int myLocalId = -1;
|
||||
char myConninfo[MAXLEN];
|
||||
|
||||
PGconn *conn;
|
||||
PGresult *res;
|
||||
char sqlquery[8192];
|
||||
char script[8192];
|
||||
|
||||
char master_conninfo[MAXLEN];
|
||||
|
||||
int r;
|
||||
char data_dir[MAXLEN];
|
||||
char recovery_file_path[MAXLEN];
|
||||
FILE *recovery_file;
|
||||
|
||||
char line[MAXLEN];
|
||||
|
||||
/* inform the master we will start a backup */
|
||||
conn = establishDBConnection(conninfo, true);
|
||||
/*
|
||||
* Read the configuration file: repmgr.conf
|
||||
*/
|
||||
parse_config(myClusterName, &myLocalId, myConninfo);
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Changing standby's primary...");
|
||||
sprintf(master_conninfo, "host=%s", master);
|
||||
conn = establishDBConnection(master_conninfo, true);
|
||||
|
||||
/* Check we are going to point to a primary */
|
||||
if (is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The should follow to a primary node\n");
|
||||
return;
|
||||
}
|
||||
PQfinish(conn);
|
||||
|
||||
conn = establishDBConnection(myConninfo, true);
|
||||
/* Check we are in a standby node */
|
||||
if (!is_standby(conn))
|
||||
{
|
||||
fprintf(stderr, "repmgr: The command should be executed in a standby node\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Changing standby's primary...\n");
|
||||
|
||||
/* Get the data directory full path and the last subdirectory */
|
||||
sprintf(sqlquery, "SELECT setting "
|
||||
@@ -262,7 +334,7 @@ do_standby_follow(char *conninfo)
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get info about data directory: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
@@ -272,30 +344,27 @@ do_standby_follow(char *conninfo)
|
||||
PQfinish(conn);
|
||||
|
||||
/* Finally, write the recovery.conf file */
|
||||
strcpy(recovery_file_path, data_dir);
|
||||
strcat(recovery_file_path, "/");
|
||||
strcat(recovery_file_path, RECOVERY_FILE);
|
||||
sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
|
||||
|
||||
recovery_file = fopen(recovery_file_path, "w");
|
||||
if (recovery_file == NULL)
|
||||
{
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually");
|
||||
fprintf(stderr, "could not create recovery.conf file, it could be necesary to create it manually\n");
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "standby_mode = on\n");
|
||||
sprintf(line, "standby_mode = 'on'\n");
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(line, "primary_conninfo = ");
|
||||
strcat(line, conninfo);
|
||||
sprintf(line, "primary_conninfo = '%s'\n", master_conninfo);
|
||||
if (fputs(line, recovery_file) == EOF)
|
||||
{
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually");
|
||||
fprintf(stderr, "recovery file could not be written, it could be necesary to create it manually\n");
|
||||
fclose(recovery_file);
|
||||
return;
|
||||
}
|
||||
@@ -303,6 +372,16 @@ do_standby_follow(char *conninfo)
|
||||
/*FreeFile(recovery_file);*/
|
||||
fclose(recovery_file);
|
||||
|
||||
/* Finally, restart the service */
|
||||
/* We assume the pg_ctl script is in the PATH */
|
||||
sprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
|
||||
r = system(script);
|
||||
if (r != 0)
|
||||
{
|
||||
fprintf(stderr, "Can't restart service\n");
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -310,11 +389,13 @@ do_standby_follow(char *conninfo)
|
||||
void
|
||||
help(void)
|
||||
{
|
||||
fprintf(stderr, "repmgr: command program that performs tasks and then exits.\n"
|
||||
fprintf(stderr, "repmgr: Replicator manager \n"
|
||||
"This command program performs some tasks like clone a node, promote it "
|
||||
"or making follow another node and then exits.\n"
|
||||
"COMMANDS:\n"
|
||||
"standby clone - allows creation of a new standby\n"
|
||||
"standby promote - allows manual promotion of a specific standby into a "
|
||||
"new master in the event of a failover\n"
|
||||
"standby follow - allows the standby to re-point itself to a new master");
|
||||
"standby clone [node] - allows creation of a new standby\n"
|
||||
"standby promote - allows manual promotion of a specific standby into a "
|
||||
"new master in the event of a failover\n"
|
||||
"standby follow [node] - allows the standby to re-point itself to a new master\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
48
repmgrd.c
48
repmgrd.c
@@ -26,7 +26,6 @@ char primaryConninfo[MAXLEN];
|
||||
PGconn *primaryConn;
|
||||
|
||||
|
||||
void setMyLocalMode(void);
|
||||
void checkClusterConfiguration(void);
|
||||
void checkNodeConfiguration(char *conninfo);
|
||||
void getPrimaryConnection(void);
|
||||
@@ -49,7 +48,7 @@ main(int argc, char **argv)
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.");
|
||||
"Check the configuration file.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -59,7 +58,7 @@ main(int argc, char **argv)
|
||||
* Set my server mode, establish a connection to primary
|
||||
* and start monitor
|
||||
*/
|
||||
setMyLocalMode();
|
||||
myLocalMode = is_standby(myLocalConn) ? STANDBY_MODE : PRIMARY_MODE;
|
||||
checkClusterConfiguration();
|
||||
checkNodeConfiguration(conninfo);
|
||||
if (myLocalMode == STANDBY_MODE)
|
||||
@@ -81,27 +80,6 @@ main(int argc, char **argv)
|
||||
* This function ask if we are in recovery, if false we are the primary else
|
||||
* we are a standby
|
||||
*/
|
||||
void
|
||||
setMyLocalMode(void)
|
||||
{
|
||||
PGresult *res;
|
||||
|
||||
res = PQexec(myLocalConn, "SELECT pg_is_in_recovery()");
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||
myLocalMode = PRIMARY_MODE;
|
||||
else
|
||||
myLocalMode = STANDBY_MODE;
|
||||
|
||||
PQclear(res);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
@@ -114,7 +92,7 @@ getPrimaryConnection(void)
|
||||
res1 = PQexec(myLocalConn, "SELECT * FROM repl_nodes");
|
||||
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get nodes info: %s", PQerrorMessage(myLocalConn));
|
||||
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res1);
|
||||
PQfinish(myLocalConn);
|
||||
exit(1);
|
||||
@@ -129,7 +107,7 @@ getPrimaryConnection(void)
|
||||
res2 = PQexec(primaryConn, "SELECT pg_is_in_recovery()");
|
||||
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get nodes info: %s", PQerrorMessage(primaryConn));
|
||||
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(primaryConn));
|
||||
PQclear(res1);
|
||||
PQclear(res2);
|
||||
PQfinish(primaryConn);
|
||||
@@ -162,7 +140,7 @@ getPrimaryConnection(void)
|
||||
* to start failover procedure o just fix some situation on the
|
||||
* standby.
|
||||
*/
|
||||
fprintf(stderr, "There isn't a primary node");
|
||||
fprintf(stderr, "There isn't a primary node\n");
|
||||
PQclear(res1);
|
||||
PQfinish(myLocalConn);
|
||||
exit(1);
|
||||
@@ -208,7 +186,7 @@ MonitorExecute(void)
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(myLocalConn));
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
@@ -224,7 +202,7 @@ MonitorExecute(void)
|
||||
res = PQexec(primaryConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(primaryConn));
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(primaryConn));
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
@@ -256,7 +234,7 @@ MonitorExecute(void)
|
||||
* will check the result next time we pause for a monitor step.
|
||||
*/
|
||||
if (!PQexec(primaryConn, sqlquery))
|
||||
fprintf(stderr, "replication monitor insert failed: %s",
|
||||
fprintf(stderr, "replication monitor insert failed: %s\n",
|
||||
PQerrorMessage(primaryConn));
|
||||
}
|
||||
|
||||
@@ -270,7 +248,7 @@ checkClusterConfiguration(void)
|
||||
" WHERE relname = 'repl_nodes'");
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(myLocalConn));
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
@@ -284,7 +262,7 @@ checkClusterConfiguration(void)
|
||||
*/
|
||||
if (PQntuples(res) == 0)
|
||||
{
|
||||
fprintf(stderr, "The replication cluster is not configured");
|
||||
fprintf(stderr, "The replication cluster is not configured\n");
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
@@ -310,7 +288,7 @@ checkNodeConfiguration(char *conninfo)
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(myLocalConn));
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
@@ -331,7 +309,7 @@ checkNodeConfiguration(char *conninfo)
|
||||
|
||||
if (!PQexec(primaryConn, sqlquery))
|
||||
{
|
||||
fprintf(stderr, "Cannot insert node details, %s",
|
||||
fprintf(stderr, "Cannot insert node details, %s\n",
|
||||
PQerrorMessage(primaryConn));
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
@@ -350,7 +328,7 @@ walLocationToBytes(char *wal_location)
|
||||
|
||||
if (sscanf(wal_location, "%X/%X", &xlogid, &xrecoff) != 2)
|
||||
{
|
||||
fprintf(stderr, "wrong log location format: %s", wal_location);
|
||||
fprintf(stderr, "wrong log location format: %s\n", wal_location);
|
||||
return 0;
|
||||
}
|
||||
return ((xlogid * 16 * 1024 * 1024 * 255) + xrecoff);
|
||||
|
||||
Reference in New Issue
Block a user