From 61687855bb5ffcce561db56658703f9fb693a6c9 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Sun, 3 Oct 2010 17:11:56 -0500 Subject: [PATCH] improve documentation, also add CREATE DATABASE and CREATE USER commands to repmgr.sql --- INSTALL | 52 +++++++++++++++++++++++++++++++++++----------------- README | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- repmgr.sql | 9 +++++++++ repmgrd.c | 12 ++++++++---- 4 files changed, 99 insertions(+), 23 deletions(-) diff --git a/INSTALL b/INSTALL index 2791f1cf..25708478 100644 --- a/INSTALL +++ b/INSTALL @@ -3,11 +3,14 @@ repmgr To install repmgr and repmgrd follow this steps: -1) Extract the distribution tar file into the contrib directory of the PostgreSQL distribution sources +1) Extract the distribution tar file into the contrib directory of the + PostgreSQL distribution sources 2) Check your primary server is correctly configured 3) Write a suitable repmgr.conf for the node 4) Build repmgr programs -5) Set up trusted copy between postgres accounts (this is only useful for the STANDBY CLONE case) +5) Set up trusted copy between postgres accounts (this is only useful for the + STANDBY CLONE case) +6) repmgrd additional steps Extract the distribution tar file @@ -27,26 +30,32 @@ postgresql.conf listen_addresses='*' wal_level = 'hot_standby' archive_mode = on -archive_command = 'cd .' # we can also use exit 0, anything that just do nothing +archive_command = 'cd .' # we can also use exit 0, anything that just do + # nothing max_wal_senders = 10 -wal_keep_segments = 5000 # 80 GB required on pg_xlog +wal_keep_segments = 5000 # 80 GB required on pg_xlog hot_standby = on -Also you need to add the machines that will participate in the cluster in pg_hba.conf. +Also you need to add the machines that will participate in the cluster in +pg_hba.conf. 
+NOTE: It is preferred that you have a repmgr user and database and just give
+access to that user; also, if you set a password for the user you need to create
+a .pgpass file. For example:
-host all all 10.8.0.0/24 trust
+
+host repmgr repmgr 10.8.0.0/24 trust
host replication all 10.8.0.0/24 trust
Write a suitable repmgr.conf
============================
-This should be placed in the same place as the programs, we will keep it in repmgr-1.0;
-and should have these three parameters:
+It should have these three parameters:
1) cluster: A string (single quoted) that identify the cluster we are on
2) node: An integer that identify our node in the cluster
-3) conninfo: A string (single quoted) that teach has how to connect to this node
+3) conninfo: A string (single quoted) that teaches repmgr how to connect to this
+ node
Build repmgr programs
@@ -55,15 +64,19 @@ Build repmgr programs
make repmgr
make repmgrd
-make install (this will put the binaries on the same location as your postgres binaries)
+make install (this will put the binaries in the same location as your postgres
+binaries)
Set up trusted copy between postgres accounts
---------------------------------------------
-Initial copy between nodes uses the rsync program running over ssh. For this to work, the postgres accounts on each system need to be able to access files on their partner node without a password.
+Initial copy between nodes uses the rsync program running over ssh. For this
+to work, the postgres accounts on each system need to be able to access files
+on their partner node without a password.
-First generate a ssh key, using an empty passphrase, and copy the resulting keys and a maching authorization file to a privledged user on the other system::
+First generate a ssh key, using an empty passphrase, and copy the resulting
+keys and a matching authorization file to a privileged user on the other system:
[postgres@db1]$ ssh-keygen -t rsa
Generating public/private rsa key pair.
@@ -79,7 +92,8 @@ First generate a ssh key, using an empty passphrase, and copy the resulting keys
[postgres@db1]$ cd ~/.ssh
[postgres@db1]$ scp id_rsa.pub id_rsa authorized_keys user@db2:
-Login as that user on the other system, and install the files into the postgres user's account::
+Login as that user on the other system, and install the files into the postgres
+user's account:
[user@db2 ~]$ sudo chown postgres.postgres authorized_keys id_rsa.pub id_rsa
[user@db2 ~]$ sudo mkdir -p ~postgres/.ssh
@@ -87,11 +101,15 @@ Login as that user on the other system, and install the files into the postgres
[user@db2 ~]$ sudo mv authorized_keys id_rsa.pub id_rsa ~postgres/.ssh
[user@db2 ~]$ sudo chmod -R go-rwx ~postgres/.ssh
-Now test that ssh in both directions works (you may have to accept some new known hosts in the process)
+Now test that ssh in both directions works (you may have to accept some new
+known hosts in the process)
-repmgr daemon
-=============
+repmgrd additional steps
+========================
+
+To use the repmgrd (repmgr daemon) to monitor standbys so we know how the
+replication is going and how far they are from the primary, you need to execute
+the repmgr.sql script in the postgres database.
-To use the repmgrd (repmgr daemon) to monitor standby's so we know which ones are more like primary you need to execute the repmgr.sql script in the postgres database.
You also need to add a row for every node in the repl_node table
diff --git a/README b/README
index 2f88ddfe..29e70807 100644
--- a/README
+++ b/README
@@ -1,4 +1,5 @@
-=====================================================
+repmgr daemon
+==================================================================
repmgr: Replication Manager for PostgreSQL's clusters
=====================================================
@@ -23,6 +24,15 @@ repmgr works in two components:
COMMANDS
========
+None of these commands need the repmgr.conf file but they need to be able to
+connect to the remote and local database.
+
+You can tell it which is the remote database by using the -h parameter or
+as the last parameter in standby clone and standby follow. If you need to specify
+a port different than the default 5432 you can specify a -p parameter.
+Standby is always considered as localhost and a second -p parameter will indicate
+its port if it is different from the default one.
+
* standby clone [node to be cloned]
Backup via rsync the data directory of the primary. And creates the recovery file
@@ -31,6 +41,8 @@ It doesn't need the repmgr.conf so it can be executed anywhere in the new node.
So, you can step where you want your new data directory and execute:
./repmgr standby clone 10.68.1.161
+or from wherever you are
+./repmgr -D /path/to/new/data/directory standby clone 10.68.1.161
That will make a backup of the primary then you only need to execute:
@@ -77,7 +89,32 @@ host all all 10.8.0.0/24 trust
host replication all 10.8.0.0/24 trust
-Configuration file
+EXAMPLES
+========
+
+Suppose we have 3 nodes: node1 (the master), node2 and node3
+
+To make node2 and node3 be standbys of node1, execute this on both nodes (node2 and node3):
+repmgr -D /var/lib/postgresql/9.0 standby clone node1
+
+If we lose node1 we can run on node2:
+repmgr standby promote
+
+which makes node2 the new master, we then run on node3:
+repmgr standby follow node2
+
+to make node3 follow node2 (rather than node1)
+
+If now we want to add a new node we can prepare a new server (node4) and run:
+repmgr -D /var/lib/postgresql/9.0 standby clone node2
+
+NOTE: you need to have PGDIR/bin in your path, if you don't want that as a
+permanent setting you can do it this way:
+
+PATH=$PGDIR/bin:$PATH repmgr standby promote
+
+
+CONFIGURATION FILE
==================
repmgr.conf: This is looked for in the directory repmgrd or repmgr exists
@@ -85,3 +122,11 @@ The configuration file should have 3 lines:
cluster : tha name of this cluster
node : specify the number of this node inside the cluster
conninfo: specify how we
can connect to this node's PostgreSQL service
+
+
+REPMGR DAEMON
+=============
+
+It reads the repmgr.conf file in the current directory (or the one indicated
+with the -f parameter) and checks whether the standby is registered in repl_nodes.
+The repmgr daemon creates 2 connections: one to master and other to standby.
diff --git a/repmgr.sql b/repmgr.sql
index d625f441..fdc776b2 100644
--- a/repmgr.sql
+++ b/repmgr.sql
@@ -1,3 +1,8 @@
+CREATE USER repmgr;
+CREATE DATABASE repmgr OWNER repmgr;
+
+\c repmgr
+
/*
* The table repl_nodes keeps information about all machines in
* a cluster
@@ -8,6 +13,7 @@ CREATE TABLE repl_nodes (
cluster text not null, -- Name to identify the cluster
conninfo text not null
);
+ALTER TABLE repl_nodes OWNER TO repmgr;
/*
* Keeps monitor info about every node and their relative "position"
@@ -23,6 +29,7 @@ CREATE TABLE repl_monitor (
replication_lag BIGINT NOT NULL,
apply_lag BIGINT NOT NULL
);
+ALTER TABLE repl_monitor OWNER TO repmgr;
/*
@@ -46,3 +53,5 @@ SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location,
age(now(), last_monitor_time) AS time_lag
FROM monitor_info a
WHERE row_number = 1;
+
+ALTER VIEW repl_status OWNER TO repmgr;
diff --git a/repmgrd.c b/repmgrd.c
index b2b8735c..475a221c 100644
--- a/repmgrd.c
+++ b/repmgrd.c
@@ -41,6 +41,7 @@ static void help(const char *progname);
static void checkClusterConfiguration(void);
static void checkNodeConfiguration(char *conninfo);
static void getPrimaryConnection(void);
+static void CancelQuery(void);
static void MonitorExecute(void);
@@ -144,13 +145,16 @@ main(int argc, char **argv)
strcpy(primaryConninfo, conninfo);
primaryConn = myLocalConn;
}
+ else
+ {
+ /* I need the id of the primary as well as a connection to it */
+ getPrimaryConnection();
+ }
checkClusterConfiguration();
checkNodeConfiguration(conninfo);
if (myLocalMode == STANDBY_MODE)
{
- /* I need the id of the primary as well as a connection to it */
- getPrimaryConnection();
MonitorCheck();
}
@@ -174,8 +178,8 @@
getPrimaryConnection(void) /* find all nodes belonging to this cluster */ sprintf(sqlquery, "SELECT * FROM repl_nodes " - " WHERE cluster = '%s' ", - myClusterName); + " WHERE cluster = '%s' and id <> %d", + myClusterName, myLocalId); res1 = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res1) != PGRES_TUPLES_OK)