Initial commit

This commit is contained in:
postgres
2010-06-17 10:43:35 -05:00
commit a12e719259
17 changed files with 858 additions and 0 deletions

0
.gitignore vendored Normal file
View File

10
COPYRIGHT Normal file
View File

@@ -0,0 +1,10 @@
Copyright (c) 2009-2010, 2ndQuadrant Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

43
INSTALL Normal file
View File

@@ -0,0 +1,43 @@
Repmgr
--------
This module is for monitor a cluster, to install follow next steps
On Primary:
1) Install last_xlog_replay_timestamp() function
2) Create tables for repmgr configuration and monitor (see repmgr.sql)
3) Edit repmgr.conf
On Standby:
1) Install xlog_replay_time module
NOTE: this should be done before the standby starts for the first time,
the reason is that when the standby tries to replay WAL segment
where the function was created this file should exist or it will
be an error.
2) Start replica
3) Edit repmgr.conf
Installing xlog_replay_time module
---------------------------------------
cd repmgr/sql_utils
make USE_PGXS=1
make install USE_PGXS=1
Installing last_xlog_replay_timestamp() function
-------------------------------------------------
psql -f $PGDIR/share/contrib/xlog_replay_time.sql
repmgr.conf
------------
The configuration file has 3 parameters
1) cluster: A string (single quoted) that identify the cluster we are on
2) node: An integer that identify our node in the cluster
3) conninfo: A string (single quoted) that teach has how to connect to this node

20
Makefile Normal file
View File

@@ -0,0 +1,20 @@
#
# Makefile
# Copyright (c) 2ndQuadrant, 2010
#
PROGRAM = repmgr
OBJS = config.o dbutils.o main.o
PG_CPPFLAGS = -I$(libpq_srcdir)
PG_LIBS = $(libpq_pgport)
ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs)
include $(PGXS)
else
subdir = contrib/repmgr
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

47
README.phase2 Normal file
View File

@@ -0,0 +1,47 @@
Phase 2
=======
Idea is that we keep a transaction running on primary with a snapshot
that prevents VACUUM from ever removing records that a query on the
standby can see. So there are never any query conflicts.
Make two connections to primary, P1 and P2. One to Standby, S.
P1:
BEGIN;
SELECT txid_current(); INTO Pn->xid
SELECT pg_sleep(10000000000);
P2:
BEGIN;
SELECT txid_current(); INTO Pn->xid
SELECT pg_sleep(10000000000);
Then every 1sec
S: GetOldestXmin() INTO S->xmin
/*
* If both P1 and P2 are older than oldest xmin on S
* then we cancel the oldest of P1 and P2, and
* start a new xid on that channel. So that P1 and P2
* slowly step forwards, as queries complete on the
* standby.
*/
if (TransactionIdPrecedes(P1->xid, S->xmin) &&
TransactionIdPrecedes(P2->xid, S->xmin))
{
if (TransactionIdPrecedes(P1->xid, P2->xid))
cancel_and_rerun(1)
else
cancel_and_rerun(2)
)
cancel_and_rerun()
{
cancel Pn
BEGIN;
SELECT txid_current(); INTO Pn->xid
SELECT pg_sleep(10000000000);
}

100
config.c Normal file
View File

@@ -0,0 +1,100 @@
/*
* config.c
* Copyright (c) 2ndQuadrant, 2010
*
* Functions to parse the config file
*/
#include "repmgr.h"
void
parse_config(char *cluster_name, int *node, char *conninfo)
{
char *s, buff[256];
FILE *fp = fopen (CONFIG_FILE, "r");
if (fp == NULL)
return;
/* Read next line */
while ((s = fgets (buff, sizeof buff, fp)) != NULL)
{
char name[MAXLEN];
char value[MAXLEN];
/* Skip blank lines and comments */
if (buff[0] == '\n' || buff[0] == '#')
continue;
/* Parse name/value pair from line */
parse_line(buff, name, value);
/* Copy into correct entry in parameters struct */
if (strcmp(name, "cluster") == 0)
*node = atoi(value);
else if (strcmp(name, "node") == 0)
*node = atoi(value);
else if (strcmp(name, "conninfo") == 0)
strncpy (conninfo, value, MAXLEN);
else
printf ("WARNING: %s/%s: Unknown name/value pair!\n", name, value);
}
/* Close file */
fclose (fp);
}
char *
trim (char *s)
{
/* Initialize start, end pointers */
char *s1 = s, *s2 = &s[strlen (s) - 1];
/* Trim and delimit right side */
while ( (isspace (*s2)) && (s2 >= s1) )
s2--;
*(s2+1) = '\0';
/* Trim left side */
while ( (isspace (*s1)) && (s1 < s2) )
s1++;
/* Copy finished string */
strcpy (s, s1);
return s;
}
void
parse_line(char *buff, char *name, char *value)
{
int i;
int j;
/*
* first we find the name of the parameter
*/
j = 0;
for (i = 0; i < MAXLEN; i++)
{
if (buff[i] != '=')
name[j++] = buff[i];
else
break;
}
name[j] = '\0';
i++;
/*
* Now the value
*/
j = 0;
for ( ; i < MAXLEN; i++)
if (buff[i] == '\'')
continue;
else if (buff[i] != '\n')
value[j++] = buff[i];
else
break;
value[j] = '\0';
trim(value);
}

9
config.h Normal file
View File

@@ -0,0 +1,9 @@
/*
* config.h
* Copyright (c) 2ndQuadrant, 2010
*
*/
void parse_config(char *cluster_name, int *node, char *service);
void parse_line(char *buff, char *name, char *value);
char *trim(char *s);

32
dbutils.c Normal file
View File

@@ -0,0 +1,32 @@
/*
* dbutils.c
* Copyright (c) 2ndQuadrant, 2010
*
* Database connections/managements functions
* XXX At least i can create another function here to avoid code duplication
* on the main file
*
*/
#include "repmgr.h"
PGconn *
establishDBConnection(const char *conninfo, const bool exit_on_error)
{
PGconn *conn;
/* Make a connection to the database */
conn = PQconnectdb(conninfo);
/* Check to see that the backend connection was successfully made */
if ((PQstatus(conn) != CONNECTION_OK))
{
fprintf(stderr, "Connection to database failed: %s",
PQerrorMessage(conn));
if (exit_on_error)
{
PQfinish(conn);
exit(1);
}
}
return conn;
}

7
dbutils.h Normal file
View File

@@ -0,0 +1,7 @@
/*
* dbutils.h
* Copyright (c) 2ndQuadrant, 2010
*
*/
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);

476
main.c Normal file
View File

@@ -0,0 +1,476 @@
/*
* main.c
* Copyright (c) 2ndQuadrant, 2010
*
* Replication manager
* This module connects to the nodes of a replication cluster and monitors
* how far are they from master
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "repmgr.h"
#include "access/transam.h"
char myClusterName[MAXLEN];
/* Local info */
int myLocalMode = STANDBY_MODE;
int myLocalId = -1;
PGconn *myLocalConn;
/* Primary info */
int primaryId;
char primaryConninfo[MAXLEN];
PGconn *primaryConn;
void setMyLocalMode(void);
void checkClusterConfiguration(void);
void checkNodeConfiguration(char *conninfo);
void getPrimaryConnection(void);
void getLocalMonitoredInfo(char *currTimestamp, char *xlogLocation,
char *xlogTimestamp);
void MonitorCheck(void);
void MonitorExecute(void);
TransactionId startSleepingTransaction(PGconn * conn, bool stop_current);
int
main(int argc, char **argv)
{
char conninfo[MAXLEN];
/*
* Read the configuration file: repmgr.conf
*/
parse_config(myClusterName, &myLocalId, conninfo);
if (myLocalId == -1)
{
fprintf(stderr, "Node information is missing. "
"Check the configuration file.");
exit(1);
}
myLocalConn = establishDBConnection(conninfo, true);
/*
* Set my server mode, establish a connection to primary
* and start monitor
*/
setMyLocalMode();
checkClusterConfiguration();
checkNodeConfiguration(conninfo);
if (myLocalMode == STANDBY_MODE)
{
/* I need the id of the primary as well as a connection to it */
getPrimaryConnection();
MonitorCheck();
PQfinish(primaryConn);
}
/* close the connection to the database and cleanup */
PQfinish(myLocalConn);
return 0;
}
/*
* This function ask if we are in recovery, if false we are the primary else
* we are a standby
*/
void
setMyLocalMode(void)
{
PGresult *res;
res = PQexec(myLocalConn, "SELECT pg_is_in_recovery()");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(myLocalConn));
PQclear(res);
PQfinish(myLocalConn);
exit(1);
}
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
myLocalMode = PRIMARY_MODE;
else
myLocalMode = STANDBY_MODE;
PQclear(res);
}
void
getPrimaryConnection(void)
{
PGresult *res1;
PGresult *res2;
int i;
res1 = PQexec(myLocalConn, "SELECT * FROM repl_nodes");
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't get nodes info: %s", PQerrorMessage(myLocalConn));
PQclear(res1);
PQfinish(myLocalConn);
exit(1);
}
for (i = 0; i < PQntuples(res1); i++)
{
primaryId = atoi(PQgetvalue(res1, i, 0));
strcmp(primaryConninfo, PQgetvalue(res1, i, 1));
primaryConn = establishDBConnection(primaryConninfo, false);
res2 = PQexec(primaryConn, "SELECT pg_is_in_recovery()");
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't get nodes info: %s", PQerrorMessage(primaryConn));
PQclear(res1);
PQclear(res2);
PQfinish(primaryConn);
PQfinish(myLocalConn);
exit(1);
}
if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
{
PQclear(res2);
PQclear(res1);
/* On the primary the monitor check is asynchronous */
res1 = PQexec(primaryConn, "SET synchronous_commit TO off");
PQclear(res1);
return;
}
else
{
PQclear(res2);
PQfinish(primaryConn);
primaryId = -1;
}
}
/* If we finish this loop without finding a primary then
* we doesn't have the info or the primary has failed (or we
* reached max_connections or superuser_reserved_connections,
* anything else i'm missing?),
* Probably we will need to check the error to know if we need
* to start failover procedure o just fix some situation on the
* standby.
*/
fprintf(stderr, "There isn't a primary node");
PQclear(res1);
PQfinish(myLocalConn);
exit(1);
}
void
getLocalMonitoredInfo(char *currTimestamp, char *xlogLocation, char *xlogTimestamp)
{
PGresult *res;
char sqlquery[8192];
sprintf(sqlquery,
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
" get_last_xlog_replay_timestamp()");
res = PQexec(myLocalConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(myLocalConn));
PQclear(res);
PQfinish(myLocalConn);
exit(1);
}
strcpy(currTimestamp, PQgetvalue(res, 0, 0));
strcpy(xlogLocation , PQgetvalue(res, 0, 1));
strcpy(xlogTimestamp, PQgetvalue(res, 0, 2));
PQclear(res);
return;
}
void
MonitorCheck(void) {
PGconn *p1;
PGconn *p2;
TransactionId p1_xid;
TransactionId p2_xid;
TransactionId local_xmin;
int cicles = 0;
/*
* We are trying to avoid cleanup on primary for records that are still
* visible on standby
*/
p1 = establishDBConnection(primaryConninfo, false);
p2 = establishDBConnection(primaryConninfo, false);
p1_xid = startSleepingTransaction(p1, false);
if (p1_xid == InvalidTransactionId)
{
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
p2_xid = startSleepingTransaction(p2, false);
if (p2_xid == InvalidTransactionId)
{
PQfinish(p1);
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
/*
* We are using two long running transactions on primary to avoid
* cleanup of records that are still visible on this standby.
* Every second check if we can let advance the cleanup to avoid
* bloat on primary.
*
* Every 3 cicles (around 3 seconds), insert monitor info
*/
for (;;)
{
PGresult *res;
res = PQexec(myLocalConn, "SELECT get_oldest_xmin()");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(myLocalConn));
PQclear(res);
}
local_xmin = atol(PQgetvalue(res, 0, 0));
PQclear(res);
if (TransactionIdPrecedes(p1_xid, local_xmin) &&
TransactionIdPrecedes(p2_xid, local_xmin))
{
if (TransactionIdPrecedes(p1_xid, p2_xid))
{
p1_xid = startSleepingTransaction(p1, true);
if (p1_xid == InvalidTransactionId)
{
PQfinish(p1);
PQfinish(p2);
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
}
else
{
p2_xid = startSleepingTransaction(p2, true);
if (p2_xid == InvalidTransactionId)
{
PQfinish(p1);
PQfinish(p2);
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
}
}
PQclear(res);
if (cicles++ >= 3)
MonitorExecute();
sleep(1);
}
}
/*
* Check if its time for next monitor call and if so, do it.
*/
void
MonitorExecute(void)
{
char sqlquery[8192];
char monitor_timestamp[MAXLEN];
char last_wal_location[MAXLEN];
char last_wal_timestamp[MAXLEN];
getLocalMonitoredInfo(monitor_timestamp,
last_wal_location,
last_wal_timestamp);
/*
* Build the SQL to execute on primary
*/
sprintf(sqlquery,
"INSERT INTO repl_status "
"VALUES(%d, %d, '%s'::timestamp with time zone, "
" pg_current_xlog_location(), '%s', "
" '%s'::timestamp with time zone, "
" CURRENT_TIMESTAMP - '%s'::timestamp with time zone) ",
primaryId, myLocalId, monitor_timestamp,
last_wal_location, last_wal_timestamp,
last_wal_timestamp);
/*
* Execute the query asynchronously, but don't check for a result. We
* will check the result next time we pause for a monitor step.
*/
if (!PQexec(primaryConn, sqlquery))
fprintf(stderr, "replication monitor insert failed: %s",
PQerrorMessage(primaryConn));
}
void
checkClusterConfiguration(void)
{
PGresult *res;
res = PQexec(myLocalConn, "SELECT oid FROM pg_class "
" WHERE relname = 'repl_nodes'");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(myLocalConn));
PQclear(res);
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
/*
* If there isn't any results then we have not configured a primary node yet
* in repmgr or the connection string is pointing to the wrong database.
* XXX if we are the primary, should we try to create the tables needed?
*/
if (PQntuples(res) == 0)
{
fprintf(stderr, "The replication cluster is not configured");
PQclear(res);
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
PQclear(res);
}
void
checkNodeConfiguration(char *conninfo)
{
PGresult *res;
char sqlquery[8192];
/*
* Check if we have my node information in repl_nodes
*/
sprintf(sqlquery, "SELECT * FROM repl_nodes "
" WHERE id = %d AND cluster = '%s' ",
myLocalId, myClusterName);
res = PQexec(myLocalConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(myLocalConn));
PQclear(res);
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
/*
* If there isn't any results then we have not configured this node yet
* in repmgr, if that is the case we will insert the node to the cluster
*/
if (PQntuples(res) == 0)
{
PQclear(res);
/* Adding the node */
sprintf(sqlquery, "INSERT INTO repl_nodes "
"VALUES (%d, '%s', '%s')",
myLocalId, myClusterName, conninfo);
if (!PQexec(primaryConn, sqlquery))
{
fprintf(stderr, "Cannot insert node details, %s",
PQerrorMessage(primaryConn));
PQfinish(myLocalConn);
PQfinish(primaryConn);
exit(1);
}
}
PQclear(res);
}
TransactionId
startSleepingTransaction(PGconn *conn, bool stop_current) {
TransactionId txid;
PGresult *res;
/*
* if stop_current is set we cancel any query that is currently
* executing on this connection
*/
if (stop_current)
{
char errbuf[256];
if (PQcancel(PQgetCancel(conn), errbuf, 256) == 0)
fprintf(stderr, "Can't stop current query: %s", errbuf);
if (PQisBusy(conn) == 1)
return InvalidTransactionId;
else
PQexec(conn, "ROLLBACK");
}
if (!PQexec(conn, "BEGIN"))
{
fprintf(stderr, "Can't start a transaction on primary. Error: %s",
PQerrorMessage(conn));
return InvalidTransactionId;
}
res = PQexec(conn, "SELECT txid_current()");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn));
PQclear(res);
return InvalidTransactionId;
}
txid = atol(PQgetvalue(res, 0, 0));
PQclear(res);
/* Let this transaction sleep */
PQsendQuery(conn, "SELECT pg_sleep(10000000000)");
return txid;
}
/*
* TransactionIdPrecedes --- is id1 logically < id2?
* This function was copied from src/backend/access/transam/transam.c
*/
bool
TransactionIdPrecedes(TransactionId id1, TransactionId id2)
{
/*
* If either ID is a permanent XID then we can just do unsigned
* comparison. If both are normal, do a modulo-2^31 comparison.
*/
int32 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 < id2);
diff = (int32) (id1 - id2);
return (diff < 0);
}

3
repmgr.conf Normal file
View File

@@ -0,0 +1,3 @@
cluster=test
node=2
conninfo='host=192.168.204.104'

23
repmgr.h Normal file
View File

@@ -0,0 +1,23 @@
/*
* dbutils.h
* Copyright (c) 2ndQuadrant, 2010
*
*/
#ifndef _REPMGR_H_
#define _REPMGR_H_
#include "postgres_fe.h"
#include "libpq-fe.h"
#include "dbutils.h"
#include "config.h"
#define PRIMARY_MODE 0
#define STANDBY_MODE 1
#define MAXLEN 80
#define CONFIG_FILE "repmgr.conf"
#endif

18
repmgr.sql Normal file
View File

@@ -0,0 +1,18 @@
CREATE TABLE repl_nodes (
id integer primary key,
cluster text not null,
conninfo text not null
);
drop table if exists repl_status;
CREATE TABLE repl_status(
primary_node INTEGER NOT NULL,
standby_node INTEGER NOT NULL,
last_monitor_timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
last_wal_primary_location TEXT NOT NULL,
last_wal_standby_location TEXT NOT NULL,
last_wal_standby_timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
lag_time INTERVAL NOT NULL
-- num_wal_packets INTEGER NOT NULL
);

19
sql_utils/Makefile Normal file
View File

@@ -0,0 +1,19 @@
#
# Makefile
# Copyright (c) 2ndQuadrant, 2010
#
MODULE_big = repmgr_wrapper_funcs
DATA_built=repmgr_wrapper_funcs.sql
DATA=uninstall_repmgr_wrapper_funcs.sql
OBJS=repmgr_wrapper_funcs.o
ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs)
include $(PGXS)
else
subdir = contrib/repmgr/sql_utils
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

View File

@@ -0,0 +1,36 @@
/*
* repmgr_wrapper_funcs.c
* Copyright (c) 2ndQuadrant, 2010
*
* Expose some backend functions in SQL
*/
#include "postgres.h"
#include "fmgr.h"
#include "access/xlog.h"
#include "storage/procarray.h"
PG_MODULE_MAGIC;
Datum last_xlog_replay_timestamp(PG_FUNCTION_ARGS);
Datum oldest_xmin(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(last_xlog_replay_timestamp);
Datum
last_xlog_replay_timestamp(PG_FUNCTION_ARGS)
{
if (!InRecovery)
PG_RETURN_NULL();
else
PG_RETURN_TIMESTAMPTZ(GetLatestXLogTime());
}
PG_FUNCTION_INFO_V1(oldest_xmin);
Datum
oldest_xmin(PG_FUNCTION_ARGS)
{
PG_RETURN_INT64(GetOldestXmin(false, false));
}

View File

@@ -0,0 +1,13 @@
/*
* repmgr_wrapper_function.sql
* Copyright (c) 2ndQuadrant, 2010
*
*/
CREATE FUNCTION get_last_xlog_replay_timestamp() RETURNS timestamp with time zone
AS 'MODULE_PATHNAME', 'last_xlog_replay_timestamp'
LANGUAGE C STRICT;
CREATE FUNCTION get_oldest_xmin() RETURNS bigint
AS 'MODULE_PATHNAME', 'oldest_xmin'
LANGUAGE C STRICT;

View File

@@ -0,0 +1,2 @@
DROP FUNCTION get_last_xlog_replay_timestamp();
DROP FUNCTION get_oldest_xmin();