mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 00:26:30 +00:00
Add MASTER REGISTER and STANDBY REGISTER commands.
This commit is contained in:
24
README
24
README
@@ -33,6 +33,16 @@ a port different then the default 5432 you can specify a -p parameter.
|
|||||||
Standby is always considered as localhost and a second -p parameter will indicate
|
Standby is always considered as localhost and a second -p parameter will indicate
|
||||||
its port if is different from the default one.
|
its port if is different from the default one.
|
||||||
|
|
||||||
|
* master register
|
||||||
|
|
||||||
|
Registers a master in a cluster; it needs to be executed before any other node is
|
||||||
|
registered
|
||||||
|
|
||||||
|
* standby register
|
||||||
|
|
||||||
|
Registers a standby in a cluster; it needs to be executed before any repmgrd daemon
|
||||||
|
is executed
|
||||||
|
|
||||||
* standby clone [node to be cloned]
|
* standby clone [node to be cloned]
|
||||||
|
|
||||||
Backup via rsync the data directory of the primary. And creates the recovery file
|
Backup via rsync the data directory of the primary. And creates the recovery file
|
||||||
@@ -128,5 +138,17 @@ REPMGR DAEMON
|
|||||||
=============
|
=============
|
||||||
|
|
||||||
It reads the repmgr.conf file in current directory or as indicated with -f
|
It reads the repmgr.conf file in current directory or as indicated with -f
|
||||||
parameter looks if the standby is in repl_nodes and if it is no
|
parameter, checks whether the standby is in repl_nodes and, if it is not, adds it.
|
||||||
|
|
||||||
|
Before you can run the repmgr daemon (repmgrd) you need to register a master
|
||||||
|
and at least one standby in a cluster; to do that, use the MASTER
|
||||||
|
REGISTER and STANDBY REGISTER commands.
|
||||||
|
|
||||||
|
For example, following the last example and assuming that repmgr.conf is in the postgres
|
||||||
|
home directory you will run this on the master:
|
||||||
|
|
||||||
|
repmgr -f /home/postgres/repmgr.conf master register
|
||||||
|
|
||||||
|
and the same on the standby, using "standby register" instead of "master register".
|
||||||
|
|
||||||
The repmgr daemon creates 2 connections: one to master and other to standby.
|
The repmgr daemon creates 2 connections: one to master and other to standby.
|
||||||
|
|||||||
80
dbutils.c
80
dbutils.c
@@ -128,3 +128,83 @@ get_cluster_size(PGconn *conn)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get a connection to master by reading repl_nodes, creating a connection
|
||||||
|
* to each node (one at a time) and finding if it is a master or a standby
|
||||||
|
*/
|
||||||
|
PGconn *
|
||||||
|
getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id)
|
||||||
|
{
|
||||||
|
PGconn *master_conn = NULL;
|
||||||
|
PGresult *res1;
|
||||||
|
PGresult *res2;
|
||||||
|
char sqlquery[8192];
|
||||||
|
char master_conninfo[8192];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* find all nodes belonging to this cluster */
|
||||||
|
sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes "
|
||||||
|
" WHERE cluster = '%s' and id <> %d",
|
||||||
|
cluster, cluster, id);
|
||||||
|
|
||||||
|
res1 = PQexec(standby_conn, sqlquery);
|
||||||
|
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(standby_conn));
|
||||||
|
PQclear(res1);
|
||||||
|
PQfinish(standby_conn);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < PQntuples(res1); i++)
|
||||||
|
{
|
||||||
|
/* initialize with the values of the current node being processed */
|
||||||
|
*master_id = atoi(PQgetvalue(res1, i, 0));
|
||||||
|
strcpy(master_conninfo, PQgetvalue(res1, i, 2));
|
||||||
|
master_conn = establishDBConnection(master_conninfo, false);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* I can't use the is_standby() function here because on error that
|
||||||
|
* function closes the connection i pass and exit, but i still need to close
|
||||||
|
* standby_conn
|
||||||
|
*/
|
||||||
|
res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");
|
||||||
|
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(master_conn));
|
||||||
|
PQclear(res2);
|
||||||
|
PQfinish(master_conn);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if false, this is the master */
|
||||||
|
if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
|
||||||
|
{
|
||||||
|
PQclear(res2);
|
||||||
|
PQclear(res1);
|
||||||
|
return master_conn;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* if it is a standby clear info */
|
||||||
|
PQclear(res2);
|
||||||
|
PQfinish(master_conn);
|
||||||
|
*master_id = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we finish this loop without finding a master then
|
||||||
|
* we doesn't have the info or the master has failed (or we
|
||||||
|
* reached max_connections or superuser_reserved_connections,
|
||||||
|
* anything else i'm missing?),
|
||||||
|
* Probably we will need to check the error to know if we need
|
||||||
|
* to start failover procedure or just fix some situation on the
|
||||||
|
* standby.
|
||||||
|
*/
|
||||||
|
fprintf(stderr, "There isn't a master node in the cluster\n");
|
||||||
|
PQclear(res1);
|
||||||
|
PQfinish(master_conn);
|
||||||
|
return (PGconn *) NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -9,3 +9,4 @@ bool is_standby(PGconn *conn);
|
|||||||
bool is_supported_version(PGconn *conn);
|
bool is_supported_version(PGconn *conn);
|
||||||
bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value);
|
bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value);
|
||||||
const char *get_cluster_size(PGconn *conn);
|
const char *get_cluster_size(PGconn *conn);
|
||||||
|
PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id);
|
||||||
|
|||||||
340
repmgr.c
340
repmgr.c
@@ -3,10 +3,12 @@
|
|||||||
* Copyright (c) 2ndQuadrant, 2010
|
* Copyright (c) 2ndQuadrant, 2010
|
||||||
*
|
*
|
||||||
* Command interpreter for the repmgr
|
* Command interpreter for the repmgr
|
||||||
* This module execute some tasks based on commands and then exit
|
* This module is a command-line utility to easily setup a cluster of
|
||||||
|
* hot standby servers for an HA environment
|
||||||
*
|
*
|
||||||
* Commands implemented are.
|
* Commands implemented are.
|
||||||
* STANDBY CLONE, STANDBY FOLLOW, STANDBY PROMOTE
|
* MASTER REGISTER, STANDBY REGISTER, STANDBY CLONE, STANDBY FOLLOW,
|
||||||
|
* STANDBY PROMOTE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "repmgr.h"
|
#include "repmgr.h"
|
||||||
@@ -21,15 +23,19 @@
|
|||||||
#define RECOVERY_FILE "recovery.conf"
|
#define RECOVERY_FILE "recovery.conf"
|
||||||
#define RECOVERY_DONE_FILE "recovery.done"
|
#define RECOVERY_DONE_FILE "recovery.done"
|
||||||
|
|
||||||
#define STANDBY_NORMAL 0 /* Not a real action, just to initialize */
|
#define NO_ACTION 0 /* Not a real action, just to initialize */
|
||||||
#define STANDBY_CLONE 1
|
#define MASTER_REGISTER 1
|
||||||
#define STANDBY_PROMOTE 2
|
#define STANDBY_REGISTER 2
|
||||||
#define STANDBY_FOLLOW 3
|
#define STANDBY_CLONE 3
|
||||||
|
#define STANDBY_PROMOTE 4
|
||||||
|
#define STANDBY_FOLLOW 5
|
||||||
|
|
||||||
static void help(const char *progname);
|
static void help(const char *progname);
|
||||||
static bool create_recovery_file(const char *data_dir);
|
static bool create_recovery_file(const char *data_dir);
|
||||||
static int copy_remote_files(char *host, char *remote_path, char *local_path, bool is_directory);
|
static int copy_remote_files(char *host, char *remote_path, char *local_path, bool is_directory);
|
||||||
|
|
||||||
|
static void do_master_register(void);
|
||||||
|
static void do_standby_register(void);
|
||||||
static void do_standby_clone(void);
|
static void do_standby_clone(void);
|
||||||
static void do_standby_promote(void);
|
static void do_standby_promote(void);
|
||||||
static void do_standby_follow(void);
|
static void do_standby_follow(void);
|
||||||
@@ -43,6 +49,7 @@ const char *dbname = NULL;
|
|||||||
char *host = NULL;
|
char *host = NULL;
|
||||||
char *username = NULL;
|
char *username = NULL;
|
||||||
char *dest_dir = NULL;
|
char *dest_dir = NULL;
|
||||||
|
char *config_file = NULL;
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
bool force = false;
|
bool force = false;
|
||||||
|
|
||||||
@@ -50,8 +57,8 @@ int numport = 0;
|
|||||||
char *masterport = NULL;
|
char *masterport = NULL;
|
||||||
char *standbyport = NULL;
|
char *standbyport = NULL;
|
||||||
|
|
||||||
char *stndby = NULL;
|
char *server_mode = NULL;
|
||||||
char *stndby_cmd = NULL;
|
char *server_cmd = NULL;
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
@@ -63,6 +70,7 @@ main(int argc, char **argv)
|
|||||||
{"port", required_argument, NULL, 'p'},
|
{"port", required_argument, NULL, 'p'},
|
||||||
{"username", required_argument, NULL, 'U'},
|
{"username", required_argument, NULL, 'U'},
|
||||||
{"dest-dir", required_argument, NULL, 'D'},
|
{"dest-dir", required_argument, NULL, 'D'},
|
||||||
|
{"config-file", required_argument, NULL, 'f'},
|
||||||
{"force", no_argument, NULL, 'F'},
|
{"force", no_argument, NULL, 'F'},
|
||||||
{"verbose", no_argument, NULL, 'v'},
|
{"verbose", no_argument, NULL, 'v'},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
@@ -70,7 +78,7 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
int optindex;
|
int optindex;
|
||||||
int c;
|
int c;
|
||||||
int action = STANDBY_NORMAL;
|
int action = NO_ACTION;
|
||||||
|
|
||||||
progname = get_progname(argv[0]);
|
progname = get_progname(argv[0]);
|
||||||
|
|
||||||
@@ -89,7 +97,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "d:h:p:U:D:Fv", long_options, &optindex)) != -1)
|
while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:F:v", long_options, &optindex)) != -1)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
@@ -119,6 +127,11 @@ main(int argc, char **argv)
|
|||||||
case 'D':
|
case 'D':
|
||||||
dest_dir = optarg;
|
dest_dir = optarg;
|
||||||
break;
|
break;
|
||||||
|
case 'f':
|
||||||
|
config_file = optarg;
|
||||||
|
if (config_file == NULL)
|
||||||
|
sprintf(config_file, "./%s", CONFIG_FILE);
|
||||||
|
break;
|
||||||
case 'F':
|
case 'F':
|
||||||
force = true;
|
force = true;
|
||||||
break;
|
break;
|
||||||
@@ -132,16 +145,17 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we need to obtain the action, this comes in the form:
|
* Now we need to obtain the action, this comes in one of these forms:
|
||||||
* STANDBY {CLONE [node]|PROMOTE|FOLLOW [node]}
|
* MASTER REGISTER |
|
||||||
|
* STANDBY {REGISTER | CLONE [node] | PROMOTE | FOLLOW [node]}
|
||||||
*
|
*
|
||||||
* the node part is optional, if we receive it then we shouldn't
|
* the node part is optional, if we receive it then we shouldn't
|
||||||
* have received a -h option
|
* have received a -h option
|
||||||
*/
|
*/
|
||||||
if (optind < argc)
|
if (optind < argc)
|
||||||
{
|
{
|
||||||
stndby = argv[optind++];
|
server_mode = argv[optind++];
|
||||||
if (strcasecmp(stndby, "STANDBY") != 0)
|
if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||||
exit(1);
|
exit(1);
|
||||||
@@ -150,12 +164,25 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
if (optind < argc)
|
if (optind < argc)
|
||||||
{
|
{
|
||||||
stndby_cmd = argv[optind++];
|
server_cmd = argv[optind++];
|
||||||
if (strcasecmp(stndby_cmd, "CLONE") == 0)
|
if (strcasecmp(server_cmd, "REGISTER") == 0)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* we don't use this info in any other place so i will
|
||||||
|
* just execute the compare again instead of having an
|
||||||
|
* additional variable to hold a value that we will use
|
||||||
|
* no more
|
||||||
|
*/
|
||||||
|
if (strcasecmp(server_mode, "MASTER") == 0)
|
||||||
|
action = MASTER_REGISTER;
|
||||||
|
else if (strcasecmp(server_mode, "STANDBY") == 0)
|
||||||
|
action = STANDBY_REGISTER;
|
||||||
|
}
|
||||||
|
else if (strcasecmp(server_cmd, "CLONE") == 0)
|
||||||
action = STANDBY_CLONE;
|
action = STANDBY_CLONE;
|
||||||
else if (strcasecmp(stndby_cmd, "PROMOTE") == 0)
|
else if (strcasecmp(server_cmd, "PROMOTE") == 0)
|
||||||
action = STANDBY_PROMOTE;
|
action = STANDBY_PROMOTE;
|
||||||
else if (strcasecmp(stndby_cmd, "FOLLOW") == 0)
|
else if (strcasecmp(server_cmd, "FOLLOW") == 0)
|
||||||
action = STANDBY_FOLLOW;
|
action = STANDBY_FOLLOW;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -164,8 +191,9 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For STANDBY CLONE and STANDBY FOLLOW we still can receive a last argument */
|
/* For some actions we still can receive a last argument */
|
||||||
if ((action == STANDBY_CLONE) || (action == STANDBY_FOLLOW))
|
if ((action == STANDBY_CLONE) || (action == STANDBY_FOLLOW) ||
|
||||||
|
(action == MASTER_REGISTER) || (action == STANDBY_REGISTER))
|
||||||
{
|
{
|
||||||
if (optind < argc)
|
if (optind < argc)
|
||||||
{
|
{
|
||||||
@@ -213,6 +241,12 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
switch (action)
|
switch (action)
|
||||||
{
|
{
|
||||||
|
case MASTER_REGISTER:
|
||||||
|
do_master_register();
|
||||||
|
break;
|
||||||
|
case STANDBY_REGISTER:
|
||||||
|
do_standby_register();
|
||||||
|
break;
|
||||||
case STANDBY_CLONE:
|
case STANDBY_CLONE:
|
||||||
do_standby_clone();
|
do_standby_clone();
|
||||||
break;
|
break;
|
||||||
@@ -231,6 +265,266 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_master_register(void)
|
||||||
|
{
|
||||||
|
PGconn *conn;
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[8192];
|
||||||
|
|
||||||
|
char myClusterName[MAXLEN];
|
||||||
|
int myLocalId = -1;
|
||||||
|
char conninfo[MAXLEN];
|
||||||
|
|
||||||
|
bool schema_exists = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read the configuration file: repmgr.conf
|
||||||
|
*/
|
||||||
|
parse_config(config_file, myClusterName, &myLocalId, conninfo);
|
||||||
|
if (myLocalId == -1)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Node information is missing. "
|
||||||
|
"Check the configuration file.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
conn = establishDBConnection(conninfo, true);
|
||||||
|
|
||||||
|
/* Check we are a master */
|
||||||
|
if (is_standby(conn))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "repmgr: This node should be a master\n");
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if there is a schema for this cluster */
|
||||||
|
sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName);
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Can't get info about schemas: %s\n", PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!PQgetisnull(res, 0, 0)) /* schema exists */
|
||||||
|
{
|
||||||
|
if (!force) /* and we are not forcing so error */
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Schema repmgr_%s already exists.", myClusterName);
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
schema_exists = true;
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
if (!schema_exists)
|
||||||
|
{
|
||||||
|
/* ok, create the schema */
|
||||||
|
sprintf(sqlquery, "CREATE SCHEMA repmgr_%s", myClusterName);
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot create the schema repmgr_%s: %s\n",
|
||||||
|
myClusterName, PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ... the tables */
|
||||||
|
sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( "
|
||||||
|
" id integer primary key, "
|
||||||
|
" cluster text not null, "
|
||||||
|
" conninfo text not null)", myClusterName);
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot create the table repmgr_%s.repl_nodes: %s\n",
|
||||||
|
myClusterName, PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( "
|
||||||
|
" primary_node INTEGER NOT NULL, "
|
||||||
|
" standby_node INTEGER NOT NULL, "
|
||||||
|
" last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, "
|
||||||
|
" last_wal_primary_location TEXT NOT NULL, "
|
||||||
|
" last_wal_standby_location TEXT NOT NULL, "
|
||||||
|
" replication_lag BIGINT NOT NULL, "
|
||||||
|
" apply_lag BIGINT NOT NULL) ", myClusterName);
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot create the table repmgr_%s.repl_monitor: %s\n",
|
||||||
|
myClusterName, PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* and the view */
|
||||||
|
sprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS "
|
||||||
|
" WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node "
|
||||||
|
" ORDER BY last_monitor_time desc) "
|
||||||
|
" FROM repmgr_%s.repl_monitor) "
|
||||||
|
" SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, "
|
||||||
|
" last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, "
|
||||||
|
" pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag "
|
||||||
|
" FROM monitor_info a "
|
||||||
|
" WHERE row_number = 1", myClusterName, myClusterName);
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot create the view repmgr_%s.repl_status: %s\n",
|
||||||
|
myClusterName, PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PGconn *master_conn;
|
||||||
|
int id;
|
||||||
|
|
||||||
|
/* Ensure there isn't any other master already registered */
|
||||||
|
master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id);
|
||||||
|
if (master_conn != NULL)
|
||||||
|
{
|
||||||
|
PQfinish(master_conn);
|
||||||
|
fprintf(stderr, "There is a master already in this cluster");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now register the master */
|
||||||
|
if (force)
|
||||||
|
{
|
||||||
|
sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes "
|
||||||
|
" WHERE id = %d",
|
||||||
|
myClusterName, myLocalId);
|
||||||
|
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot delete node details, %s\n",
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes "
|
||||||
|
"VALUES (%d, '%s', '%s')",
|
||||||
|
myClusterName, myLocalId, myClusterName, conninfo);
|
||||||
|
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot insert node details, %s\n",
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_standby_register(void)
|
||||||
|
{
|
||||||
|
PGconn *conn;
|
||||||
|
PGconn *master_conn;
|
||||||
|
int master_id;
|
||||||
|
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[8192];
|
||||||
|
|
||||||
|
char myClusterName[MAXLEN];
|
||||||
|
int myLocalId = -1;
|
||||||
|
char conninfo[MAXLEN];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read the configuration file: repmgr.conf
|
||||||
|
*/
|
||||||
|
parse_config(config_file, myClusterName, &myLocalId, conninfo);
|
||||||
|
if (myLocalId == -1)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Node information is missing. "
|
||||||
|
"Check the configuration file.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
conn = establishDBConnection(conninfo, true);
|
||||||
|
|
||||||
|
/* Check we are a standby */
|
||||||
|
if (!is_standby(conn))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "repmgr: This node should be a standby\n");
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if there is a schema for this cluster */
|
||||||
|
sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName);
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PQgetisnull(res, 0, 0)) /* schema doesn't exists */
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Schema repmgr_%s doesn't exists.", myClusterName);
|
||||||
|
PQclear(res);
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id);
|
||||||
|
if (!master_conn)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Now register the standby */
|
||||||
|
|
||||||
|
if (force)
|
||||||
|
{
|
||||||
|
sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes "
|
||||||
|
" WHERE id = %d",
|
||||||
|
myClusterName, myLocalId);
|
||||||
|
|
||||||
|
if (!PQexec(master_conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot delete node details, %s\n",
|
||||||
|
PQerrorMessage(master_conn));
|
||||||
|
PQfinish(master_conn);
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes "
|
||||||
|
"VALUES (%d, '%s', '%s')",
|
||||||
|
myClusterName, myLocalId, myClusterName, conninfo);
|
||||||
|
|
||||||
|
if (!PQexec(master_conn, sqlquery))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Cannot insert node details, %s\n",
|
||||||
|
PQerrorMessage(master_conn));
|
||||||
|
PQfinish(master_conn);
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(master_conn);
|
||||||
|
PQfinish(conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
do_standby_clone(void)
|
do_standby_clone(void)
|
||||||
{
|
{
|
||||||
@@ -238,7 +532,7 @@ do_standby_clone(void)
|
|||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[8192];
|
char sqlquery[8192];
|
||||||
|
|
||||||
int r;
|
int r = 0;
|
||||||
int i;
|
int i;
|
||||||
bool pg_dir = false;
|
bool pg_dir = false;
|
||||||
char master_data_directory[MAXLEN];
|
char master_data_directory[MAXLEN];
|
||||||
@@ -783,10 +1077,12 @@ help(const char *progname)
|
|||||||
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
|
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
|
||||||
printf(_(" -p, --port=PORT database server port\n"));
|
printf(_(" -p, --port=PORT database server port\n"));
|
||||||
printf(_(" -U, --username=USERNAME user name to connect as\n"));
|
printf(_(" -U, --username=USERNAME user name to connect as\n"));
|
||||||
printf(_(" -D, --data-dir=DIR directory where the files will be copied to\n"));
|
printf(_(" -D, --data-dir=DIR directory where the files will be copied to\n"));
|
||||||
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
||||||
printf(_("or making follow another node and then exits.\n"));
|
printf(_("or making follow another node and then exits.\n"));
|
||||||
printf(_("COMMANDS:\n"));
|
printf(_("COMMANDS:\n"));
|
||||||
|
printf(_(" master register - registers the master in a cluster\n"));
|
||||||
|
printf(_(" standby register - registers a standby in a cluster\n"));
|
||||||
printf(_(" standby clone [node] - allows creation of a new standby\n"));
|
printf(_(" standby clone [node] - allows creation of a new standby\n"));
|
||||||
printf(_(" standby promote - allows manual promotion of a specific standby into a "));
|
printf(_(" standby promote - allows manual promotion of a specific standby into a "));
|
||||||
printf(_("new master in the event of a failover\n"));
|
printf(_("new master in the event of a failover\n"));
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
CREATE USER repmgr;
|
CREATE USER repmgr;
|
||||||
CREATE DATABASE repmgr OWNER repmgr;
|
CREATE SCHEMA repmgr;
|
||||||
|
|
||||||
\c repmgr
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The table repl_nodes keeps information about all machines in
|
* The table repl_nodes keeps information about all machines in
|
||||||
@@ -40,7 +38,7 @@ ALTER TABLE repl_monitor OWNER TO repmgr;
|
|||||||
* apply_lag: in bytes (this is how far the latest xlog record
|
* apply_lag: in bytes (this is how far the latest xlog record
|
||||||
* we have applied is from the latest record we
|
* we have applied is from the latest record we
|
||||||
* have received)
|
* have received)
|
||||||
* time-lag: how many seconds are we from being up-to-date with master
|
* time_lag: how many seconds are we from being up-to-date with master
|
||||||
*/
|
*/
|
||||||
drop view if exists repl_status;
|
drop view if exists repl_status;
|
||||||
CREATE VIEW repl_status AS
|
CREATE VIEW repl_status AS
|
||||||
|
|||||||
87
repmgrd.c
87
repmgrd.c
@@ -40,7 +40,6 @@ bool verbose = false;
|
|||||||
static void help(const char *progname);
|
static void help(const char *progname);
|
||||||
static void checkClusterConfiguration(void);
|
static void checkClusterConfiguration(void);
|
||||||
static void checkNodeConfiguration(char *conninfo);
|
static void checkNodeConfiguration(char *conninfo);
|
||||||
static void getPrimaryConnection(void);
|
|
||||||
static void CancelQuery(void);
|
static void CancelQuery(void);
|
||||||
|
|
||||||
static void MonitorExecute(void);
|
static void MonitorExecute(void);
|
||||||
@@ -148,7 +147,9 @@ main(int argc, char **argv)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* I need the id of the primary as well as a connection to it */
|
/* I need the id of the primary as well as a connection to it */
|
||||||
getPrimaryConnection();
|
primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId);
|
||||||
|
if (primaryConn == NULL)
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
checkClusterConfiguration();
|
checkClusterConfiguration();
|
||||||
@@ -165,88 +166,6 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* get a connection to primary by reading repl_nodes, creating a connection
|
|
||||||
* to each node (one at a time) and finding if it is a primary or a standby
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
getPrimaryConnection(void)
|
|
||||||
{
|
|
||||||
PGresult *res1;
|
|
||||||
PGresult *res2;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* find all nodes belonging to this cluster */
|
|
||||||
sprintf(sqlquery, "SELECT * FROM repl_nodes "
|
|
||||||
" WHERE cluster = '%s' and id <> %d",
|
|
||||||
myClusterName, myLocalId);
|
|
||||||
|
|
||||||
res1 = PQexec(myLocalConn, sqlquery);
|
|
||||||
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(myLocalConn));
|
|
||||||
PQclear(res1);
|
|
||||||
CloseConnections();
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < PQntuples(res1); i++)
|
|
||||||
{
|
|
||||||
/* initialize with the values of the current node being processed */
|
|
||||||
primaryId = atoi(PQgetvalue(res1, i, 0));
|
|
||||||
strcpy(primaryConninfo, PQgetvalue(res1, i, 2));
|
|
||||||
primaryConn = establishDBConnection(primaryConninfo, false);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* I can't use the is_standby() function here because on error that
|
|
||||||
* function closes the one i pass and exit, but i still need to close
|
|
||||||
* myLocalConn
|
|
||||||
*/
|
|
||||||
res2 = PQexec(primaryConn, "SELECT pg_is_in_recovery()");
|
|
||||||
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(primaryConn));
|
|
||||||
PQclear(res1);
|
|
||||||
PQclear(res2);
|
|
||||||
CloseConnections();
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if false, this is the primary */
|
|
||||||
if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
|
|
||||||
{
|
|
||||||
PQclear(res2);
|
|
||||||
PQclear(res1);
|
|
||||||
/* We turn off synchronous_commit for the monitor info inserts */
|
|
||||||
res1 = PQexec(primaryConn, "SET synchronous_commit TO off");
|
|
||||||
PQclear(res1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* if it is a standby clear info */
|
|
||||||
PQclear(res2);
|
|
||||||
PQfinish(primaryConn);
|
|
||||||
primaryId = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If we finish this loop without finding a primary then
|
|
||||||
* we doesn't have the info or the primary has failed (or we
|
|
||||||
* reached max_connections or superuser_reserved_connections,
|
|
||||||
* anything else i'm missing?),
|
|
||||||
* Probably we will need to check the error to know if we need
|
|
||||||
* to start failover procedure or just fix some situation on the
|
|
||||||
* standby.
|
|
||||||
*/
|
|
||||||
fprintf(stderr, "There isn't a primary node the cluster\n");
|
|
||||||
PQclear(res1);
|
|
||||||
CloseConnections();
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Insert monitor info, this is basically the time and xlog replayed,
|
* Insert monitor info, this is basically the time and xlog replayed,
|
||||||
* applied on standby and current xlog location in primary.
|
* applied on standby and current xlog location in primary.
|
||||||
|
|||||||
Reference in New Issue
Block a user