mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
86 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c3d79147b | ||
|
|
ca470647cb | ||
|
|
62ee287e3f | ||
|
|
729a1b848a | ||
|
|
701cf043fd | ||
|
|
bbb67c55f6 | ||
|
|
c2c48a9fe6 | ||
|
|
9d6ac2ebf9 | ||
|
|
680f23fb1d | ||
|
|
1159113c58 | ||
|
|
f25a709454 | ||
|
|
897daddcc7 | ||
|
|
0fdcce0477 | ||
|
|
de58eff7c1 | ||
|
|
f2a0b31a20 | ||
|
|
e007a55967 | ||
|
|
d235c696af | ||
|
|
4ef6fbb5fe | ||
|
|
2e61d7b156 | ||
|
|
4496a0761e | ||
|
|
3978ead184 | ||
|
|
b36dbf61fe | ||
|
|
84466ecca5 | ||
|
|
649086e5e4 | ||
|
|
7cf2eb440d | ||
|
|
388bbfb773 | ||
|
|
a89aa02c68 | ||
|
|
c81793b63f | ||
|
|
b4e83cf188 | ||
|
|
1db61ce277 | ||
|
|
41abf9a7ef | ||
|
|
abebc53ddc | ||
|
|
5fc4a0382f | ||
|
|
a7d3c9b93a | ||
|
|
ee9dc9e247 | ||
|
|
94cb5b94e7 | ||
|
|
a08aa50f92 | ||
|
|
9563877fbb | ||
|
|
4f3bd6612c | ||
|
|
192ee3cdb0 | ||
|
|
6f149ead8f | ||
|
|
77aa6aa326 | ||
|
|
18206b3a64 | ||
|
|
91446bcf93 | ||
|
|
dcdf8788ae | ||
|
|
4fabfbbbd0 | ||
|
|
c41030b40e | ||
|
|
a0fdadd5d2 | ||
|
|
4c3d7f80ed | ||
|
|
6e3fe059d8 | ||
|
|
9f26254ac3 | ||
|
|
0e8ff1730e | ||
|
|
634fdff303 | ||
|
|
cbce29f009 | ||
|
|
920f925e4b | ||
|
|
9fe2d6886e | ||
|
|
0068dd573a | ||
|
|
d0f3cb59c7 | ||
|
|
7428e92e10 | ||
|
|
a97065113d | ||
|
|
9e2f276fcf | ||
|
|
b0cd2b5e43 | ||
|
|
079a7c9f16 | ||
|
|
bdf957ca52 | ||
|
|
67b451aa45 | ||
|
|
0a70d907ae | ||
|
|
ab1d380843 | ||
|
|
672b237c4e | ||
|
|
7d94151494 | ||
|
|
81b8a944de | ||
|
|
1b69282df9 | ||
|
|
06dd252f69 | ||
|
|
d6bd5aa381 | ||
|
|
bbdcffa813 | ||
|
|
5f33d9d715 | ||
|
|
877f4cf82e | ||
|
|
949f5ee498 | ||
|
|
eb2f7efb4a | ||
|
|
85ff3ec286 | ||
|
|
2803bb92a8 | ||
|
|
16fe41eecf | ||
|
|
d365a309fc | ||
|
|
d5a41bb587 | ||
|
|
474d3217b4 | ||
|
|
7a00d5a9a4 | ||
|
|
5683b905dd |
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2010-2012, 2ndQuadrant Limited
|
Copyright (c) 2010-2014, 2ndQuadrant Limited
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
|||||||
11
HISTORY
11
HISTORY
@@ -1,3 +1,14 @@
|
|||||||
|
2.0stable 2014-01-30
|
||||||
|
Documentation fixes (Christian)
|
||||||
|
General refactoring, code quality improvements and stabilization work (Christian)
|
||||||
|
Added proper daemonizing (-d/--daemonize) (Christian)
|
||||||
|
Added PID file handling (-p/--pid-file) (Christian)
|
||||||
|
New config option: monitor_interval_secs (Christian)
|
||||||
|
New config option: retry_promote_interval_secs (Christian)
|
||||||
|
New config option: logfile (Christian)
|
||||||
|
New config option: pg_bindir (Christian)
|
||||||
|
New config option: pg_ctl_options (Christian)
|
||||||
|
|
||||||
2.0beta2 2013-12-19
|
2.0beta2 2013-12-19
|
||||||
Improve autofailover logic and algorithms (Jaime, Andres)
|
Improve autofailover logic and algorithms (Jaime, Andres)
|
||||||
Ignore pg_log when cloning (Jaime)
|
Ignore pg_log when cloning (Jaime)
|
||||||
|
|||||||
11
Makefile
11
Makefile
@@ -1,6 +1,6 @@
|
|||||||
#
|
#
|
||||||
# Makefile
|
# Makefile
|
||||||
# Copyright (c) 2ndQuadrant, 2010-2012
|
# Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
|
|
||||||
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
|
||||||
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
|
||||||
@@ -21,7 +21,8 @@ repmgr: $(repmgr_OBJS)
|
|||||||
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
|
||||||
|
|
||||||
ifdef USE_PGXS
|
ifdef USE_PGXS
|
||||||
PGXS := $(shell pg_config --pgxs)
|
PG_CONFIG = pg_config
|
||||||
|
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||||
include $(PGXS)
|
include $(PGXS)
|
||||||
else
|
else
|
||||||
subdir = contrib/repmgr
|
subdir = contrib/repmgr
|
||||||
@@ -32,9 +33,13 @@ endif
|
|||||||
|
|
||||||
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
|
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
|
||||||
# is overriding pgxs install.
|
# is overriding pgxs install.
|
||||||
install:
|
install: install_prog install_ext
|
||||||
|
|
||||||
|
install_prog:
|
||||||
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)'
|
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)'
|
||||||
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)'
|
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)'
|
||||||
|
|
||||||
|
install_ext:
|
||||||
$(MAKE) -C sql install
|
$(MAKE) -C sql install
|
||||||
|
|
||||||
ifneq (,$(DATA)$(DATA_built))
|
ifneq (,$(DATA)$(DATA_built))
|
||||||
|
|||||||
@@ -1085,7 +1085,7 @@ License and Contributions
|
|||||||
=========================
|
=========================
|
||||||
|
|
||||||
repmgr is licensed under the GPL v3. All of its code and documentation is
|
repmgr is licensed under the GPL v3. All of its code and documentation is
|
||||||
Copyright 2010-2012, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
Copyright 2010-2014, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||||
details.
|
details.
|
||||||
|
|
||||||
Main sponsorship of repmgr has been from 2ndQuadrant customers.
|
Main sponsorship of repmgr has been from 2ndQuadrant customers.
|
||||||
|
|||||||
19
TODO
19
TODO
@@ -1,21 +1,18 @@
|
|||||||
Known issues in repmgr
|
Known issues in repmgr
|
||||||
======================
|
======================
|
||||||
|
|
||||||
* The check for whether ``wal_keep_segments`` is considered large enough
|
|
||||||
does a string comparison rather than an integer one. It can give both
|
|
||||||
false positive (setting is large enough but flagged as too small) and
|
|
||||||
false negative (setting is too small but not noted as such) errors.
|
|
||||||
|
|
||||||
* When running repmgr against a remote machine, operations that start
|
* When running repmgr against a remote machine, operations that start
|
||||||
the database server using the ``pg_ctl`` command may accidentally
|
the database server using the ``pg_ctl`` command may accidentally
|
||||||
terminate after their associated ssh session ends.
|
terminate after their associated ssh session ends.
|
||||||
|
|
||||||
* After running repmgrd as a regular foreground application, hitting
|
|
||||||
control-C causes the program to crash.
|
|
||||||
|
|
||||||
Planned feature improvements
|
Planned feature improvements
|
||||||
============================
|
============================
|
||||||
|
|
||||||
* Before running ``pg_start_backup()``, a sanity check that there is a
|
* Timeline increases when promoting a standby
|
||||||
a working ssh connection to the destination would help find
|
|
||||||
configuration errors before disturbing the database.
|
* A better check of which standby received the most data
|
||||||
|
|
||||||
|
* Make the fact that a standby may be delayed a factor in the voting
|
||||||
|
algorithm
|
||||||
|
|
||||||
|
* include support for delayed standbys
|
||||||
|
|||||||
@@ -13,13 +13,16 @@ Installation
|
|||||||
|
|
||||||
For convenience, we define:
|
For convenience, we define:
|
||||||
|
|
||||||
* node1 is the hostname fully qualified of the Master server, IP 192.168.1.10
|
**node1**
|
||||||
* node2 is the hostname fully qualified of the Standby server, IP 192.168.1.11
|
is the fully qualified hostname of the Master server, IP 192.168.1.10
|
||||||
* witness is the hostname fully qualified of the server used for witness, IP 192.168.1.12
|
**node2**
|
||||||
|
is the fully qualified hostname of the Standby server, IP 192.168.1.11
|
||||||
|
**witness**
|
||||||
|
is the fully qualified hostname of the server used for witness, IP 192.168.1.12
|
||||||
|
|
||||||
:Note: It is not recommanded to use name defining status of a server like «masterserver»,
|
**Note:** It is not recommended to use a name defining the status of a server like «masterserver»,
|
||||||
this is a name leading to confusion once a failover takes place and the Master is
|
this is a name leading to confusion once a failover takes place and the Master is
|
||||||
now on the «standbyserver».
|
now on the «standbyserver».
|
||||||
|
|
||||||
Summary
|
Summary
|
||||||
-------
|
-------
|
||||||
@@ -30,17 +33,24 @@ and a witness-repmgrd is installed in a third server where it uses a PostgreSQL
|
|||||||
cluster to communicate with other repmgrd daemons.
|
cluster to communicate with other repmgrd daemons.
|
||||||
|
|
||||||
1. Install PostgreSQL in all the servers involved (including the server used for
|
1. Install PostgreSQL in all the servers involved (including the server used for
|
||||||
witness)
|
witness)
|
||||||
|
|
||||||
2. Install repmgr in all the servers involved (including the server used for witness)
|
2. Install repmgr in all the servers involved (including the server used for witness)
|
||||||
|
|
||||||
3. Configure the Master PostgreSQL
|
3. Configure the Master PostgreSQL
|
||||||
|
|
||||||
4. Clone the Master to the Standby using "repmgr standby clone" command
|
4. Clone the Master to the Standby using "repmgr standby clone" command
|
||||||
|
|
||||||
5. Configure repmgr in all the servers involved (including the server used for witness)
|
5. Configure repmgr in all the servers involved (including the server used for witness)
|
||||||
|
|
||||||
6. Register Master and Standby nodes
|
6. Register Master and Standby nodes
|
||||||
|
|
||||||
7. Initiate witness server
|
7. Initiate witness server
|
||||||
|
|
||||||
8. Start the repmgrd daemons in all nodes
|
8. Start the repmgrd daemons in all nodes
|
||||||
|
|
||||||
:Note: A complete Hight-Availability design need at least 3 servers to still have
|
**Note:** A complete High-Availability design needs at least 3 servers to still have
|
||||||
a backup node after a first failure.
|
a backup node after a first failure.
|
||||||
|
|
||||||
Install PostgreSQL
|
Install PostgreSQL
|
||||||
------------------
|
------------------
|
||||||
@@ -76,8 +86,8 @@ Edit the file pg_hba.conf and add lines for the replication::
|
|||||||
host repmgr repmgr 192.168.1.10/30 trust
|
host repmgr repmgr 192.168.1.10/30 trust
|
||||||
host replication all 192.168.1.10/30 trust
|
host replication all 192.168.1.10/30 trust
|
||||||
|
|
||||||
:Note: It is also possible to use a password authentication (md5), .pgpass file
|
**Note:** It is also possible to use a password authentication (md5), .pgpass file
|
||||||
should be edited to allow connection between each node.
|
should be edited to allow connection between each node.
|
||||||
|
|
||||||
Create the user and database to manage replication::
|
Create the user and database to manage replication::
|
||||||
|
|
||||||
@@ -137,16 +147,26 @@ Log in each server and configure repmgr by editing the file
|
|||||||
promote_command='promote_command.sh'
|
promote_command='promote_command.sh'
|
||||||
follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf'
|
follow_command='repmgr standby follow -f /etc/repmgr/repmgr.conf'
|
||||||
|
|
||||||
* *cluster* is the name of the current replication.
|
**cluster**
|
||||||
* *node* is the number of the current node (1, 2 or 3 in the current example).
|
is the name of the current replication.
|
||||||
* *node_name* is an identifier for every node.
|
**node**
|
||||||
* *conninfo* is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration it is needed to add a 'port=5499' to the conninfo.
|
is the number of the current node (1, 2 or 3 in the current example).
|
||||||
* *master_response_timeout* is the maximum amount of time we are going to wait before deciding the master has died and start failover procedure.
|
**node_name**
|
||||||
* *reconnect_attempts* is the number of times we will try to reconnect to master after a failure has been detected and before start failover procedure.
|
is an identifier for every node.
|
||||||
* *reconnect_interval* is the amount of time between retries to reconnect to master after a failure has been detected and before start failover procedure.
|
**conninfo**
|
||||||
* *failover* configure behavior : *manual* or *automatic*.
|
is used to connect to the local PostgreSQL server (where the configuration file is) from any node. In the witness server configuration, 'port=5499' must be added to the conninfo.
|
||||||
* *promote_command* the command executed to do the failover (including the PostgreSQL failover itself). The command must return 0 on success.
|
**master_response_timeout**
|
||||||
* *follow_command* the command executed to address the current standby to another Master. The command must return 0 on success.
|
is the maximum amount of time we are going to wait before deciding the master has died and start failover procedure.
|
||||||
|
**reconnect_attempts**
|
||||||
|
is the number of times we will try to reconnect to master after a failure has been detected and before start failover procedure.
|
||||||
|
**reconnect_interval**
|
||||||
|
is the amount of time between retries to reconnect to master after a failure has been detected and before start failover procedure.
|
||||||
|
**failover**
|
||||||
|
configure behavior: *manual* or *automatic*.
|
||||||
|
**promote_command**
|
||||||
|
the command executed to do the failover (including the PostgreSQL failover itself). The command must return 0 on success.
|
||||||
|
**follow_command**
|
||||||
|
the command executed to address the current standby to another Master. The command must return 0 on success.
|
||||||
|
|
||||||
Register Master and Standby
|
Register Master and Standby
|
||||||
---------------------------
|
---------------------------
|
||||||
@@ -158,9 +178,7 @@ Register the node as Master::
|
|||||||
su - postgres
|
su - postgres
|
||||||
repmgr -f /etc/repmgr/repmgr.conf master register
|
repmgr -f /etc/repmgr/repmgr.conf master register
|
||||||
|
|
||||||
Log in node2.
|
Log in node2. Register it as a standby::
|
||||||
|
|
||||||
Register the node as Standby::
|
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgr -f /etc/repmgr/repmgr.conf standby register
|
repmgr -f /etc/repmgr/repmgr.conf standby register
|
||||||
@@ -183,10 +201,10 @@ Start the repmgrd daemons
|
|||||||
|
|
||||||
Log in node2 and witness.
|
Log in node2 and witness.
|
||||||
|
|
||||||
su - postgres
|
su - postgres
|
||||||
repmgrd -f /etc/repmgr/repmgr.conf > /var/log/postgresql/repmgr.log 2>&1
|
repmgrd -f /etc/repmgr/repmgr.conf > /var/log/postgresql/repmgr.log 2>&1
|
||||||
|
|
||||||
:Note: The Master does not need a repmgrd daemon.
|
**Note:** The Master does not need a repmgrd daemon.
|
||||||
|
|
||||||
|
|
||||||
Suspend Automatic behavior
|
Suspend Automatic behavior
|
||||||
@@ -201,12 +219,6 @@ Then, signal repmgrd daemon::
|
|||||||
su - postgres
|
su - postgres
|
||||||
kill -HUP `pidof repmgrd`
|
kill -HUP `pidof repmgrd`
|
||||||
|
|
||||||
TODO : -HUP configuration update is not implemented and it should check its
|
|
||||||
configuration file against its configuration in DB, updating
|
|
||||||
accordingly the SQL conf (especialy the failover manual or auto)
|
|
||||||
this allow witness-standby and standby-not-promotable features
|
|
||||||
and simpler usage of the tool ;)
|
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
|||||||
14
check_dir.c
14
check_dir.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* check_dir.c - Directories management functions
|
* check_dir.c - Directories management functions
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -225,12 +225,12 @@ is_pg_dir(char *dir)
|
|||||||
struct stat sb;
|
struct stat sb;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
// test pgdata
|
/* test pgdata */
|
||||||
xsnprintf(path, buf_sz, "%s/PG_VERSION", dir);
|
xsnprintf(path, buf_sz, "%s/PG_VERSION", dir);
|
||||||
if (stat(path, &sb) == 0)
|
if (stat(path, &sb) == 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// test tablespace dir
|
/* test tablespace dir */
|
||||||
sprintf(path, "ls %s/PG_*/ -I*", dir);
|
sprintf(path, "ls %s/PG_*/ -I*", dir);
|
||||||
r = system(path);
|
r = system(path);
|
||||||
if (r == 0)
|
if (r == 0)
|
||||||
@@ -256,7 +256,7 @@ create_pgdir(char *dir, bool force)
|
|||||||
{
|
{
|
||||||
log_err(_("couldn't create directory \"%s\"...\n"),
|
log_err(_("couldn't create directory \"%s\"...\n"),
|
||||||
dir);
|
dir);
|
||||||
exit(ERR_BAD_CONFIG);
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
@@ -268,7 +268,7 @@ create_pgdir(char *dir, bool force)
|
|||||||
{
|
{
|
||||||
log_err(_("could not change permissions of directory \"%s\": %s\n"),
|
log_err(_("could not change permissions of directory \"%s\": %s\n"),
|
||||||
dir, strerror(errno));
|
dir, strerror(errno));
|
||||||
exit(ERR_BAD_CONFIG);
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
@@ -293,7 +293,7 @@ create_pgdir(char *dir, bool force)
|
|||||||
"If you are sure you want to clone here, "
|
"If you are sure you want to clone here, "
|
||||||
"please check there is no PostgreSQL server "
|
"please check there is no PostgreSQL server "
|
||||||
"running and use the --force option\n"));
|
"running and use the --force option\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@@ -301,7 +301,7 @@ create_pgdir(char *dir, bool force)
|
|||||||
/* Trouble accessing directory */
|
/* Trouble accessing directory */
|
||||||
log_err(_("could not access directory \"%s\": %s\n"),
|
log_err(_("could not access directory \"%s\": %s\n"),
|
||||||
dir, strerror(errno));
|
dir, strerror(errno));
|
||||||
exit(ERR_BAD_CONFIG);
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* check_dir.h
|
* check_dir.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2012
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
43
config.c
43
config.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* config.c - Functions to parse the config file
|
* config.c - Functions to parse the config file
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -42,6 +42,8 @@ parse_config(const char *config_file, t_configuration_options *options)
|
|||||||
memset(options->follow_command, 0, sizeof(options->follow_command));
|
memset(options->follow_command, 0, sizeof(options->follow_command));
|
||||||
memset(options->rsync_options, 0, sizeof(options->rsync_options));
|
memset(options->rsync_options, 0, sizeof(options->rsync_options));
|
||||||
memset(options->ssh_options, 0, sizeof(options->ssh_options));
|
memset(options->ssh_options, 0, sizeof(options->ssh_options));
|
||||||
|
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||||
|
memset(options->pgctl_options, 0, sizeof(options->pgctl_options));
|
||||||
|
|
||||||
/* if nothing has been provided defaults to 60 */
|
/* if nothing has been provided defaults to 60 */
|
||||||
options->master_response_timeout = 60;
|
options->master_response_timeout = 60;
|
||||||
@@ -50,6 +52,9 @@ parse_config(const char *config_file, t_configuration_options *options)
|
|||||||
options->reconnect_attempts = 6;
|
options->reconnect_attempts = 6;
|
||||||
options->reconnect_intvl = 10;
|
options->reconnect_intvl = 10;
|
||||||
|
|
||||||
|
options->monitor_interval_secs = 2;
|
||||||
|
options->retry_promote_interval_secs = 300;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since some commands don't require a config file at all, not
|
* Since some commands don't require a config file at all, not
|
||||||
* having one isn't necessarily a problem.
|
* having one isn't necessarily a problem.
|
||||||
@@ -114,6 +119,16 @@ parse_config(const char *config_file, t_configuration_options *options)
|
|||||||
options->reconnect_attempts = atoi(value);
|
options->reconnect_attempts = atoi(value);
|
||||||
else if (strcmp(name, "reconnect_interval") == 0)
|
else if (strcmp(name, "reconnect_interval") == 0)
|
||||||
options->reconnect_intvl = atoi(value);
|
options->reconnect_intvl = atoi(value);
|
||||||
|
else if (strcmp(name, "pg_bindir") == 0)
|
||||||
|
strncpy (options->pg_bindir, value, MAXLEN);
|
||||||
|
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||||
|
strncpy (options->pgctl_options, value, MAXLEN);
|
||||||
|
else if (strcmp(name, "logfile") == 0)
|
||||||
|
strncpy(options->logfile, value, MAXLEN);
|
||||||
|
else if (strcmp(name, "monitor_interval_secs") == 0)
|
||||||
|
options->monitor_interval_secs = atoi(value);
|
||||||
|
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||||
|
options->retry_promote_interval_secs = atoi(value);
|
||||||
else
|
else
|
||||||
log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value);
|
log_warning(_("%s/%s: Unknown name/value pair!\n"), name, value);
|
||||||
}
|
}
|
||||||
@@ -151,6 +166,12 @@ parse_config(const char *config_file, t_configuration_options *options)
|
|||||||
log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n"));
|
log_err(_("Reconnect intervals must be zero or greater. Check the configuration file.\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (*options->pg_bindir == '\0')
|
||||||
|
{
|
||||||
|
log_err(_("pg_bindir config value not found. Check the configuration file.\n"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -221,49 +242,49 @@ reload_configuration(char *config_file, t_configuration_options *orig_options)
|
|||||||
parse_config(config_file, &new_options);
|
parse_config(config_file, &new_options);
|
||||||
if (new_options.node == -1)
|
if (new_options.node == -1)
|
||||||
{
|
{
|
||||||
log_warning(_("\nCannot load new configuration, will keep current one.\n"));
|
log_warning(_("Cannot load new configuration, will keep current one.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
|
if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\nCannot change cluster name, will keep current configuration.\n"));
|
log_warning(_("Cannot change cluster name, will keep current configuration.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_options.node != orig_options->node)
|
if (new_options.node != orig_options->node)
|
||||||
{
|
{
|
||||||
log_warning(_("\nCannot change node number, will keep current configuration.\n"));
|
log_warning(_("Cannot change node number, will keep current configuration.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_options.node_name != orig_options->node_name)
|
if (strcmp(new_options.node_name, orig_options->node_name) != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\nCannot change standby name, will keep current configuration.\n"));
|
log_warning(_("Cannot change standby name, will keep current configuration.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
|
if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
|
||||||
{
|
{
|
||||||
log_warning(_("\nNew value for failover is not valid. Should be MANUAL or AUTOMATIC.\n"));
|
log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_options.master_response_timeout <= 0)
|
if (new_options.master_response_timeout <= 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\nNew value for master_response_timeout is not valid. Should be greater than zero.\n"));
|
log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_options.reconnect_attempts < 0)
|
if (new_options.reconnect_attempts < 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\nNew value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
|
log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_options.reconnect_intvl < 0)
|
if (new_options.reconnect_intvl < 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\nNew value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
|
log_warning(_("New value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -271,7 +292,7 @@ reload_configuration(char *config_file, t_configuration_options *orig_options)
|
|||||||
conn = establishDBConnection(new_options.conninfo, false);
|
conn = establishDBConnection(new_options.conninfo, false);
|
||||||
if (!conn || (PQstatus(conn) != CONNECTION_OK))
|
if (!conn || (PQstatus(conn) != CONNECTION_OK))
|
||||||
{
|
{
|
||||||
log_warning(_("\nconninfo string is not valid, will keep current configuration.\n"));
|
log_warning(_("conninfo string is not valid, will keep current configuration.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
|
|||||||
9
config.h
9
config.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* config.h
|
* config.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2012
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -40,8 +40,15 @@ typedef struct
|
|||||||
int master_response_timeout;
|
int master_response_timeout;
|
||||||
int reconnect_attempts;
|
int reconnect_attempts;
|
||||||
int reconnect_intvl;
|
int reconnect_intvl;
|
||||||
|
char pg_bindir[MAXLEN];
|
||||||
|
char pgctl_options[MAXLEN];
|
||||||
|
char logfile[MAXLEN];
|
||||||
|
int monitor_interval_secs;
|
||||||
|
int retry_promote_interval_secs;
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
|
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", 0, 0 }
|
||||||
|
|
||||||
void parse_config(const char *config_file, t_configuration_options *options);
|
void parse_config(const char *config_file, t_configuration_options *options);
|
||||||
void parse_line(char *buff, char *name, char *value);
|
void parse_line(char *buff, char *name, char *value);
|
||||||
char *trim(char *s);
|
char *trim(char *s);
|
||||||
|
|||||||
91
dbutils.c
91
dbutils.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* dbutils.c - Database connection/management functions
|
* dbutils.c - Database connection/management functions
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -71,25 +71,22 @@ establishDBConnectionByParams(const char *keywords[], const char *values[],const
|
|||||||
return conn;
|
return conn;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
int
|
||||||
is_standby(PGconn *conn)
|
is_standby(PGconn *conn)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
bool result = false;
|
int result = 0;
|
||||||
|
|
||||||
res = PQexec(conn, "SELECT pg_is_in_recovery()");
|
res = PQexec(conn, "SELECT pg_is_in_recovery()");
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Can't query server mode: %s"),
|
log_err(_("Can't query server mode: %s"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
result = -1;
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
|
else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
|
||||||
if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
|
result = 1;
|
||||||
result = true;
|
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return result;
|
return result;
|
||||||
@@ -97,11 +94,11 @@ is_standby(PGconn *conn)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
int
|
||||||
is_witness(PGconn *conn, char *schema, char *cluster, int node_id)
|
is_witness(PGconn *conn, char *schema, char *cluster, int node_id)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
bool result = false;
|
int result = 0;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT witness from %s.repl_nodes where cluster = '%s' and id = %d",
|
sqlquery_snprintf(sqlquery, "SELECT witness from %s.repl_nodes where cluster = '%s' and id = %d",
|
||||||
@@ -110,13 +107,10 @@ is_witness(PGconn *conn, char *schema, char *cluster, int node_id)
|
|||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Can't query server mode: %s"), PQerrorMessage(conn));
|
log_err(_("Can't query server mode: %s"), PQerrorMessage(conn));
|
||||||
PQclear(res);
|
result = -1;
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
|
else if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
|
||||||
if (PQntuples(res) == 1 && strcmp(PQgetvalue(res, 0, 0), "t") == 0)
|
result = 1;
|
||||||
result = true;
|
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return result;
|
return result;
|
||||||
@@ -138,7 +132,7 @@ is_pgup(PGconn *conn, int timeout)
|
|||||||
{
|
{
|
||||||
if (twice)
|
if (twice)
|
||||||
return false;
|
return false;
|
||||||
PQreset(conn); // reconnect
|
PQreset(conn); /* reconnect */
|
||||||
twice = true;
|
twice = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -164,10 +158,10 @@ is_pgup(PGconn *conn, int timeout)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
failed:
|
failed:
|
||||||
// we need to retry, because we might just have loose the connection once
|
/* we need to retry, because we might just have loose the connection once */
|
||||||
if (twice)
|
if (twice)
|
||||||
return false;
|
return false;
|
||||||
PQreset(conn); // reconnect
|
PQreset(conn); /* reconnect */
|
||||||
twice = true;
|
twice = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -198,8 +192,7 @@ pg_version(PGconn *conn, char* major_version)
|
|||||||
log_err(_("Version check PQexec failed: %s"),
|
log_err(_("Version check PQexec failed: %s"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(conn);
|
return NULL;
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
major_version1 = atoi(PQgetvalue(res, 0, 0));
|
major_version1 = atoi(PQgetvalue(res, 0, 0));
|
||||||
@@ -220,12 +213,13 @@ pg_version(PGconn *conn, char* major_version)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
int
|
||||||
guc_setted(PGconn *conn, const char *parameter, const char *op,
|
guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||||
const char *value)
|
const char *value)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
int retval = 1;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
|
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
|
||||||
" WHERE name = '%s' AND setting %s '%s'",
|
" WHERE name = '%s' AND setting %s '%s'",
|
||||||
@@ -236,30 +230,29 @@ guc_setted(PGconn *conn, const char *parameter, const char *op,
|
|||||||
{
|
{
|
||||||
log_err(_("GUC setting check PQexec failed: %s"),
|
log_err(_("GUC setting check PQexec failed: %s"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
retval = -1;
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
if (PQntuples(res) == 0)
|
else if (PQntuples(res) == 0)
|
||||||
{
|
{
|
||||||
PQclear(res);
|
retval = 0;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
return true;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Just like guc_setted except with an extra parameter containing the name of
|
* Just like guc_set except with an extra parameter containing the name of
|
||||||
* the pg datatype so that the comparison can be done properly.
|
* the pg datatype so that the comparison can be done properly.
|
||||||
*/
|
*/
|
||||||
bool
|
int
|
||||||
guc_setted_typed(PGconn *conn, const char *parameter, const char *op,
|
guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||||
const char *value, const char *datatype)
|
const char *value, const char *datatype)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
int retval = 1;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
|
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
|
||||||
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
" WHERE name = '%s' AND setting::%s %s '%s'::%s",
|
||||||
@@ -270,18 +263,16 @@ guc_setted_typed(PGconn *conn, const char *parameter, const char *op,
|
|||||||
{
|
{
|
||||||
log_err(_("GUC setting check PQexec failed: %s"),
|
log_err(_("GUC setting check PQexec failed: %s"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
retval = -1;
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
if (PQntuples(res) == 0)
|
else if (PQntuples(res) == 0)
|
||||||
{
|
{
|
||||||
PQclear(res);
|
retval = 0;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
return true;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -289,7 +280,7 @@ const char *
|
|||||||
get_cluster_size(PGconn *conn)
|
get_cluster_size(PGconn *conn)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
const char *size;
|
const char *size = NULL;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
sqlquery_snprintf(
|
sqlquery_snprintf(
|
||||||
@@ -302,11 +293,12 @@ get_cluster_size(PGconn *conn)
|
|||||||
{
|
{
|
||||||
log_err(_("Get cluster size PQexec failed: %s"),
|
log_err(_("Get cluster size PQexec failed: %s"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
size = PQgetvalue(res, 0, 0);
|
else
|
||||||
|
{
|
||||||
|
size = PQgetvalue(res, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
@@ -367,8 +359,7 @@ getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
|
|||||||
log_err(_("Can't get nodes info: %s\n"),
|
log_err(_("Can't get nodes info: %s\n"),
|
||||||
PQerrorMessage(standby_conn));
|
PQerrorMessage(standby_conn));
|
||||||
PQclear(res1);
|
PQclear(res1);
|
||||||
PQfinish(standby_conn);
|
return NULL;
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < PQntuples(res1); i++)
|
for (i = 0; i < PQntuples(res1); i++)
|
||||||
@@ -443,7 +434,7 @@ wait_connection_availability(PGconn *conn, int timeout)
|
|||||||
{
|
{
|
||||||
if (PQconsumeInput(conn) == 0)
|
if (PQconsumeInput(conn) == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("wait_connection_availability: could not receive data from master. %s\n"),
|
log_warning(_("wait_connection_availability: could not receive data from connection. %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
11
dbutils.h
11
dbutils.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* dbutils.h
|
* dbutils.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2012
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -26,14 +26,15 @@ PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
|
|||||||
PGconn *establishDBConnectionByParams(const char *keywords[],
|
PGconn *establishDBConnectionByParams(const char *keywords[],
|
||||||
const char *values[],
|
const char *values[],
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
bool is_standby(PGconn *conn);
|
int is_standby(PGconn *conn);
|
||||||
bool is_witness(PGconn *conn, char *schema, char *cluster, int node_id);
|
int is_witness(PGconn *conn, char *schema, char *cluster, int node_id);
|
||||||
bool is_pgup(PGconn *conn, int timeout);
|
bool is_pgup(PGconn *conn, int timeout);
|
||||||
char *pg_version(PGconn *conn, char* major_version);
|
char *pg_version(PGconn *conn, char* major_version);
|
||||||
bool guc_setted(PGconn *conn, const char *parameter, const char *op,
|
int guc_set(PGconn *conn, const char *parameter, const char *op,
|
||||||
const char *value);
|
const char *value);
|
||||||
bool guc_setted_typed(PGconn *conn, const char *parameter, const char *op,
|
int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
|
||||||
const char *value, const char *datatype);
|
const char *value, const char *datatype);
|
||||||
|
|
||||||
const char *get_cluster_size(PGconn *conn);
|
const char *get_cluster_size(PGconn *conn);
|
||||||
PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
|
PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
|
||||||
int *master_id, char *master_conninfo_out);
|
int *master_id, char *master_conninfo_out);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* errcode.h
|
* errcode.h
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -35,5 +35,6 @@
|
|||||||
#define ERR_STR_OVERFLOW 10
|
#define ERR_STR_OVERFLOW 10
|
||||||
#define ERR_FAILOVER_FAIL 11
|
#define ERR_FAILOVER_FAIL 11
|
||||||
#define ERR_BAD_SSH 12
|
#define ERR_BAD_SSH 12
|
||||||
|
#define ERR_SYS_FAILURE 13
|
||||||
|
|
||||||
#endif /* _ERRCODE_H_ */
|
#endif /* _ERRCODE_H_ */
|
||||||
|
|||||||
9
log.c
9
log.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.c - Logging methods
|
* log.c - Logging methods
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This module is a set of methods for logging (currently only syslog)
|
* This module is a set of methods for logging (currently only syslog)
|
||||||
*
|
*
|
||||||
@@ -68,7 +68,7 @@ static int detect_log_facility(const char* facility);
|
|||||||
int log_type = REPMGR_STDERR;
|
int log_type = REPMGR_STDERR;
|
||||||
int log_level = LOG_NOTICE;
|
int log_level = LOG_NOTICE;
|
||||||
|
|
||||||
bool logger_init(const char* ident, const char* level, const char* facility)
|
bool logger_init(t_configuration_options *opts, const char* ident, const char* level, const char* facility)
|
||||||
{
|
{
|
||||||
|
|
||||||
int l;
|
int l;
|
||||||
@@ -140,6 +140,11 @@ bool logger_init(const char* ident, const char* level, const char* facility)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (*opts->logfile)
|
||||||
|
{
|
||||||
|
freopen(opts->logfile, "a", stderr);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
6
log.h
6
log.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.h
|
* log.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2012
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -25,7 +25,7 @@
|
|||||||
#define REPMGR_SYSLOG 1
|
#define REPMGR_SYSLOG 1
|
||||||
#define REPMGR_STDERR 2
|
#define REPMGR_STDERR 2
|
||||||
|
|
||||||
void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...);
|
void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||||
|
|
||||||
/* Standard error logging */
|
/* Standard error logging */
|
||||||
#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
|
#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
|
||||||
@@ -114,7 +114,7 @@ void stderr_log_with_level(const char *level_name, int level, const char *fmt, .
|
|||||||
|
|
||||||
/* Logger initialisation and shutdown */
|
/* Logger initialisation and shutdown */
|
||||||
bool logger_shutdown(void);
|
bool logger_shutdown(void);
|
||||||
bool logger_init(const char* ident, const char* level, const char* facility);
|
bool logger_init(t_configuration_options *opts, const char* ident, const char* level, const char* facility);
|
||||||
void logger_min_verbose(int minimum);
|
void logger_min_verbose(int minimum);
|
||||||
|
|
||||||
extern int log_type;
|
extern int log_type;
|
||||||
|
|||||||
331
repmgr.c
331
repmgr.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.c - Command interpreter for the repmgr
|
* repmgr.c - Command interpreter for the repmgr
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This module is a command-line utility to easily setup a cluster of
|
* This module is a command-line utility to easily setup a cluster of
|
||||||
* hot standby servers for an HA environment
|
* hot standby servers for an HA environment
|
||||||
@@ -85,8 +85,8 @@ bool need_a_node = true;
|
|||||||
bool require_password = false;
|
bool require_password = false;
|
||||||
|
|
||||||
/* Initialization of runtime options */
|
/* Initialization of runtime options */
|
||||||
t_runtime_options runtime_options = { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 };
|
t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER;
|
||||||
t_configuration_options options = { "", -1, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1 };
|
t_configuration_options options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||||
|
|
||||||
static char *server_mode = NULL;
|
static char *server_mode = NULL;
|
||||||
static char *server_cmd = NULL;
|
static char *server_cmd = NULL;
|
||||||
@@ -268,11 +268,8 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (optind < argc)
|
if (optind < argc)
|
||||||
{
|
{
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
log_err(_("%s: too many command-line arguments (first extra is \"%s\")\n"),
|
log_err(_("%s: too many command-line arguments (first extra is \"%s\")\n"),
|
||||||
progname, argv[optind]);
|
progname, argv[optind]);
|
||||||
usage();
|
usage();
|
||||||
@@ -322,7 +319,7 @@ main(int argc, char **argv)
|
|||||||
* at, but it often requires detailed logging to troubleshoot
|
* at, but it often requires detailed logging to troubleshoot
|
||||||
* problems.
|
* problems.
|
||||||
*/
|
*/
|
||||||
logger_init(progname, options.loglevel, options.logfacility);
|
logger_init(&options, progname, options.loglevel, options.logfacility);
|
||||||
if (runtime_options.verbose)
|
if (runtime_options.verbose)
|
||||||
logger_min_verbose(LOG_INFO);
|
logger_min_verbose(LOG_INFO);
|
||||||
|
|
||||||
@@ -486,29 +483,35 @@ do_master_register(void)
|
|||||||
{
|
{
|
||||||
PGconn *conn;
|
PGconn *conn;
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN], *ret_ver;
|
||||||
|
|
||||||
bool schema_exists = false;
|
bool schema_exists = false;
|
||||||
char schema_quoted[MAXLEN];
|
char schema_quoted[MAXLEN];
|
||||||
char master_version[MAXVERSIONSTR];
|
char master_version[MAXVERSIONSTR];
|
||||||
|
int ret;
|
||||||
|
|
||||||
conn = establishDBConnection(options.conninfo, true);
|
conn = establishDBConnection(options.conninfo, true);
|
||||||
|
|
||||||
/* master should be v9 or better */
|
/* master should be v9 or better */
|
||||||
log_info(_("%s connecting to master database\n"), progname);
|
log_info(_("%s connecting to master database\n"), progname);
|
||||||
pg_version(conn, master_version);
|
ret_ver = pg_version(conn, master_version);
|
||||||
if (strcmp(master_version, "") == 0)
|
if (ret_ver == NULL || strcmp(master_version, "") == 0)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err( _("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret_ver != NULL)
|
||||||
|
log_err( _("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check we are a master */
|
/* Check we are a master */
|
||||||
log_info(_("%s connected to master, checking its state\n"), progname);
|
log_info(_("%s connected to master, checking its state\n"), progname);
|
||||||
if (is_standby(conn))
|
ret = is_standby(conn);
|
||||||
|
|
||||||
|
if (ret)
|
||||||
{
|
{
|
||||||
log_err(_("Trying to register a standby node as a master\n"));
|
log_err(_(ret == 1 ? "Trying to register a standby node as a master\n" :
|
||||||
|
"Connection to node lost!\n"));
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -565,6 +568,22 @@ do_master_register(void)
|
|||||||
PGconn *master_conn;
|
PGconn *master_conn;
|
||||||
int id;
|
int id;
|
||||||
|
|
||||||
|
if (runtime_options.force)
|
||||||
|
{
|
||||||
|
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
|
||||||
|
" WHERE id = %d",
|
||||||
|
repmgr_schema, options.node);
|
||||||
|
log_debug(_("master register: %s\n"), sqlquery);
|
||||||
|
|
||||||
|
if (!PQexec(conn, sqlquery))
|
||||||
|
{
|
||||||
|
log_warning(_("Cannot delete node details, %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Ensure there isn't any other master already registered */
|
/* Ensure there isn't any other master already registered */
|
||||||
master_conn = getMasterConnection(conn, repmgr_schema,
|
master_conn = getMasterConnection(conn, repmgr_schema,
|
||||||
options.cluster_name, &id,NULL);
|
options.cluster_name, &id,NULL);
|
||||||
@@ -577,21 +596,6 @@ do_master_register(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Now register the master */
|
/* Now register the master */
|
||||||
if (runtime_options.force)
|
|
||||||
{
|
|
||||||
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
|
|
||||||
" WHERE id = %d",
|
|
||||||
repmgr_schema, options.node);
|
|
||||||
log_debug(_("master register: %s\n"), sqlquery);
|
|
||||||
|
|
||||||
if (!PQexec(conn, sqlquery))
|
|
||||||
{
|
|
||||||
log_warning(_("Cannot delete node details, %s\n"),
|
|
||||||
PQerrorMessage(conn));
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, name, conninfo, priority) "
|
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes (id, cluster, name, conninfo, priority) "
|
||||||
"VALUES (%d, '%s', '%s', '%s', %d)",
|
"VALUES (%d, '%s', '%s', '%s', %d)",
|
||||||
@@ -619,10 +623,10 @@ do_standby_register(void)
|
|||||||
{
|
{
|
||||||
PGconn *conn;
|
PGconn *conn;
|
||||||
PGconn *master_conn;
|
PGconn *master_conn;
|
||||||
int master_id;
|
int master_id, ret;
|
||||||
|
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN], *ret_ver;
|
||||||
char schema_quoted[MAXLEN];
|
char schema_quoted[MAXLEN];
|
||||||
|
|
||||||
char master_version[MAXVERSIONSTR];
|
char master_version[MAXVERSIONSTR];
|
||||||
@@ -635,18 +639,22 @@ do_standby_register(void)
|
|||||||
|
|
||||||
/* should be v9 or better */
|
/* should be v9 or better */
|
||||||
log_info(_("%s connected to standby, checking its state\n"), progname);
|
log_info(_("%s connected to standby, checking its state\n"), progname);
|
||||||
pg_version(conn, standby_version);
|
ret_ver = pg_version(conn, standby_version);
|
||||||
if (strcmp(standby_version, "") == 0)
|
if (ret_ver == NULL || strcmp(standby_version, "") == 0)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret_ver != NULL)
|
||||||
|
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check we are a standby */
|
/* Check we are a standby */
|
||||||
if (!is_standby(conn))
|
ret = is_standby(conn);
|
||||||
|
if (ret == 0 || ret == -1)
|
||||||
{
|
{
|
||||||
log_err(_("repmgr: This node should be a standby (%s)\n"), options.conninfo);
|
log_err(_(ret == 0 ? "repmgr: This node should be a standby (%s)\n" :
|
||||||
|
"repmgr: connection to node (%s) lost\n"), options.conninfo);
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -697,12 +705,13 @@ do_standby_register(void)
|
|||||||
|
|
||||||
/* master should be v9 or better */
|
/* master should be v9 or better */
|
||||||
log_info(_("%s connected to master, checking its state\n"), progname);
|
log_info(_("%s connected to master, checking its state\n"), progname);
|
||||||
pg_version(master_conn, master_version);
|
ret_ver = pg_version(master_conn, master_version);
|
||||||
if (strcmp(master_version, "") == 0)
|
if (ret_ver == NULL || strcmp(master_version, "") == 0)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret_ver != NULL)
|
||||||
|
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -742,7 +751,8 @@ do_standby_register(void)
|
|||||||
options.conninfo, options.priority);
|
options.conninfo, options.priority);
|
||||||
log_debug(_("standby register: %s\n"), sqlquery);
|
log_debug(_("standby register: %s\n"), sqlquery);
|
||||||
|
|
||||||
if (!PQexec(master_conn, sqlquery))
|
res = PQexec(master_conn, sqlquery);
|
||||||
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Cannot insert node details, %s\n"),
|
log_err(_("Cannot insert node details, %s\n"),
|
||||||
PQerrorMessage(master_conn));
|
PQerrorMessage(master_conn));
|
||||||
@@ -765,10 +775,11 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
PGconn *conn;
|
PGconn *conn;
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN], *ret;
|
||||||
|
const char *cluster_size;
|
||||||
|
|
||||||
int r = 0;
|
int r = 0, retval = SUCCESS;
|
||||||
int i;
|
int i, is_standby_retval;
|
||||||
bool flag_success = false;
|
bool flag_success = false;
|
||||||
bool test_mode = false;
|
bool test_mode = false;
|
||||||
|
|
||||||
@@ -819,45 +830,60 @@ do_standby_clone(void)
|
|||||||
|
|
||||||
/* primary should be v9 or better */
|
/* primary should be v9 or better */
|
||||||
log_info(_("%s connected to master, checking its state\n"), progname);
|
log_info(_("%s connected to master, checking its state\n"), progname);
|
||||||
pg_version(conn, master_version);
|
ret = pg_version(conn, master_version);
|
||||||
if (strcmp(master_version, "") == 0)
|
if (ret == NULL || strcmp(master_version, "") == 0)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret != NULL)
|
||||||
|
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check we are cloning a primary node */
|
/* Check we are cloning a primary node */
|
||||||
if (is_standby(conn))
|
is_standby_retval = is_standby(conn);
|
||||||
|
if (is_standby_retval)
|
||||||
{
|
{
|
||||||
|
log_err(_(is_standby_retval == 1 ? "The command should clone a primary node\n" :
|
||||||
|
"Connection to node lost!\n"));
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err(_("\nThe command should clone a primary node\n"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* And check if it is well configured */
|
/* And check if it is well configured */
|
||||||
if (!guc_setted(conn, "wal_level", "=", "hot_standby"))
|
i = guc_set(conn, "wal_level", "=", "hot_standby");
|
||||||
|
if (i == 0 || i == -1)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname);
|
if (i == 0)
|
||||||
|
log_err(_("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
if (!guc_setted_typed(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments, "integer"))
|
|
||||||
|
i = guc_set_typed(conn, "wal_keep_segments", ">=", runtime_options.wal_keep_segments, "integer");
|
||||||
|
if (i == 0 || i == -1)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option or edit the postgresql.conf of the PostgreSQL master.)\n"), progname, runtime_options.wal_keep_segments);
|
if (i == 0)
|
||||||
|
log_err(_("%s needs parameter 'wal_keep_segments' to be set to %s or greater (see the '-w' option or edit the postgresql.conf of the PostgreSQL master.)\n"), progname, runtime_options.wal_keep_segments);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
if (!guc_setted(conn, "archive_mode", "=", "on"))
|
|
||||||
|
i = guc_set(conn, "archive_mode", "=", "on");
|
||||||
|
if (i == 0 || i == -1)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err(_("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname);
|
if (i == 0)
|
||||||
|
log_err(_("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
if (!guc_setted(conn, "hot_standby", "=", "on"))
|
|
||||||
|
i = guc_set(conn, "hot_standby", "=", "on");
|
||||||
|
if (i == 0 || i == -1)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
log_err(_("%s needs parameter 'hot_standby' to be set to 'on'\n"), progname);
|
if (i == 0)
|
||||||
|
log_err(_("%s needs parameter 'hot_standby' to be set to 'on'\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -952,7 +978,10 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
log_info(_("Successfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn));
|
cluster_size = get_cluster_size(conn);
|
||||||
|
if (cluster_size == NULL)
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
log_info(_("Successfully connected to primary. Current installation size is %s\n"), cluster_size);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX master_xlog_directory should be discovered from master configuration
|
* XXX master_xlog_directory should be discovered from master configuration
|
||||||
@@ -1036,6 +1065,8 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
log_err(_("%s: couldn't use directory %s ...\nUse --force option to force\n"),
|
log_err(_("%s: couldn't use directory %s ...\nUse --force option to force\n"),
|
||||||
progname, local_data_directory);
|
progname, local_data_directory);
|
||||||
|
r = ERR_BAD_CONFIG;
|
||||||
|
retval = ERR_BAD_CONFIG;
|
||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1175,7 +1206,7 @@ stop_backup:
|
|||||||
log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn));
|
log_err(_("Can't stop backup: %s\n"), PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_STOP_BACKUP);
|
exit(retval);
|
||||||
}
|
}
|
||||||
last_wal_segment = PQgetvalue(res, 0, 0);
|
last_wal_segment = PQgetvalue(res, 0, 0);
|
||||||
|
|
||||||
@@ -1243,13 +1274,13 @@ do_standby_promote(void)
|
|||||||
{
|
{
|
||||||
PGconn *conn;
|
PGconn *conn;
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN], *ret;
|
||||||
char script[MAXLEN];
|
char script[MAXLEN];
|
||||||
|
|
||||||
PGconn *old_master_conn;
|
PGconn *old_master_conn;
|
||||||
int old_master_id;
|
int old_master_id;
|
||||||
|
|
||||||
int r;
|
int r, retval;
|
||||||
char data_dir[MAXLEN];
|
char data_dir[MAXLEN];
|
||||||
char recovery_file_path[MAXFILENAME];
|
char recovery_file_path[MAXFILENAME];
|
||||||
char recovery_done_path[MAXFILENAME];
|
char recovery_done_path[MAXFILENAME];
|
||||||
@@ -1262,18 +1293,22 @@ do_standby_promote(void)
|
|||||||
|
|
||||||
/* we need v9 or better */
|
/* we need v9 or better */
|
||||||
log_info(_("%s connected to master, checking its state\n"), progname);
|
log_info(_("%s connected to master, checking its state\n"), progname);
|
||||||
pg_version(conn, standby_version);
|
ret = pg_version(conn, standby_version);
|
||||||
if (strcmp(standby_version, "") == 0)
|
if (ret == NULL || strcmp(standby_version, "") == 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret != NULL)
|
||||||
|
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check we are in a standby node */
|
/* Check we are in a standby node */
|
||||||
if (!is_standby(conn))
|
retval = is_standby(conn);
|
||||||
|
if (retval == 0 || retval == -1)
|
||||||
{
|
{
|
||||||
log_err(_("%s: The command should be executed on a standby node\n"), progname);
|
log_err(_(retval == 0 ? "%s: The command should be executed on a standby node\n" :
|
||||||
|
"%s: connection to node lost!\n"), progname);
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1313,13 +1348,12 @@ do_standby_promote(void)
|
|||||||
rename(recovery_file_path, recovery_done_path);
|
rename(recovery_file_path, recovery_done_path);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We assume the pg_ctl script is in the PATH. Restart and wait for
|
* Restart and wait for the server to finish starting, so that the check
|
||||||
* the server to finish starting, so that the check below will
|
* below will find an active server rather than one starting up. This may
|
||||||
* find an active server rather than one starting up. This may
|
|
||||||
* hang for up the default timeout (60 seconds).
|
* hang for up the default timeout (60 seconds).
|
||||||
*/
|
*/
|
||||||
log_notice(_("%s: restarting server using pg_ctl\n"), progname);
|
log_notice(_("%s: restarting server using %s/pg_ctl\n"), progname, options.pg_bindir);
|
||||||
maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir);
|
maxlen_snprintf(script, "%s/pg_ctl %s -D %s -w -m fast restart", options.pg_bindir, options.pgctl_options, data_dir);
|
||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
@@ -1330,13 +1364,15 @@ do_standby_promote(void)
|
|||||||
/* reconnect to check we got promoted */
|
/* reconnect to check we got promoted */
|
||||||
log_info(_("%s connecting to now restarted database\n"), progname);
|
log_info(_("%s connecting to now restarted database\n"), progname);
|
||||||
conn = establishDBConnection(options.conninfo, true);
|
conn = establishDBConnection(options.conninfo, true);
|
||||||
if (is_standby(conn))
|
retval = is_standby(conn);
|
||||||
|
if (retval)
|
||||||
{
|
{
|
||||||
log_err(_("\n%s: STANDBY PROMOTE failed, this is still a standby node.\n"), progname);
|
log_err(_(retval == 1 ? "%s: STANDBY PROMOTE failed, this is still a standby node.\n" :
|
||||||
|
"%s: connection to node lost!\n"), progname);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_err(_("\n%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"), progname);
|
log_err(_("%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"), progname);
|
||||||
}
|
}
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
return;
|
return;
|
||||||
@@ -1348,13 +1384,13 @@ do_standby_follow(void)
|
|||||||
{
|
{
|
||||||
PGconn *conn;
|
PGconn *conn;
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN], *ret;
|
||||||
char script[MAXLEN];
|
char script[MAXLEN];
|
||||||
char master_conninfo[MAXLEN];
|
char master_conninfo[MAXLEN];
|
||||||
PGconn *master_conn;
|
PGconn *master_conn;
|
||||||
int master_id;
|
int master_id;
|
||||||
|
|
||||||
int r;
|
int r, retval;
|
||||||
char data_dir[MAXLEN];
|
char data_dir[MAXLEN];
|
||||||
|
|
||||||
char master_version[MAXVERSIONSTR];
|
char master_version[MAXVERSIONSTR];
|
||||||
@@ -1366,18 +1402,22 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
/* Check we are in a standby node */
|
/* Check we are in a standby node */
|
||||||
log_info(_("%s connected to standby, checking its state\n"), progname);
|
log_info(_("%s connected to standby, checking its state\n"), progname);
|
||||||
if (!is_standby(conn))
|
retval = is_standby(conn);
|
||||||
|
if (retval == 0 || retval == -1)
|
||||||
{
|
{
|
||||||
log_err(_("\n%s: The command should be executed in a standby node\n"), progname);
|
log_err(_(retval == 0 ? "%s: The command should be executed in a standby node\n" :
|
||||||
|
"%s: connection to node lost!\n"), progname);
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* should be v9 or better */
|
/* should be v9 or better */
|
||||||
pg_version(conn, standby_version);
|
ret = pg_version(conn, standby_version);
|
||||||
if (strcmp(standby_version, "") == 0)
|
if (ret == NULL || strcmp(standby_version, "") == 0)
|
||||||
{
|
{
|
||||||
log_err(_("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret != NULL)
|
||||||
|
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1390,6 +1430,11 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
if (!is_pgup(conn, options.master_response_timeout))
|
||||||
|
{
|
||||||
|
conn = establishDBConnection(options.conninfo, true);
|
||||||
|
}
|
||||||
|
|
||||||
master_conn = getMasterConnection(conn, repmgr_schema,
|
master_conn = getMasterConnection(conn, repmgr_schema,
|
||||||
options.cluster_name, &master_id,(char *) &master_conninfo);
|
options.cluster_name, &master_id,(char *) &master_conninfo);
|
||||||
}
|
}
|
||||||
@@ -1403,9 +1448,12 @@ do_standby_follow(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Check we are going to point to a master */
|
/* Check we are going to point to a master */
|
||||||
if (is_standby(master_conn))
|
retval = is_standby(master_conn);
|
||||||
|
if (retval)
|
||||||
{
|
{
|
||||||
log_err(_("%s: The node to follow should be a master\n"), progname);
|
log_err(_(retval == 1 ? "%s: The node to follow should be a master\n" :
|
||||||
|
"%s: connection to node lost!\n"), progname);
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -1413,10 +1461,11 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
/* should be v9 or better */
|
/* should be v9 or better */
|
||||||
log_info(_("%s connected to master, checking its state\n"), progname);
|
log_info(_("%s connected to master, checking its state\n"), progname);
|
||||||
pg_version(master_conn, master_version);
|
ret = pg_version(master_conn, master_version);
|
||||||
if (strcmp(master_version, "") == 0)
|
if (ret == NULL || strcmp(master_version, "") == 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret != NULL)
|
||||||
|
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -1441,7 +1490,7 @@ do_standby_follow(void)
|
|||||||
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
|
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
|
|
||||||
log_info(_("%s Changing standby's master"),progname);
|
log_info(_("%s Changing standby's master\n"),progname);
|
||||||
|
|
||||||
/* Get the data directory full path */
|
/* Get the data directory full path */
|
||||||
sqlquery_snprintf(sqlquery, "SELECT setting "
|
sqlquery_snprintf(sqlquery, "SELECT setting "
|
||||||
@@ -1464,8 +1513,7 @@ do_standby_follow(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
|
|
||||||
/* Finally, restart the service */
|
/* Finally, restart the service */
|
||||||
/* We assume the pg_ctl script is in the PATH */
|
maxlen_snprintf(script, "%s/pg_ctl %s -w -D %s -m fast restart", options.pg_bindir, options.pgctl_options, data_dir);
|
||||||
maxlen_snprintf(script, "pg_ctl -w -D %s -m fast restart", data_dir);
|
|
||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
@@ -1483,27 +1531,19 @@ do_witness_create(void)
|
|||||||
PGconn *masterconn;
|
PGconn *masterconn;
|
||||||
PGconn *witnessconn;
|
PGconn *witnessconn;
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN], *ret;
|
||||||
|
|
||||||
char script[MAXLEN];
|
char script[MAXLEN];
|
||||||
char buf[MAXLEN];
|
char buf[MAXLEN];
|
||||||
FILE *pg_conf = NULL;
|
FILE *pg_conf = NULL;
|
||||||
|
|
||||||
int r = 0;
|
int r = 0, retval;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
char master_version[MAXVERSIONSTR];
|
char master_version[MAXVERSIONSTR];
|
||||||
|
|
||||||
char master_hba_file[MAXLEN];
|
char master_hba_file[MAXLEN];
|
||||||
|
|
||||||
/* Check this directory could be used as a PGDATA dir */
|
|
||||||
if (!create_pgdir(runtime_options.dest_dir, runtime_options.force))
|
|
||||||
{
|
|
||||||
log_err(_("witness create: couldn't create data directory (\"%s\") for witness"),
|
|
||||||
runtime_options.dest_dir);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Connection parameters for master only */
|
/* Connection parameters for master only */
|
||||||
keywords[0] = "host";
|
keywords[0] = "host";
|
||||||
values[0] = runtime_options.host;
|
values[0] = runtime_options.host;
|
||||||
@@ -1519,18 +1559,22 @@ do_witness_create(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* primary should be v9 or better */
|
/* primary should be v9 or better */
|
||||||
pg_version(masterconn, master_version);
|
ret = pg_version(masterconn, master_version);
|
||||||
if (strcmp(master_version, "") == 0)
|
if (ret == NULL || strcmp(master_version, "") == 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
if (ret != NULL)
|
||||||
|
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check we are connecting to a primary node */
|
/* Check we are connecting to a primary node */
|
||||||
if (is_standby(masterconn))
|
retval = is_standby(masterconn);
|
||||||
|
if (retval)
|
||||||
{
|
{
|
||||||
log_err(_("The command should not run on a standby node\n"));
|
log_err(_(retval == 1 ? "The command should not run on a standby node\n" :
|
||||||
|
"Connection to node lost!\n"));
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1545,6 +1589,15 @@ do_witness_create(void)
|
|||||||
exit(ERR_BAD_SSH);
|
exit(ERR_BAD_SSH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check this directory could be used as a PGDATA dir */
|
||||||
|
if (!create_pgdir(runtime_options.dest_dir, runtime_options.force))
|
||||||
|
{
|
||||||
|
log_err(_("witness create: couldn't create data directory (\"%s\") for witness"),
|
||||||
|
runtime_options.dest_dir);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* To create a witness server we need to:
|
* To create a witness server we need to:
|
||||||
* 1) initialize the cluster
|
* 1) initialize the cluster
|
||||||
@@ -1553,8 +1606,7 @@ do_witness_create(void)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* Create the cluster for witness */
|
/* Create the cluster for witness */
|
||||||
/* We assume the pg_ctl script is in the PATH */
|
sprintf(script, "%s/pg_ctl %s -D %s init -o \"-W\"", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir);
|
||||||
sprintf(script, "pg_ctl -D %s init -o \"-W\"", runtime_options.dest_dir);
|
|
||||||
log_info("Initialize cluster for witness: %s.\n", script);
|
log_info("Initialize cluster for witness: %s.\n", script);
|
||||||
|
|
||||||
r = system(script);
|
r = system(script);
|
||||||
@@ -1573,7 +1625,7 @@ do_witness_create(void)
|
|||||||
pg_conf = fopen(buf, "a");
|
pg_conf = fopen(buf, "a");
|
||||||
if (pg_conf == NULL)
|
if (pg_conf == NULL)
|
||||||
{
|
{
|
||||||
log_err(_("\n%s: could not open \"%s\" for adding extra config: %s\n"), progname, buf, strerror(errno));
|
log_err(_("%s: could not open \"%s\" for adding extra config: %s\n"), progname, buf, strerror(errno));
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1627,7 +1679,7 @@ do_witness_create(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* start new instance */
|
/* start new instance */
|
||||||
sprintf(script, "pg_ctl -w -D %s start", runtime_options.dest_dir);
|
sprintf(script, "%s/pg_ctl %s -w -D %s start", options.pg_bindir, options.pgctl_options, runtime_options.dest_dir);
|
||||||
log_info(_("Start cluster for witness: %s"), script);
|
log_info(_("Start cluster for witness: %s"), script);
|
||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
@@ -1640,7 +1692,7 @@ do_witness_create(void)
|
|||||||
/* register ourselves in the master */
|
/* register ourselves in the master */
|
||||||
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, name, conninfo, priority, witness) "
|
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes(id, cluster, name, conninfo, priority, witness) "
|
||||||
"VALUES (%d, '%s', '%s', '%s', %d, true)",
|
"VALUES (%d, '%s', '%s', '%s', %d, true)",
|
||||||
repmgr_schema, options.node, options.cluster_name, options.node_name, options.conninfo);
|
repmgr_schema, options.node, options.cluster_name, options.node_name, options.conninfo, options.priority);
|
||||||
|
|
||||||
log_debug(_("witness create: %s"), sqlquery);
|
log_debug(_("witness create: %s"), sqlquery);
|
||||||
if (!PQexec(masterconn, sqlquery))
|
if (!PQexec(masterconn, sqlquery))
|
||||||
@@ -1680,8 +1732,8 @@ do_witness_create(void)
|
|||||||
static void
|
static void
|
||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
log_err(_("\n\n%s: Replicator manager \n"), progname);
|
fprintf(stderr, _("\n\n%s: Replicator manager \n"), progname);
|
||||||
log_err(_("Try \"%s --help\" for more information.\n"), progname);
|
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1691,39 +1743,44 @@ help(const char *progname)
|
|||||||
{
|
{
|
||||||
printf(_("\n%s: Replicator manager \n"), progname);
|
printf(_("\n%s: Replicator manager \n"), progname);
|
||||||
printf(_("Usage:\n"));
|
printf(_("Usage:\n"));
|
||||||
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
printf(_(" %s [OPTIONS] master {register}\n"), progname);
|
||||||
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
|
||||||
progname);
|
progname);
|
||||||
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
|
printf(_(" %s [OPTIONS] cluster {show|cleanup}\n"), progname);
|
||||||
printf(_("\nGeneral options:\n"));
|
printf(_("\nGeneral options:\n"));
|
||||||
printf(_(" --help show this help, then exit\n"));
|
printf(_(" --help show this help, then exit\n"));
|
||||||
printf(_(" --version output version information, then exit\n"));
|
printf(_(" --version output version information, then exit\n"));
|
||||||
printf(_(" --verbose output verbose activity information\n"));
|
printf(_(" --verbose output verbose activity information\n"));
|
||||||
printf(_("\nConnection options:\n"));
|
printf(_("\nConnection options:\n"));
|
||||||
printf(_(" -d, --dbname=DBNAME database to connect to\n"));
|
printf(_(" -d, --dbname=DBNAME database to connect to\n"));
|
||||||
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
|
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
|
||||||
printf(_(" -p, --port=PORT database server port\n"));
|
printf(_(" -p, --port=PORT database server port\n"));
|
||||||
printf(_(" -U, --username=USERNAME database user name to connect as\n"));
|
printf(_(" -U, --username=USERNAME database user name to connect as\n"));
|
||||||
printf(_("\nConfiguration options:\n"));
|
printf(_("\nConfiguration options:\n"));
|
||||||
printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n"));
|
printf(_(" -D, --data-dir=DIR local directory where the files will be\n" \
|
||||||
printf(_(" -l, --local-port=PORT standby or witness server local port\n"));
|
" copied to\n"));
|
||||||
printf(_(" -f, --config_file=PATH path to the configuration file\n"));
|
printf(_(" -l, --local-port=PORT standby or witness server local port\n"));
|
||||||
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
printf(_(" -f, --config_file=PATH path to the configuration file\n"));
|
||||||
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
|
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
|
||||||
printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
|
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC\n" \
|
||||||
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of history\n"));
|
" wal_keep_segments (default: 5000)\n"));
|
||||||
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
|
printf(_(" -I, --ignore-rsync-warning ignore rsync partial transfer warning\n"));
|
||||||
printf(_(" -W, --wait wait for a master to appear"));
|
printf(_(" -k, --keep-history=VALUE keeps indicated number of days of\n" \
|
||||||
|
" history\n"));
|
||||||
|
printf(_(" -F, --force force potentially dangerous operations\n" \
|
||||||
|
" to happen\n"));
|
||||||
|
printf(_(" -W, --wait wait for a master to appear\n"));
|
||||||
|
|
||||||
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
|
printf(_("\n%s performs some tasks like clone a node, promote it or making follow\n"), progname);
|
||||||
printf(_("or making follow another node and then exits.\n"));
|
printf(_("another node and then exits.\n\n"));
|
||||||
printf(_("COMMANDS:\n"));
|
printf(_("COMMANDS:\n"));
|
||||||
printf(_(" master register - registers the master in a cluster\n"));
|
printf(_(" master register - registers the master in a cluster\n"));
|
||||||
printf(_(" standby register - registers a standby in a cluster\n"));
|
printf(_(" standby register - registers a standby in a cluster\n"));
|
||||||
printf(_(" standby clone [node] - allows creation of a new standby\n"));
|
printf(_(" standby clone [node] - allows creation of a new standby\n"));
|
||||||
printf(_(" standby promote - allows manual promotion of a specific standby into a "));
|
printf(_(" standby promote - allows manual promotion of a specific standby into\n" \
|
||||||
printf(_("new master in the event of a failover\n"));
|
" a new master in the event of a failover\n"));
|
||||||
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
|
printf(_(" standby follow - allows the standby to re-point itself to a new\n" \
|
||||||
|
" master\n"));
|
||||||
printf(_(" cluster show - print node information\n"));
|
printf(_(" cluster show - print node information\n"));
|
||||||
printf(_(" cluster cleanup - cleans monitor's history\n"));
|
printf(_(" cluster cleanup - cleans monitor's history\n"));
|
||||||
}
|
}
|
||||||
@@ -1790,7 +1847,7 @@ test_ssh_connection(char *host, char *remote_user)
|
|||||||
else
|
else
|
||||||
maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s", options.ssh_options, host, remote_user, TRUEBIN_PATH);
|
maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s", options.ssh_options, host, remote_user, TRUEBIN_PATH);
|
||||||
|
|
||||||
log_debug(_("command is: %s"), script);
|
log_debug(_("command is: %s\n"), script);
|
||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
log_info(_("Can not connect to the remote host (%s)\n"), host);
|
log_info(_("Can not connect to the remote host (%s)\n"), host);
|
||||||
@@ -1857,7 +1914,7 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
|
|||||||
log_info(_("rsync partial transfer warning ignored\n"));
|
log_info(_("rsync partial transfer warning ignored\n"));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
log_warning( _("\nrsync completed with return code 24: "
|
log_warning( _("rsync completed with return code 24: "
|
||||||
"\"Partial transfer due to vanished source files\".\n"
|
"\"Partial transfer due to vanished source files\".\n"
|
||||||
"This can happen because of normal operation "
|
"This can happen because of normal operation "
|
||||||
"on the master server, but it may indicate an "
|
"on the master server, but it may indicate an "
|
||||||
|
|||||||
@@ -34,3 +34,29 @@ loglevel=NOTICE
|
|||||||
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||||
# Default: STDERR
|
# Default: STDERR
|
||||||
logfacility=STDERR
|
logfacility=STDERR
|
||||||
|
|
||||||
|
# path to pg_ctl executable
|
||||||
|
pg_bindir=/usr/bin/
|
||||||
|
|
||||||
|
#
|
||||||
|
# you may add command line arguments for pg_ctl
|
||||||
|
#
|
||||||
|
# pg_ctl_options='-s'
|
||||||
|
|
||||||
|
#
|
||||||
|
# redirect stderr to a logfile
|
||||||
|
#
|
||||||
|
# logfile='/var/log/repmgr.log'
|
||||||
|
|
||||||
|
#
|
||||||
|
# change monitoring interval; default is 2s
|
||||||
|
#
|
||||||
|
# monitor_interval_secs=2
|
||||||
|
|
||||||
|
#
|
||||||
|
# change wait time for master; before we bail out and exit when the
|
||||||
|
# master disappears, we wait 6 * retry_promote_interval_secs seconds;
|
||||||
|
# by default this would be half an hour (since sleep_delay default
|
||||||
|
# value is 300)
|
||||||
|
#
|
||||||
|
# retry_promote_interval_secs=300
|
||||||
|
|||||||
4
repmgr.h
4
repmgr.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.h
|
* repmgr.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2012
|
* Copyright (c) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -69,6 +69,6 @@ typedef struct
|
|||||||
int keep_history;
|
int keep_history;
|
||||||
} t_runtime_options;
|
} t_runtime_options;
|
||||||
|
|
||||||
#define SLEEP_MONITOR 2
|
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, "", "", 0 }
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.sql
|
* repmgr.sql
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
400
repmgrd.c
400
repmgrd.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgrd.c - Replication manager daemon
|
* repmgrd.c - Replication manager daemon
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This module connects to the nodes of a replication cluster and monitors
|
* This module connects to the nodes of a replication cluster and monitors
|
||||||
* how far they are from master
|
* how far they are from master
|
||||||
@@ -22,6 +22,9 @@
|
|||||||
|
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
@@ -65,6 +68,7 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0};
|
|||||||
a.xrecoff = uxrecoff
|
a.xrecoff = uxrecoff
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Struct to keep info about the nodes, used in the voting process in
|
* Struct to keep info about the nodes, used in the voting process in
|
||||||
* do_failover()
|
* do_failover()
|
||||||
@@ -103,20 +107,22 @@ char repmgr_schema[MAXLEN];
|
|||||||
|
|
||||||
bool failover_done = false;
|
bool failover_done = false;
|
||||||
|
|
||||||
|
char *pid_file = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* should initialize with {0} to be ANSI compliant ? but this raises
|
* should initialize with {0} to be ANSI compliant ? but this raises
|
||||||
* error with gcc -Wall
|
* error with gcc -Wall
|
||||||
*/
|
*/
|
||||||
t_configuration_options config = {};
|
t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||||
|
|
||||||
static void help(const char* progname);
|
static void help(const char* progname);
|
||||||
static void usage(void);
|
static void usage(void);
|
||||||
static void checkClusterConfiguration(PGconn *conn, PGconn *primary);
|
static void checkClusterConfiguration(PGconn *conn);
|
||||||
static void checkNodeConfiguration(char *conninfo);
|
static void checkNodeConfiguration(void);
|
||||||
|
|
||||||
static void StandbyMonitor(void);
|
static void StandbyMonitor(void);
|
||||||
static void WitnessMonitor(void);
|
static void WitnessMonitor(void);
|
||||||
static bool CheckPrimaryConnection(void);
|
static bool CheckConnection(PGconn *conn, const char *type);
|
||||||
static void update_shared_memory(char *last_wal_standby_applied);
|
static void update_shared_memory(char *last_wal_standby_applied);
|
||||||
static void update_registration(void);
|
static void update_registration(void);
|
||||||
static void do_failover(void);
|
static void do_failover(void);
|
||||||
@@ -131,7 +137,15 @@ static volatile sig_atomic_t got_SIGHUP = false;
|
|||||||
|
|
||||||
static void handle_sighup(SIGNAL_ARGS);
|
static void handle_sighup(SIGNAL_ARGS);
|
||||||
static void handle_sigint(SIGNAL_ARGS);
|
static void handle_sigint(SIGNAL_ARGS);
|
||||||
|
|
||||||
|
static void terminate(int retval);
|
||||||
|
|
||||||
|
#ifndef WIN32
|
||||||
static void setup_event_handlers(void);
|
static void setup_event_handlers(void);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void do_daemonize();
|
||||||
|
static void check_and_create_pid_file(const char *pid_file);
|
||||||
|
|
||||||
#define CloseConnections() \
|
#define CloseConnections() \
|
||||||
if (PQisBusy(primaryConn) == 1) \
|
if (PQisBusy(primaryConn) == 1) \
|
||||||
@@ -150,13 +164,16 @@ main(int argc, char **argv)
|
|||||||
{"config", required_argument, NULL, 'f'},
|
{"config", required_argument, NULL, 'f'},
|
||||||
{"verbose", no_argument, NULL, 'v'},
|
{"verbose", no_argument, NULL, 'v'},
|
||||||
{"monitoring-history", no_argument, NULL, 'm'},
|
{"monitoring-history", no_argument, NULL, 'm'},
|
||||||
|
{"daemonize", no_argument, NULL, 'd'},
|
||||||
|
{"pid-file", required_argument, NULL, 'p'},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
int optindex;
|
int optindex;
|
||||||
int c;
|
int c, ret;
|
||||||
|
bool daemonize = false;
|
||||||
|
|
||||||
char standby_version[MAXVERSIONSTR];
|
char standby_version[MAXVERSIONSTR], *ret_ver;
|
||||||
|
|
||||||
progname = get_progname(argv[0]);
|
progname = get_progname(argv[0]);
|
||||||
|
|
||||||
@@ -174,7 +191,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "f:v:m", long_options, &optindex)) != -1)
|
while ((c = getopt_long(argc, argv, "f:v:mdp:", long_options, &optindex)) != -1)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
@@ -187,13 +204,31 @@ main(int argc, char **argv)
|
|||||||
case 'm':
|
case 'm':
|
||||||
monitoring_history = true;
|
monitoring_history = true;
|
||||||
break;
|
break;
|
||||||
|
case 'd':
|
||||||
|
daemonize = true;
|
||||||
|
break;
|
||||||
|
case 'p':
|
||||||
|
pid_file = optarg;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
usage();
|
usage();
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (daemonize)
|
||||||
|
{
|
||||||
|
do_daemonize();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pid_file)
|
||||||
|
{
|
||||||
|
check_and_create_pid_file(pid_file);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef WIN32
|
||||||
setup_event_handlers();
|
setup_event_handlers();
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the configuration file: repmgr.conf
|
* Read the configuration file: repmgr.conf
|
||||||
@@ -203,13 +238,21 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
log_err(_("Node information is missing. "
|
log_err(_("Node information is missing. "
|
||||||
"Check the configuration file, or provide one if you have not done so.\n"));
|
"Check the configuration file, or provide one if you have not done so.\n"));
|
||||||
exit(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger_init(progname, local_options.loglevel, local_options.logfacility);
|
freopen("/dev/null", "r", stdin);
|
||||||
|
freopen("/dev/null", "w", stdout);
|
||||||
|
|
||||||
|
logger_init(&local_options, progname, local_options.loglevel, local_options.logfacility);
|
||||||
if (verbose)
|
if (verbose)
|
||||||
logger_min_verbose(LOG_INFO);
|
logger_min_verbose(LOG_INFO);
|
||||||
|
|
||||||
|
if (log_type == REPMGR_SYSLOG)
|
||||||
|
{
|
||||||
|
freopen("/dev/null", "w", stderr);
|
||||||
|
}
|
||||||
|
|
||||||
snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name);
|
snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name);
|
||||||
|
|
||||||
log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo);
|
log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo);
|
||||||
@@ -217,12 +260,12 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
/* should be v9 or better */
|
/* should be v9 or better */
|
||||||
log_info(_("%s Connected to database, checking its state\n"), progname);
|
log_info(_("%s Connected to database, checking its state\n"), progname);
|
||||||
pg_version(myLocalConn, standby_version);
|
ret_ver = pg_version(myLocalConn, standby_version);
|
||||||
if (strcmp(standby_version, "") == 0)
|
if (ret_ver == NULL || strcmp(standby_version, "") == 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
if(ret_ver != NULL)
|
||||||
PQfinish(myLocalConn);
|
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -239,12 +282,25 @@ main(int argc, char **argv)
|
|||||||
* Set my server mode, establish a connection to primary
|
* Set my server mode, establish a connection to primary
|
||||||
* and start monitor
|
* and start monitor
|
||||||
*/
|
*/
|
||||||
if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node))
|
ret = is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node);
|
||||||
|
|
||||||
|
if (ret == 1)
|
||||||
myLocalMode = WITNESS_MODE;
|
myLocalMode = WITNESS_MODE;
|
||||||
else if (is_standby(myLocalConn))
|
else if (ret == 0)
|
||||||
myLocalMode = STANDBY_MODE;
|
{
|
||||||
else /* is the master */
|
ret = is_standby(myLocalConn);
|
||||||
myLocalMode = PRIMARY_MODE;
|
|
||||||
|
if (ret == 1)
|
||||||
|
myLocalMode = STANDBY_MODE;
|
||||||
|
else if (ret == 0) /* is the master */
|
||||||
|
myLocalMode = PRIMARY_MODE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* XXX we did this before changing is_standby() to return int; we
|
||||||
|
* should not exit at this point, but for now we do until we have a
|
||||||
|
* better strategy */
|
||||||
|
if (ret == -1)
|
||||||
|
terminate(1);
|
||||||
|
|
||||||
switch (myLocalMode)
|
switch (myLocalMode)
|
||||||
{
|
{
|
||||||
@@ -253,8 +309,8 @@ main(int argc, char **argv)
|
|||||||
strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
|
strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
|
||||||
primaryConn = myLocalConn;
|
primaryConn = myLocalConn;
|
||||||
|
|
||||||
checkClusterConfiguration(myLocalConn, primaryConn);
|
checkClusterConfiguration(myLocalConn);
|
||||||
checkNodeConfiguration(local_options.conninfo);
|
checkNodeConfiguration();
|
||||||
|
|
||||||
if (reload_configuration(config_file, &local_options))
|
if (reload_configuration(config_file, &local_options))
|
||||||
{
|
{
|
||||||
@@ -269,26 +325,26 @@ main(int argc, char **argv)
|
|||||||
/* Check that primary is still alive, and standbies are sending info */
|
/* Check that primary is still alive, and standbies are sending info */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Every SLEEP_MONITOR seconds, do master checks
|
* Every local_options.monitor_interval_secs seconds, do master checks
|
||||||
* XXX
|
* XXX
|
||||||
* Check that standbies are sending info
|
* Check that standbies are sending info
|
||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if (CheckPrimaryConnection())
|
if (CheckConnection(primaryConn, "master"))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
CheckActiveStandbiesConnections();
|
CheckActiveStandbiesConnections();
|
||||||
CheckInactiveStandbies();
|
CheckInactiveStandbies();
|
||||||
*/
|
*/
|
||||||
sleep(SLEEP_MONITOR);
|
sleep(local_options.monitor_interval_secs);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* XXX
|
/* XXX
|
||||||
* May we do something more verbose ?
|
* May we do something more verbose ?
|
||||||
*/
|
*/
|
||||||
exit(1);
|
terminate(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (got_SIGHUP)
|
if (got_SIGHUP)
|
||||||
@@ -299,6 +355,12 @@ main(int argc, char **argv)
|
|||||||
PQfinish(myLocalConn);
|
PQfinish(myLocalConn);
|
||||||
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
||||||
primaryConn = myLocalConn;
|
primaryConn = myLocalConn;
|
||||||
|
|
||||||
|
if (*local_options.logfile)
|
||||||
|
{
|
||||||
|
freopen(local_options.logfile, "a", stderr);
|
||||||
|
}
|
||||||
|
|
||||||
update_registration();
|
update_registration();
|
||||||
}
|
}
|
||||||
got_SIGHUP = false;
|
got_SIGHUP = false;
|
||||||
@@ -315,12 +377,11 @@ main(int argc, char **argv)
|
|||||||
&primary_options.node, NULL);
|
&primary_options.node, NULL);
|
||||||
if (primaryConn == NULL)
|
if (primaryConn == NULL)
|
||||||
{
|
{
|
||||||
CloseConnections();
|
terminate(ERR_BAD_CONFIG);
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
checkClusterConfiguration(myLocalConn, primaryConn);
|
checkClusterConfiguration(myLocalConn);
|
||||||
checkNodeConfiguration(local_options.conninfo);
|
checkNodeConfiguration();
|
||||||
|
|
||||||
if (reload_configuration(config_file, &local_options))
|
if (reload_configuration(config_file, &local_options))
|
||||||
{
|
{
|
||||||
@@ -330,7 +391,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Every SLEEP_MONITOR seconds, do checks
|
* Every local_options.monitor_interval_secs seconds, do checks
|
||||||
*/
|
*/
|
||||||
if (myLocalMode == WITNESS_MODE)
|
if (myLocalMode == WITNESS_MODE)
|
||||||
{
|
{
|
||||||
@@ -347,7 +408,7 @@ main(int argc, char **argv)
|
|||||||
WitnessMonitor();
|
WitnessMonitor();
|
||||||
else if (myLocalMode == STANDBY_MODE)
|
else if (myLocalMode == STANDBY_MODE)
|
||||||
StandbyMonitor();
|
StandbyMonitor();
|
||||||
sleep(SLEEP_MONITOR);
|
sleep(local_options.monitor_interval_secs);
|
||||||
|
|
||||||
if (got_SIGHUP)
|
if (got_SIGHUP)
|
||||||
{
|
{
|
||||||
@@ -396,7 +457,7 @@ WitnessMonitor(void)
|
|||||||
* Check if the master is still available, if after 5 minutes of retries
|
* Check if the master is still available, if after 5 minutes of retries
|
||||||
* we cannot reconnect, return false.
|
* we cannot reconnect, return false.
|
||||||
*/
|
*/
|
||||||
CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds
|
CheckConnection(primaryConn, "master"); /* this takes up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */
|
||||||
|
|
||||||
if (PQstatus(primaryConn) != CONNECTION_OK)
|
if (PQstatus(primaryConn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -404,8 +465,7 @@ WitnessMonitor(void)
|
|||||||
* If we can't reconnect, just exit...
|
* If we can't reconnect, just exit...
|
||||||
* XXX we need to make witness connect to the new master
|
* XXX we need to make witness connect to the new master
|
||||||
*/
|
*/
|
||||||
PQfinish(myLocalConn);
|
terminate(0);
|
||||||
exit(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fast path for the case where no history is requested */
|
/* Fast path for the case where no history is requested */
|
||||||
@@ -475,13 +535,19 @@ StandbyMonitor(void)
|
|||||||
unsigned long long int lsn_standby_received;
|
unsigned long long int lsn_standby_received;
|
||||||
unsigned long long int lsn_standby_applied;
|
unsigned long long int lsn_standby_applied;
|
||||||
|
|
||||||
int connection_retries;
|
int connection_retries, ret;
|
||||||
|
bool did_retry = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the master is still available, if after 5 minutes of retries
|
* Check if the master is still available, if after 5 minutes of retries
|
||||||
* we cannot reconnect, try to get a new master.
|
* we cannot reconnect, try to get a new master.
|
||||||
*/
|
*/
|
||||||
CheckPrimaryConnection(); // this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds
|
CheckConnection(primaryConn, "master"); /* this takes up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */
|
||||||
|
|
||||||
|
if (!CheckConnection(myLocalConn, "standby"))
|
||||||
|
{
|
||||||
|
terminate(1);
|
||||||
|
}
|
||||||
|
|
||||||
if (PQstatus(primaryConn) != CONNECTION_OK)
|
if (PQstatus(primaryConn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -501,15 +567,17 @@ StandbyMonitor(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_err(_("We haven't found a new master, waiting before retry...\n"));
|
log_err(_("We haven't found a new master, waiting before retry...\n"));
|
||||||
/* wait 5 minutes before retries, after 6 failures (30 minutes) we stop trying */
|
/* wait local_options.retry_promote_interval_secs seconds before retries,
|
||||||
sleep(300);
|
* after 6 failures (6 * local_options.retry_promote_interval_secs
|
||||||
|
* seconds) we stop trying */
|
||||||
|
sleep(local_options.retry_promote_interval_secs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQstatus(primaryConn) != CONNECTION_OK)
|
if (PQstatus(primaryConn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_err(_("We couldn't reconnect for long enough, exiting...\n"));
|
log_err(_("We couldn't reconnect for long enough, exiting...\n"));
|
||||||
exit(ERR_DB_CON);
|
terminate(ERR_DB_CON);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
||||||
@@ -524,11 +592,32 @@ StandbyMonitor(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Check if we still are a standby, we could have been promoted */
|
/* Check if we still are a standby, we could have been promoted */
|
||||||
if (!is_standby(myLocalConn))
|
do {
|
||||||
|
ret = is_standby(myLocalConn);
|
||||||
|
|
||||||
|
switch (ret)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
|
||||||
|
terminate(1);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case -1:
|
||||||
|
log_err(_("Standby node disappeared, trying to reconnect...\n"));
|
||||||
|
did_retry = true;
|
||||||
|
|
||||||
|
if (!CheckConnection(myLocalConn, "standby"))
|
||||||
|
{
|
||||||
|
terminate(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while(ret == -1);
|
||||||
|
|
||||||
|
if (did_retry)
|
||||||
{
|
{
|
||||||
log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
|
log_info(_("standby connection got back up again!\n"));
|
||||||
CloseConnections();
|
|
||||||
exit(ERR_PROMOTED);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fast path for the case where no history is requested */
|
/* Fast path for the case where no history is requested */
|
||||||
@@ -652,8 +741,7 @@ do_failover(void)
|
|||||||
{
|
{
|
||||||
log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(myLocalConn));
|
log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(myLocalConn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(myLocalConn);
|
terminate(ERR_DB_QUERY);
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -672,18 +760,21 @@ do_failover(void)
|
|||||||
/* Initialize on false so if we can't reach this node we know that later */
|
/* Initialize on false so if we can't reach this node we know that later */
|
||||||
nodes[i].is_visible = false;
|
nodes[i].is_visible = false;
|
||||||
nodes[i].is_ready = false;
|
nodes[i].is_ready = false;
|
||||||
|
|
||||||
XLAssignValue(nodes[i].xlog_location, 0, 0);
|
XLAssignValue(nodes[i].xlog_location, 0, 0);
|
||||||
|
|
||||||
log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"),
|
log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"),
|
||||||
progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? "true" : "false");
|
progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? "true" : "false");
|
||||||
|
|
||||||
nodeConn = establishDBConnection(nodes[i].conninfostr, false);
|
nodeConn = establishDBConnection(nodes[i].conninfostr, false);
|
||||||
|
|
||||||
/* if we can't see the node just skip it */
|
/* if we can't see the node just skip it */
|
||||||
if (PQstatus(nodeConn) != CONNECTION_OK)
|
if (PQstatus(nodeConn) != CONNECTION_OK)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
visible_nodes++;
|
visible_nodes++;
|
||||||
nodes[i].is_visible = true;
|
nodes[i].is_visible = true;
|
||||||
|
|
||||||
PQfinish(nodeConn);
|
PQfinish(nodeConn);
|
||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
@@ -699,7 +790,7 @@ do_failover(void)
|
|||||||
log_err(_("Can't reach most of the nodes.\n"
|
log_err(_("Can't reach most of the nodes.\n"
|
||||||
"Let the other standby servers decide which one will be the primary.\n"
|
"Let the other standby servers decide which one will be the primary.\n"
|
||||||
"Manual action will be needed to readd this node to the cluster.\n"));
|
"Manual action will be needed to readd this node to the cluster.\n"));
|
||||||
exit(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Query all the nodes to determine which ones are ready */
|
/* Query all the nodes to determine which ones are ready */
|
||||||
@@ -720,9 +811,12 @@ do_failover(void)
|
|||||||
if (PQstatus(nodeConn) != CONNECTION_OK)
|
if (PQstatus(nodeConn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_err(_("It seems new problems are arising, manual intervention is needed\n"));
|
log_err(_("It seems new problems are arising, manual intervention is needed\n"));
|
||||||
exit(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uxlogid = 0;
|
||||||
|
uxrecoff = 0;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
|
sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
|
||||||
res = PQexec(nodeConn, sqlquery);
|
res = PQexec(nodeConn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -731,7 +825,7 @@ do_failover(void)
|
|||||||
log_info(_("Connection details: %s\n"), nodes[i].conninfostr);
|
log_info(_("Connection details: %s\n"), nodes[i].conninfostr);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(nodeConn);
|
PQfinish(nodeConn);
|
||||||
exit(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
|
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
|
||||||
@@ -746,7 +840,7 @@ do_failover(void)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(nodeConn);
|
PQfinish(nodeConn);
|
||||||
log_info(_("InvalidXLogRecPtr detected in a standby\n"));
|
log_info(_("InvalidXLogRecPtr detected in a standby\n"));
|
||||||
exit(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff);
|
XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff);
|
||||||
@@ -761,11 +855,10 @@ do_failover(void)
|
|||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn));
|
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn));
|
||||||
PQfinish(myLocalConn);
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
|
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
|
||||||
update_shared_memory(last_wal_standby_applied);
|
update_shared_memory(last_wal_standby_applied);
|
||||||
exit(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write last location in shared memory */
|
/* write last location in shared memory */
|
||||||
@@ -810,6 +903,9 @@ do_failover(void)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uxlogid = 0;
|
||||||
|
uxrecoff = 0;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema);
|
sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema);
|
||||||
res = PQexec(nodeConn, sqlquery);
|
res = PQexec(nodeConn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -817,12 +913,22 @@ do_failover(void)
|
|||||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(nodeConn));
|
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(nodeConn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(nodeConn);
|
PQfinish(nodeConn);
|
||||||
exit(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
|
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
|
||||||
|
{
|
||||||
log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0));
|
log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
|
/* we can't do anything but fail at this point... */
|
||||||
|
if (*PQgetvalue(res, 0, 0) == '\0')
|
||||||
|
{
|
||||||
|
log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n");
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(nodeConn);
|
PQfinish(nodeConn);
|
||||||
/* If position is 0/0, keep checking */
|
/* If position is 0/0, keep checking */
|
||||||
@@ -891,7 +997,7 @@ do_failover(void)
|
|||||||
if (best_candidate.is_witness)
|
if (best_candidate.is_witness)
|
||||||
{
|
{
|
||||||
log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"), progname);
|
log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"), progname);
|
||||||
exit(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* wait */
|
/* wait */
|
||||||
@@ -901,11 +1007,17 @@ do_failover(void)
|
|||||||
log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"),
|
log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"),
|
||||||
progname);
|
progname);
|
||||||
log_debug(_("promote command is: \"%s\"\n"), local_options.promote_command);
|
log_debug(_("promote command is: \"%s\"\n"), local_options.promote_command);
|
||||||
|
|
||||||
|
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
||||||
|
{
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
|
||||||
r = system(local_options.promote_command);
|
r = system(local_options.promote_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s: promote command failed. You could check and try it manually.\n"), progname);
|
log_err(_("%s: promote command failed. You could check and try it manually.\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (find_best)
|
else if (find_best)
|
||||||
@@ -921,17 +1033,22 @@ do_failover(void)
|
|||||||
* New Primary needs some time to be promoted.
|
* New Primary needs some time to be promoted.
|
||||||
* The follow command should take care of that.
|
* The follow command should take care of that.
|
||||||
*/
|
*/
|
||||||
|
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
||||||
|
{
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
|
||||||
r = system(local_options.follow_command);
|
r = system(local_options.follow_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s: follow command failed. You could check and try it manually.\n"), progname);
|
log_err(_("%s: follow command failed. You could check and try it manually.\n"), progname);
|
||||||
exit(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_err(_("%s: Did not find candidates. You should check and try manually.\n"), progname);
|
log_err(_("%s: Did not find candidates. You should check and try manually.\n"), progname);
|
||||||
exit(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* to force it to re-calculate mode and master node */
|
/* to force it to re-calculate mode and master node */
|
||||||
@@ -943,7 +1060,7 @@ do_failover(void)
|
|||||||
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
CheckPrimaryConnection(void)
|
CheckConnection(PGconn *conn, const char *type)
|
||||||
{
|
{
|
||||||
int connection_retries;
|
int connection_retries;
|
||||||
|
|
||||||
@@ -955,10 +1072,11 @@ CheckPrimaryConnection(void)
|
|||||||
*/
|
*/
|
||||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||||
{
|
{
|
||||||
if (!is_pgup(primaryConn, local_options.master_response_timeout))
|
if (!is_pgup(conn, local_options.master_response_timeout))
|
||||||
{
|
{
|
||||||
log_warning(_("%s: Connection to master has been lost, trying to recover... %i seconds before failover decision\n"),
|
log_warning(_("%s: Connection to %s has been lost, trying to recover... %i seconds before failover decision\n"),
|
||||||
progname,
|
progname,
|
||||||
|
type,
|
||||||
(local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries)));
|
(local_options.reconnect_intvl * (local_options.reconnect_attempts - connection_retries)));
|
||||||
/* wait local_options.reconnect_intvl seconds between retries */
|
/* wait local_options.reconnect_intvl seconds between retries */
|
||||||
sleep(local_options.reconnect_intvl);
|
sleep(local_options.reconnect_intvl);
|
||||||
@@ -967,12 +1085,12 @@ CheckPrimaryConnection(void)
|
|||||||
{
|
{
|
||||||
if ( connection_retries > 0)
|
if ( connection_retries > 0)
|
||||||
{
|
{
|
||||||
log_info(_("%s: Connection to master has been restored.\n"), progname);
|
log_info(_("%s: Connection to %s has been restored.\n"), progname, type);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!is_pgup(primaryConn, local_options.master_response_timeout))
|
if (!is_pgup(conn, local_options.master_response_timeout))
|
||||||
{
|
{
|
||||||
log_err(_("%s: We couldn't reconnect for long enough, exiting...\n"), progname);
|
log_err(_("%s: We couldn't reconnect for long enough, exiting...\n"), progname);
|
||||||
/* XXX Anything else to do here? */
|
/* XXX Anything else to do here? */
|
||||||
@@ -983,7 +1101,7 @@ CheckPrimaryConnection(void)
|
|||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
checkClusterConfiguration(PGconn *conn, PGconn *primary)
|
checkClusterConfiguration(PGconn *conn)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
@@ -997,8 +1115,7 @@ checkClusterConfiguration(PGconn *conn, PGconn *primary)
|
|||||||
{
|
{
|
||||||
log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn));
|
log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
CloseConnections();
|
terminate(ERR_DB_QUERY);
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1012,15 +1129,14 @@ checkClusterConfiguration(PGconn *conn, PGconn *primary)
|
|||||||
{
|
{
|
||||||
log_err(_("The replication cluster is not configured\n"));
|
log_err(_("The replication cluster is not configured\n"));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
CloseConnections();
|
terminate(ERR_BAD_CONFIG);
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
checkNodeConfiguration(char *conninfo)
|
checkNodeConfiguration(void)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
|
|
||||||
@@ -1039,8 +1155,7 @@ checkNodeConfiguration(char *conninfo)
|
|||||||
{
|
{
|
||||||
log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn));
|
log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
CloseConnections();
|
terminate(ERR_BAD_CONFIG);
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1055,8 +1170,7 @@ checkNodeConfiguration(char *conninfo)
|
|||||||
if (myLocalMode == WITNESS_MODE)
|
if (myLocalMode == WITNESS_MODE)
|
||||||
{
|
{
|
||||||
log_err(_("The witness is not configured\n"));
|
log_err(_("The witness is not configured\n"));
|
||||||
CloseConnections();
|
terminate(ERR_BAD_CONFIG);
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Adding the node */
|
/* Adding the node */
|
||||||
@@ -1073,8 +1187,7 @@ checkNodeConfiguration(char *conninfo)
|
|||||||
{
|
{
|
||||||
log_err(_("Cannot insert node details, %s\n"),
|
log_err(_("Cannot insert node details, %s\n"),
|
||||||
PQerrorMessage(primaryConn));
|
PQerrorMessage(primaryConn));
|
||||||
CloseConnections();
|
terminate(ERR_BAD_CONFIG);
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -1116,6 +1229,8 @@ void help(const char *progname)
|
|||||||
printf(_(" --verbose output verbose activity information\n"));
|
printf(_(" --verbose output verbose activity information\n"));
|
||||||
printf(_(" --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n"));
|
printf(_(" --monitoring-history track advance or lag of the replication in every standby in repl_monitor\n"));
|
||||||
printf(_(" -f, --config_file=PATH configuration file\n"));
|
printf(_(" -f, --config_file=PATH configuration file\n"));
|
||||||
|
printf(_(" -d, --daemonize detach process from foreground\n"));
|
||||||
|
printf(_(" -p, --pid-file=PATH write a PID file\n"));
|
||||||
printf(_("\n%s monitors a cluster of servers.\n"), progname);
|
printf(_("\n%s monitors a cluster of servers.\n"), progname);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1124,9 +1239,7 @@ void help(const char *progname)
|
|||||||
static void
|
static void
|
||||||
handle_sigint(SIGNAL_ARGS)
|
handle_sigint(SIGNAL_ARGS)
|
||||||
{
|
{
|
||||||
CloseConnections();
|
terminate(0);
|
||||||
logger_shutdown();
|
|
||||||
exit(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* SIGHUP: set flag to re-read config file at next convenient time */
|
/* SIGHUP: set flag to re-read config file at next convenient time */
|
||||||
@@ -1141,9 +1254,24 @@ setup_event_handlers(void)
|
|||||||
{
|
{
|
||||||
pqsignal(SIGHUP, handle_sighup);
|
pqsignal(SIGHUP, handle_sighup);
|
||||||
pqsignal(SIGINT, handle_sigint);
|
pqsignal(SIGINT, handle_sigint);
|
||||||
|
pqsignal(SIGTERM, handle_sigint);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void
|
||||||
|
terminate(int retval)
|
||||||
|
{
|
||||||
|
CloseConnections();
|
||||||
|
logger_shutdown();
|
||||||
|
|
||||||
|
if (pid_file)
|
||||||
|
{
|
||||||
|
unlink(pid_file);
|
||||||
|
}
|
||||||
|
|
||||||
|
exit(retval);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
update_shared_memory(char *last_wal_standby_applied)
|
update_shared_memory(char *last_wal_standby_applied)
|
||||||
@@ -1160,6 +1288,13 @@ update_shared_memory(char *last_wal_standby_applied)
|
|||||||
log_warning(_("Cannot update this standby's shared memory: %s\n"), PQerrorMessage(myLocalConn));
|
log_warning(_("Cannot update this standby's shared memory: %s\n"), PQerrorMessage(myLocalConn));
|
||||||
/* XXX is this enough reason to terminate this repmgrd? */
|
/* XXX is this enough reason to terminate this repmgrd? */
|
||||||
}
|
}
|
||||||
|
else if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||||
|
{
|
||||||
|
/* this surely is more than enough reason to exit */
|
||||||
|
log_crit(_("Cannot update this standby's shared memory, maybe shared_preload_libraries=repmgr_funcs is not set?\n"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1178,8 +1313,115 @@ update_registration(void)
|
|||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Cannot update registration: %s\n"), PQerrorMessage(primaryConn));
|
log_err(_("Cannot update registration: %s\n"), PQerrorMessage(primaryConn));
|
||||||
CloseConnections();
|
terminate(ERR_DB_CON);
|
||||||
exit(ERR_DB_CON);
|
|
||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_daemonize()
|
||||||
|
{
|
||||||
|
char *ptr, path[MAXLEN];
|
||||||
|
pid_t pid = fork();
|
||||||
|
|
||||||
|
switch (pid)
|
||||||
|
{
|
||||||
|
case -1:
|
||||||
|
log_err("Error in fork(): %s\n", strerror(errno));
|
||||||
|
exit(ERR_SYS_FAILURE);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0: /* child process */
|
||||||
|
pid = setsid();
|
||||||
|
if (pid == (pid_t)-1)
|
||||||
|
{
|
||||||
|
log_err("Error in setsid(): %s\n", strerror(errno));
|
||||||
|
exit(ERR_SYS_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ensure that we are no longer able to open a terminal */
|
||||||
|
pid = fork();
|
||||||
|
|
||||||
|
if(pid == -1) /* error case */
|
||||||
|
{
|
||||||
|
log_err("Error in fork(): %s\n", strerror(errno));
|
||||||
|
exit(ERR_SYS_FAILURE);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pid != 0) /* parent process */
|
||||||
|
{
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* a child just flows along */
|
||||||
|
|
||||||
|
memset(path, 0, MAXLEN);
|
||||||
|
|
||||||
|
for (ptr = config_file + strlen(config_file); ptr > config_file; --ptr)
|
||||||
|
{
|
||||||
|
if (*ptr == '/')
|
||||||
|
{
|
||||||
|
strncpy(path, config_file, ptr - config_file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*path == '\0')
|
||||||
|
{
|
||||||
|
*path = '/';
|
||||||
|
}
|
||||||
|
|
||||||
|
chdir(path);
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: /* parent process */
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
check_and_create_pid_file(const char *pid_file)
|
||||||
|
{
|
||||||
|
struct stat st;
|
||||||
|
FILE *fd;
|
||||||
|
char buff[MAXLEN];
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
|
if (stat(pid_file, &st) != -1)
|
||||||
|
{
|
||||||
|
memset(buff, 0, MAXLEN);
|
||||||
|
|
||||||
|
fd = fopen(pid_file, "r");
|
||||||
|
|
||||||
|
if (fd == NULL)
|
||||||
|
{
|
||||||
|
log_err("PID file %s exists but could not opened for reading. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
fread(buff, MAXLEN - 1, 1, fd);
|
||||||
|
fclose(fd);
|
||||||
|
|
||||||
|
pid = atoi(buff);
|
||||||
|
|
||||||
|
if (pid != 0)
|
||||||
|
{
|
||||||
|
if (kill(pid, 0) != -1)
|
||||||
|
{
|
||||||
|
log_err("PID file %s exists and seems to contain a valid PID. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fd = fopen(pid_file, "w");
|
||||||
|
if (fd == NULL)
|
||||||
|
{
|
||||||
|
log_err("Could not open PID file %s!\n", pid_file);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(fd, "%d", getpid());
|
||||||
|
fclose(fd);
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,7 +9,8 @@ DATA=uninstall_repmgr_funcs.sql
|
|||||||
OBJS=repmgr_funcs.o
|
OBJS=repmgr_funcs.o
|
||||||
|
|
||||||
ifdef USE_PGXS
|
ifdef USE_PGXS
|
||||||
PGXS := $(shell pg_config --pgxs)
|
PG_CONFIG = pg_config
|
||||||
|
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||||
include $(PGXS)
|
include $(PGXS)
|
||||||
else
|
else
|
||||||
subdir = contrib/repmgr/sql
|
subdir = contrib/repmgr/sql
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.c
|
* strutil.c
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -25,7 +25,7 @@
|
|||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap);
|
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||||
|
|
||||||
/* Add strnlen on platforms that don't have it, like OS X */
|
/* Add strnlen on platforms that don't have it, like OS X */
|
||||||
#ifndef strnlen
|
#ifndef strnlen
|
||||||
@@ -44,7 +44,7 @@ xvsnprintf(char *str, size_t size, const char *format, va_list ap)
|
|||||||
|
|
||||||
retval = vsnprintf(str, size, format, ap);
|
retval = vsnprintf(str, size, format, ap);
|
||||||
|
|
||||||
if (retval >= size)
|
if (retval >= (int)size)
|
||||||
{
|
{
|
||||||
log_err(_("Buffer of size not large enough to format entire string '%s'\n"),
|
log_err(_("Buffer of size not large enough to format entire string '%s'\n"),
|
||||||
str);
|
str);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.h
|
* strutil.h
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
@@ -31,9 +31,9 @@
|
|||||||
#define MAXCONNINFO 1024
|
#define MAXCONNINFO 1024
|
||||||
|
|
||||||
|
|
||||||
extern int xsnprintf(char *str, size_t size, const char *format, ...);
|
extern int xsnprintf(char *str, size_t size, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||||
extern int sqlquery_snprintf(char *str, const char *format, ...);
|
extern int sqlquery_snprintf(char *str, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||||
extern int maxlen_snprintf(char *str, const char *format, ...);
|
extern int maxlen_snprintf(char *str, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||||
|
|
||||||
/* Add strnlen on platforms that don't have it, like OS X */
|
/* Add strnlen on platforms that don't have it, like OS X */
|
||||||
#ifndef strnlen
|
#ifndef strnlen
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* uninstall_repmgr.sql
|
* uninstall_repmgr.sql
|
||||||
*
|
*
|
||||||
* Copyright (C) 2ndQuadrant, 2010-2012
|
* Copyright (C) 2ndQuadrant, 2010-2014
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user