mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 23:26:30 +00:00
Compare commits
77 Commits
v4.3.0rc1
...
REL4_3_STABLE
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef382dfede | ||
|
|
bc93d2996c | ||
|
|
0946073406 | ||
|
|
129c8782a4 | ||
|
|
5493055a1d | ||
|
|
a50f0e7cc0 | ||
|
|
adfde1b681 | ||
|
|
d4b17635fe | ||
|
|
e4c573a7f6 | ||
|
|
492665e34c | ||
|
|
2d7c38e2ef | ||
|
|
9ee2448583 | ||
|
|
cf9458161f | ||
|
|
67dc42d2ad | ||
|
|
3b96b2afce | ||
|
|
216f274c15 | ||
|
|
8cb101be1d | ||
|
|
03b29908e2 | ||
|
|
99be03f000 | ||
|
|
7aaac343f8 | ||
|
|
68470a9167 | ||
|
|
35320c27bd | ||
|
|
b7b9db7e9c | ||
|
|
01e11950a5 | ||
|
|
fcaee6e6e8 | ||
|
|
538d5f9df0 | ||
|
|
4e8b94c105 | ||
|
|
9ee51bb0cb | ||
|
|
bab07cdda1 | ||
|
|
b03f07ca8f | ||
|
|
39fbe02c48 | ||
|
|
2249b79811 | ||
|
|
bb0fd944ae | ||
|
|
b4ca6851ab | ||
|
|
347948b79f | ||
|
|
83e492d4ef | ||
|
|
1906ea89bd | ||
|
|
eab4fd2795 | ||
|
|
3f1fe9b6c2 | ||
|
|
e672f7e3ee | ||
|
|
fd86160dff | ||
|
|
f19cf62f09 | ||
|
|
8018ba97d6 | ||
|
|
73554c6e16 | ||
|
|
f23a93e12d | ||
|
|
d9947a46e8 | ||
|
|
e3a632e29d | ||
|
|
939cbd0721 | ||
|
|
c45c5abfb8 | ||
|
|
1953ec7459 | ||
|
|
a6eacca6e4 | ||
|
|
948e076ad9 | ||
|
|
a3bd9d33ff | ||
|
|
9dc928a7d5 | ||
|
|
9acf7bdfea | ||
|
|
29acd10f37 | ||
|
|
9df511eee3 | ||
|
|
6441db23ff | ||
|
|
7792de3543 | ||
|
|
94fe3e395e | ||
|
|
ff26173b1e | ||
|
|
4c11a57334 | ||
|
|
1d2d6e3587 | ||
|
|
c03913d32a | ||
|
|
37a41a66f9 | ||
|
|
4c2c8ecbab | ||
|
|
b84b6180ee | ||
|
|
58f55222d9 | ||
|
|
5cbaff8d0a | ||
|
|
a38e229e61 | ||
|
|
272abdd483 | ||
|
|
b4f6043abc | ||
|
|
a7f3f899ff | ||
|
|
3ec43eda36 | ||
|
|
ce8e1cccc4 | ||
|
|
70bfa4c8e1 | ||
|
|
f0d5ad503d |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -47,6 +47,9 @@ lib*.pc
|
|||||||
# other
|
# other
|
||||||
/.lineno
|
/.lineno
|
||||||
*.dSYM
|
*.dSYM
|
||||||
|
*.orig
|
||||||
|
*.rej
|
||||||
|
|
||||||
# generated binaries
|
# generated binaries
|
||||||
repmgr
|
repmgr
|
||||||
repmgrd
|
repmgrd
|
||||||
|
|||||||
11
HISTORY
11
HISTORY
@@ -1,4 +1,8 @@
|
|||||||
4.3 2019-??
|
4.3.1 2019-12-??
|
||||||
|
repmgr: ensure an existing replication slot is not deleted if the
|
||||||
|
follow target is the node's current upstream (Ian)
|
||||||
|
|
||||||
|
4.3 2019-04-02
|
||||||
repmgr: add "daemon (start|stop)" command; GitHub #528 (Ian)
|
repmgr: add "daemon (start|stop)" command; GitHub #528 (Ian)
|
||||||
repmgr: add --version-number command line option (Ian)
|
repmgr: add --version-number command line option (Ian)
|
||||||
repmgr: add --compact option to "cluster show"; GitHub #521 (Ian)
|
repmgr: add --compact option to "cluster show"; GitHub #521 (Ian)
|
||||||
@@ -15,6 +19,8 @@
|
|||||||
repmgr: add sanity check for correct extension version (Ian)
|
repmgr: add sanity check for correct extension version (Ian)
|
||||||
repmgr: ensure "witness register --dry-run" does not attempt to read node
|
repmgr: ensure "witness register --dry-run" does not attempt to read node
|
||||||
tables if repmgr extension not installed; GitHub #513 (Ian)
|
tables if repmgr extension not installed; GitHub #513 (Ian)
|
||||||
|
repmgr: ensure "standby register" fails when --upstream-node-id is the
|
||||||
|
same as the local node ID (Ian)
|
||||||
repmgrd: check binary and extension major versions match; GitHub #515 (Ian)
|
repmgrd: check binary and extension major versions match; GitHub #515 (Ian)
|
||||||
repmgrd: on a cascaded standby, don't fail over if "failover=manual";
|
repmgrd: on a cascaded standby, don't fail over if "failover=manual";
|
||||||
GitHub #531 (Ian)
|
GitHub #531 (Ian)
|
||||||
@@ -22,6 +28,9 @@
|
|||||||
candidates (Ian)
|
candidates (Ian)
|
||||||
repmgrd: add option "connection_check_type" (Ian)
|
repmgrd: add option "connection_check_type" (Ian)
|
||||||
repmgrd: improve witness monitoring when primary node not available (Ian)
|
repmgrd: improve witness monitoring when primary node not available (Ian)
|
||||||
|
repmgrd: handle situation where a primary has unexpectedly appeared
|
||||||
|
during failover; GitHub #420 (Ian)
|
||||||
|
general: fix Makefile (John)
|
||||||
|
|
||||||
4.2 2018-10-24
|
4.2 2018-10-24
|
||||||
repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
|
repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ Documentation
|
|||||||
|
|
||||||
The main `repmgr` documentation is available here:
|
The main `repmgr` documentation is available here:
|
||||||
|
|
||||||
> [repmgr 4 documentation](https://repmgr.org/docs/4.2/index.html)
|
> [repmgr documentation](https://repmgr.org/docs/current/index.html)
|
||||||
|
|
||||||
The `README` file for `repmgr` 3.x is available here:
|
The `README` file for `repmgr` 3.x is available here:
|
||||||
|
|
||||||
@@ -72,7 +72,7 @@ Please report bugs and other issues to:
|
|||||||
|
|
||||||
* https://github.com/2ndQuadrant/repmgr
|
* https://github.com/2ndQuadrant/repmgr
|
||||||
|
|
||||||
Further information is available at https://www.repmgr.org/
|
Further information is available at https://repmgr.org/
|
||||||
|
|
||||||
We'd love to hear from you about how you use repmgr. Case studies and
|
We'd love to hear from you about how you use repmgr. Case studies and
|
||||||
news are always welcome. Send us an email at info@2ndQuadrant.com, or
|
news are always welcome. Send us an email at info@2ndQuadrant.com, or
|
||||||
@@ -97,6 +97,7 @@ Thanks from the repmgr core team.
|
|||||||
Further reading
|
Further reading
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
* [repmgr documentation](https://repmgr.org/docs/current/index.html)
|
||||||
* https://blog.2ndquadrant.com/repmgr-3-2-is-here-barman-support-brand-new-high-availability-features/
|
* https://blog.2ndquadrant.com/repmgr-3-2-is-here-barman-support-brand-new-high-availability-features/
|
||||||
* https://blog.2ndquadrant.com/improvements-in-repmgr-3-1-4/
|
* https://blog.2ndquadrant.com/improvements-in-repmgr-3-1-4/
|
||||||
* https://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
* https://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
||||||
|
|||||||
57
configfile.c
57
configfile.c
@@ -484,7 +484,14 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
node_id_found = true;
|
node_id_found = true;
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "node_name") == 0)
|
else if (strcmp(name, "node_name") == 0)
|
||||||
strncpy(options->node_name, value, MAXLEN);
|
{
|
||||||
|
if (strlen(value) < sizeof(options->node_name))
|
||||||
|
strncpy(options->node_name, value, sizeof(options->node_name));
|
||||||
|
else
|
||||||
|
item_list_append_format(error_list,
|
||||||
|
_("value for \"node_name\" must contain fewer than %lu characters"),
|
||||||
|
sizeof(options->node_name));
|
||||||
|
}
|
||||||
else if (strcmp(name, "conninfo") == 0)
|
else if (strcmp(name, "conninfo") == 0)
|
||||||
strncpy(options->conninfo, value, MAXLEN);
|
strncpy(options->conninfo, value, MAXLEN);
|
||||||
else if (strcmp(name, "data_directory") == 0)
|
else if (strcmp(name, "data_directory") == 0)
|
||||||
@@ -494,11 +501,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
|
|
||||||
else if (strcmp(name, "replication_user") == 0)
|
else if (strcmp(name, "replication_user") == 0)
|
||||||
{
|
{
|
||||||
if (strlen(value) < NAMEDATALEN)
|
if (strlen(value) < sizeof(options->replication_user))
|
||||||
strncpy(options->replication_user, value, NAMEDATALEN);
|
strncpy(options->replication_user, value, sizeof(options->replication_user));
|
||||||
else
|
else
|
||||||
item_list_append(error_list,
|
item_list_append_format(error_list,
|
||||||
_("value for \"replication_user\" must contain fewer than " STR(NAMEDATALEN) " characters"));
|
_("value for \"replication_user\" must contain fewer than %lu characters"),
|
||||||
|
sizeof(options->replication_user));
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "pg_bindir") == 0)
|
else if (strcmp(name, "pg_bindir") == 0)
|
||||||
strncpy(options->pg_bindir, value, MAXPGPATH);
|
strncpy(options->pg_bindir, value, MAXPGPATH);
|
||||||
@@ -645,7 +653,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
item_list_append(error_list,
|
item_list_append(error_list,
|
||||||
_("value for \"connection_check_type\" must be \"ping\" or \"connection\"\n"));
|
_("value for \"connection_check_type\" must be \"ping\", \"connection\" or \"query\"\n"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "primary_visibility_consensus") == 0)
|
else if (strcmp(name, "primary_visibility_consensus") == 0)
|
||||||
@@ -828,15 +836,16 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
|
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
|
||||||
if (conninfo_options == NULL)
|
if (conninfo_options == NULL)
|
||||||
{
|
{
|
||||||
char error_message_buf[MAXLEN] = "";
|
PQExpBufferData error_message_buf;
|
||||||
|
initPQExpBuffer(&error_message_buf);
|
||||||
|
|
||||||
snprintf(error_message_buf,
|
appendPQExpBuffer(&error_message_buf,
|
||||||
MAXLEN,
|
_("\"conninfo\": %s (provided: \"%s\")"),
|
||||||
_("\"conninfo\": %s (provided: \"%s\")"),
|
conninfo_errmsg,
|
||||||
conninfo_errmsg,
|
options->conninfo);
|
||||||
options->conninfo);
|
|
||||||
|
|
||||||
item_list_append(error_list, error_message_buf);
|
item_list_append(error_list, error_message_buf.data);
|
||||||
|
termPQExpBuffer(&error_message_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQconninfoFree(conninfo_options);
|
PQconninfoFree(conninfo_options);
|
||||||
@@ -1196,7 +1205,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strncmp(new_options.node_name, orig_options->node_name, MAXLEN) != 0)
|
if (strncmp(new_options.node_name, orig_options->node_name, sizeof(orig_options->node_name)) != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
|
log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
|
||||||
return false;
|
return false;
|
||||||
@@ -1398,7 +1407,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
{
|
{
|
||||||
orig_options->connection_check_type = new_options.connection_check_type;
|
orig_options->connection_check_type = new_options.connection_check_type;
|
||||||
log_info(_("\"connection_check_type\" is now \"%s\""),
|
log_info(_("\"connection_check_type\" is now \"%s\""),
|
||||||
new_options.connection_check_type == CHECK_PING ? "ping" : "connection");
|
print_connection_check_type(new_options.connection_check_type));
|
||||||
config_changed = true;
|
config_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2017,3 +2026,21 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti
|
|||||||
|
|
||||||
return backup_options_ok;
|
return backup_options_ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char *
|
||||||
|
print_connection_check_type(ConnectionCheckType type)
|
||||||
|
{
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case CHECK_PING:
|
||||||
|
return "ping";
|
||||||
|
case CHECK_QUERY:
|
||||||
|
return "query";
|
||||||
|
case CHECK_CONNECTION:
|
||||||
|
return "connection";
|
||||||
|
}
|
||||||
|
|
||||||
|
/* should never reach here */
|
||||||
|
return "UNKNOWN";
|
||||||
|
}
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ typedef struct
|
|||||||
{
|
{
|
||||||
/* node information */
|
/* node information */
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
char conninfo[MAXLEN];
|
char conninfo[MAXLEN];
|
||||||
char replication_user[NAMEDATALEN];
|
char replication_user[NAMEDATALEN];
|
||||||
char data_directory[MAXPGPATH];
|
char data_directory[MAXPGPATH];
|
||||||
@@ -329,5 +329,6 @@ void free_parsed_argv(char ***argv_array);
|
|||||||
/* called by repmgr-client and repmgrd */
|
/* called by repmgr-client and repmgrd */
|
||||||
void exit_with_cli_errors(ItemList *error_list, const char *repmgr_command);
|
void exit_with_cli_errors(ItemList *error_list, const char *repmgr_command);
|
||||||
void print_item_list(ItemList *item_list);
|
void print_item_list(ItemList *item_list);
|
||||||
|
const char *print_connection_check_type(ConnectionCheckType type);
|
||||||
|
|
||||||
#endif /* _REPMGR_CONFIGFILE_H_ */
|
#endif /* _REPMGR_CONFIGFILE_H_ */
|
||||||
|
|||||||
@@ -301,6 +301,8 @@ get_controlfile(const char *DataDir)
|
|||||||
ControlFilePath);
|
ControlFilePath);
|
||||||
log_detail("%s", strerror(errno));
|
log_detail("%s", strerror(errno));
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
|
||||||
return control_file_info;
|
return control_file_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
188
dbutils.c
188
dbutils.c
@@ -43,6 +43,8 @@ int bdr_version_num = UNKNOWN_BDR_VERSION_NUM;
|
|||||||
static void log_db_error(PGconn *conn, const char *query_text, const char *fmt,...)
|
static void log_db_error(PGconn *conn, const char *query_text, const char *fmt,...)
|
||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||||
|
|
||||||
|
static bool _is_server_available(const char *conninfo, bool quiet);
|
||||||
|
|
||||||
static PGconn *_establish_db_connection(const char *conninfo,
|
static PGconn *_establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error,
|
const bool exit_on_error,
|
||||||
const bool log_notice,
|
const bool log_notice,
|
||||||
@@ -67,16 +69,19 @@ void
|
|||||||
log_db_error(PGconn *conn, const char *query_text, const char *fmt,...)
|
log_db_error(PGconn *conn, const char *query_text, const char *fmt,...)
|
||||||
{
|
{
|
||||||
va_list ap;
|
va_list ap;
|
||||||
|
char buf[MAXLEN];
|
||||||
|
int retval;
|
||||||
|
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
|
retval = vsnprintf(buf, MAXLEN, fmt, ap);
|
||||||
log_error(fmt, ap);
|
|
||||||
|
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
|
|
||||||
if (conn != NULL && PQstatus(conn) == CONNECTION_OK)
|
if (retval < MAXLEN)
|
||||||
|
log_error("%s", buf);
|
||||||
|
|
||||||
|
if (conn != NULL)
|
||||||
{
|
{
|
||||||
log_detail("%s", PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (query_text != NULL)
|
if (query_text != NULL)
|
||||||
@@ -190,13 +195,13 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
|
|||||||
{
|
{
|
||||||
if (log_notice)
|
if (log_notice)
|
||||||
{
|
{
|
||||||
log_notice(_("connection to database failed:\n %s"),
|
log_notice(_("connection to database failed"));
|
||||||
PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_error(_("connection to database failed:\n %s"),
|
log_error(_("connection to database failed"));
|
||||||
PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
}
|
}
|
||||||
log_detail(_("attempted to connect using:\n %s"),
|
log_detail(_("attempted to connect using:\n %s"),
|
||||||
connection_string);
|
connection_string);
|
||||||
@@ -287,8 +292,9 @@ establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
|||||||
/* Check to see that the backend connection was successfully made */
|
/* Check to see that the backend connection was successfully made */
|
||||||
if ((PQstatus(conn) != CONNECTION_OK))
|
if ((PQstatus(conn) != CONNECTION_OK))
|
||||||
{
|
{
|
||||||
log_error(_("connection to database failed:\n %s"),
|
log_error(_("connection to database failed"));
|
||||||
PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
|
|
||||||
if (exit_on_error)
|
if (exit_on_error)
|
||||||
{
|
{
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
@@ -338,7 +344,9 @@ is_superuser_connection(PGconn *conn, t_connection_user *userinfo)
|
|||||||
|
|
||||||
if (userinfo != NULL)
|
if (userinfo != NULL)
|
||||||
{
|
{
|
||||||
strncpy(userinfo->username, current_user, MAXLEN);
|
snprintf(userinfo->username,
|
||||||
|
sizeof(userinfo->username),
|
||||||
|
"%s", current_user);
|
||||||
userinfo->is_superuser = is_superuser;
|
userinfo->is_superuser = is_superuser;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1073,7 +1081,7 @@ get_pg_setting(PGconn *conn, const char *setting, char *output)
|
|||||||
{
|
{
|
||||||
if (strcmp(PQgetvalue(res, i, 0), setting) == 0)
|
if (strcmp(PQgetvalue(res, i, 0), setting) == 0)
|
||||||
{
|
{
|
||||||
strncpy(output, PQgetvalue(res, i, 1), MAXLEN);
|
snprintf(output, MAXLEN, "%s", PQgetvalue(res, i, 1));
|
||||||
success = true;
|
success = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1101,7 +1109,7 @@ alter_system_int(PGconn *conn, const char *name, int value)
|
|||||||
{
|
{
|
||||||
PQExpBufferData query;
|
PQExpBufferData query;
|
||||||
PGresult *res = NULL;
|
PGresult *res = NULL;
|
||||||
bool success = false;
|
bool success = true;
|
||||||
|
|
||||||
initPQExpBuffer(&query);
|
initPQExpBuffer(&query);
|
||||||
appendPQExpBuffer(&query,
|
appendPQExpBuffer(&query,
|
||||||
@@ -1117,7 +1125,6 @@ alter_system_int(PGconn *conn, const char *name, int value)
|
|||||||
success = false;
|
success = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
@@ -1174,7 +1181,7 @@ get_cluster_size(PGconn *conn, char *size)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
strncpy(size, PQgetvalue(res, 0, 0), MAXLEN);
|
snprintf(size, MAXLEN, "%s", PQgetvalue(res, 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
@@ -1222,7 +1229,7 @@ get_server_version(PGconn *conn, char *server_version_buf)
|
|||||||
* first space.
|
* first space.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
strncpy(_server_version_buf, PQgetvalue(res, 0, 1), MAXVERSIONSTR);
|
snprintf(_server_version_buf, MAXVERSIONSTR, "%s", PQgetvalue(res, 0, 1));
|
||||||
|
|
||||||
for (i = 0; i < MAXVERSIONSTR; i++)
|
for (i = 0; i < MAXVERSIONSTR; i++)
|
||||||
{
|
{
|
||||||
@@ -1349,7 +1356,8 @@ _get_primary_connection(PGconn *conn,
|
|||||||
|
|
||||||
/* initialize with the values of the current node being processed */
|
/* initialize with the values of the current node being processed */
|
||||||
node_id = atoi(PQgetvalue(res, i, 0));
|
node_id = atoi(PQgetvalue(res, i, 0));
|
||||||
strncpy(remote_conninfo, PQgetvalue(res, i, 1), MAXCONNINFO);
|
snprintf(remote_conninfo, MAXCONNINFO, "%s", PQgetvalue(res, i, 1));
|
||||||
|
|
||||||
log_verbose(LOG_INFO,
|
log_verbose(LOG_INFO,
|
||||||
_("checking if node %i is primary"),
|
_("checking if node %i is primary"),
|
||||||
node_id);
|
node_id);
|
||||||
@@ -1513,10 +1521,10 @@ get_ready_archive_files(PGconn *conn, const char *data_directory)
|
|||||||
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
||||||
{
|
{
|
||||||
struct stat statbuf;
|
struct stat statbuf;
|
||||||
char file_path[MAXPGPATH] = "";
|
char file_path[MAXPGPATH + sizeof(arcdir_ent->d_name)];
|
||||||
int basenamelen = 0;
|
int basenamelen = 0;
|
||||||
|
|
||||||
snprintf(file_path, MAXPGPATH,
|
snprintf(file_path, sizeof(file_path),
|
||||||
"%s/%s",
|
"%s/%s",
|
||||||
archive_status_dir,
|
archive_status_dir,
|
||||||
arcdir_ent->d_name);
|
arcdir_ent->d_name);
|
||||||
@@ -1994,9 +2002,13 @@ get_repmgr_extension_status(PGconn *conn, t_extension_versions *extversions)
|
|||||||
/* caller wants to know which versions are installed/available */
|
/* caller wants to know which versions are installed/available */
|
||||||
if (extversions != NULL)
|
if (extversions != NULL)
|
||||||
{
|
{
|
||||||
strncpy(extversions->default_version, PQgetvalue(res, 0, 2), 7);
|
snprintf(extversions->default_version,
|
||||||
|
sizeof(extversions->default_version),
|
||||||
|
"%s", PQgetvalue(res, 0, 2));
|
||||||
extversions->default_version_num = available_version;
|
extversions->default_version_num = available_version;
|
||||||
strncpy(extversions->installed_version, PQgetvalue(res, 0, 4), 7);
|
snprintf(extversions->installed_version,
|
||||||
|
sizeof(extversions->installed_version),
|
||||||
|
"%s", PQgetvalue(res, 0, 4));
|
||||||
extversions->installed_version_num = installed_version;
|
extversions->installed_version_num = installed_version;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2197,17 +2209,17 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row, bool init_
|
|||||||
node_info->upstream_node_id = atoi(PQgetvalue(res, row, 2));
|
node_info->upstream_node_id = atoi(PQgetvalue(res, row, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
strncpy(node_info->node_name, PQgetvalue(res, row, 3), MAXLEN);
|
snprintf(node_info->node_name, sizeof(node_info->node_name), "%s", PQgetvalue(res, row, 3));
|
||||||
strncpy(node_info->conninfo, PQgetvalue(res, row, 4), MAXLEN);
|
snprintf(node_info->conninfo, sizeof(node_info->conninfo), "%s", PQgetvalue(res, row, 4));
|
||||||
strncpy(node_info->repluser, PQgetvalue(res, row, 5), NAMEDATALEN);
|
snprintf(node_info->repluser, sizeof(node_info->repluser), "%s", PQgetvalue(res, row, 5));
|
||||||
strncpy(node_info->slot_name, PQgetvalue(res, row, 6), MAXLEN);
|
snprintf(node_info->slot_name, sizeof(node_info->slot_name), "%s", PQgetvalue(res, row, 6));
|
||||||
strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN);
|
snprintf(node_info->location, sizeof(node_info->location), "%s", PQgetvalue(res, row, 7));
|
||||||
node_info->priority = atoi(PQgetvalue(res, row, 8));
|
node_info->priority = atoi(PQgetvalue(res, row, 8));
|
||||||
node_info->active = atobool(PQgetvalue(res, row, 9));
|
node_info->active = atobool(PQgetvalue(res, row, 9));
|
||||||
strncpy(node_info->config_file, PQgetvalue(res, row, 10), MAXPGPATH);
|
snprintf(node_info->config_file, sizeof(node_info->config_file), "%s", PQgetvalue(res, row, 10));
|
||||||
|
|
||||||
/* This won't normally be set */
|
/* This won't normally be set */
|
||||||
strncpy(node_info->upstream_node_name, PQgetvalue(res, row, 11), MAXLEN);
|
snprintf(node_info->upstream_node_name, sizeof(node_info->upstream_node_name), "%s", PQgetvalue(res, row, 11));
|
||||||
|
|
||||||
/* Set remaining struct fields with default values */
|
/* Set remaining struct fields with default values */
|
||||||
|
|
||||||
@@ -3461,11 +3473,15 @@ config_file_list_add(t_configfile_list *list, const char *file, const char *file
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
strncpy(list->files[list->entries]->filepath, file, MAXPGPATH);
|
snprintf(list->files[list->entries]->filepath,
|
||||||
|
sizeof(list->files[list->entries]->filepath),
|
||||||
|
"%s", file);
|
||||||
canonicalize_path(list->files[list->entries]->filepath);
|
canonicalize_path(list->files[list->entries]->filepath);
|
||||||
|
|
||||||
|
snprintf(list->files[list->entries]->filename,
|
||||||
|
sizeof(list->files[list->entries]->filename),
|
||||||
|
"%s", filename);
|
||||||
|
|
||||||
strncpy(list->files[list->entries]->filename, filename, MAXPGPATH);
|
|
||||||
list->files[list->entries]->in_data_directory = in_data_dir;
|
list->files[list->entries]->in_data_directory = in_data_dir;
|
||||||
|
|
||||||
list->entries++;
|
list->entries++;
|
||||||
@@ -3545,13 +3561,10 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
|||||||
log_verbose(LOG_DEBUG, "_create_event(): event is \"%s\" for node %i", event, node_id);
|
log_verbose(LOG_DEBUG, "_create_event(): event is \"%s\" for node %i", event, node_id);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only attempt to write a record if a connection handle was provided.
|
* Only attempt to write a record if a connection handle was provided,
|
||||||
* Also check that the repmgr schema has been properly initialised - if
|
* and the connection handle points to a node which is not in recovery.
|
||||||
* not it means no configuration file was provided, which can happen with
|
|
||||||
* e.g. `repmgr standby clone`, and we won't know which schema to write
|
|
||||||
* to.
|
|
||||||
*/
|
*/
|
||||||
if (conn != NULL && PQstatus(conn) == CONNECTION_OK)
|
if (conn != NULL && PQstatus(conn) == CONNECTION_OK && get_recovery_type(conn) == RECTYPE_PRIMARY)
|
||||||
{
|
{
|
||||||
int n_node_id = htonl(node_id);
|
int n_node_id = htonl(node_id);
|
||||||
char *t_successful = successful ? "TRUE" : "FALSE";
|
char *t_successful = successful ? "TRUE" : "FALSE";
|
||||||
@@ -3605,7 +3618,7 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Store timestamp to send to the notification command */
|
/* Store timestamp to send to the notification command */
|
||||||
strncpy(event_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
snprintf(event_timestamp, MAXLEN, "%s", PQgetvalue(res, 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
@@ -4040,8 +4053,12 @@ get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
strncpy(record->slot_name, PQgetvalue(res, 0, 0), MAXLEN);
|
snprintf(record->slot_name,
|
||||||
strncpy(record->slot_type, PQgetvalue(res, 0, 1), MAXLEN);
|
sizeof(record->slot_name),
|
||||||
|
"%s", PQgetvalue(res, 0, 0));
|
||||||
|
snprintf(record->slot_type,
|
||||||
|
sizeof(record->slot_type),
|
||||||
|
"%s", PQgetvalue(res, 0, 1));
|
||||||
record->active = atobool(PQgetvalue(res, 0, 2));
|
record->active = atobool(PQgetvalue(res, 0, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4172,7 +4189,8 @@ get_tablespace_name_by_location(PGconn *conn, const char *location, char *name)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
strncpy(name, PQgetvalue(res, 0, 0), MAXLEN);
|
snprintf(name, MAXLEN,
|
||||||
|
"%s", PQgetvalue(res, 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
@@ -4206,7 +4224,7 @@ cancel_query(PGconn *conn, int timeout)
|
|||||||
if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0)
|
if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("unable to cancel current query"));
|
log_warning(_("unable to cancel current query"));
|
||||||
log_detail("%s", errbuf);
|
log_detail("\n%s", errbuf);
|
||||||
PQfreeCancel(pgcancel);
|
PQfreeCancel(pgcancel);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -4236,7 +4254,7 @@ wait_connection_availability(PGconn *conn, int timeout)
|
|||||||
long long timeout_ms;
|
long long timeout_ms;
|
||||||
|
|
||||||
/* calculate timeout in microseconds */
|
/* calculate timeout in microseconds */
|
||||||
timeout_ms = timeout * 1000000;
|
timeout_ms = (long long) timeout * 1000000;
|
||||||
|
|
||||||
while (timeout_ms > 0)
|
while (timeout_ms > 0)
|
||||||
{
|
{
|
||||||
@@ -4295,13 +4313,33 @@ wait_connection_availability(PGconn *conn, int timeout)
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
is_server_available(const char *conninfo)
|
is_server_available(const char *conninfo)
|
||||||
|
{
|
||||||
|
return _is_server_available(conninfo, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
is_server_available_quiet(const char *conninfo)
|
||||||
|
{
|
||||||
|
return _is_server_available(conninfo, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool
|
||||||
|
_is_server_available(const char *conninfo, bool quiet)
|
||||||
{
|
{
|
||||||
PGPing status = PQping(conninfo);
|
PGPing status = PQping(conninfo);
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "is_server_available(): ping status for %s is %i", conninfo, (int)status);
|
log_verbose(LOG_DEBUG, "is_server_available(): ping status for \"%s\" is %s", conninfo, print_pqping_status(status));
|
||||||
if (status == PQPING_OK)
|
if (status == PQPING_OK)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
if (quiet == false)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to ping \"%s\""), conninfo);
|
||||||
|
log_detail(_("PQping() returned \"%s\""), print_pqping_status(status));
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4314,10 +4352,17 @@ is_server_available_params(t_conninfo_param_list *param_list)
|
|||||||
false);
|
false);
|
||||||
|
|
||||||
/* deparsing the param_list adds overhead, so only do it if needed */
|
/* deparsing the param_list adds overhead, so only do it if needed */
|
||||||
if (log_level == LOG_DEBUG)
|
if (log_level == LOG_DEBUG || status != PQPING_OK)
|
||||||
{
|
{
|
||||||
char *conninfo_str = param_list_to_string(param_list);
|
char *conninfo_str = param_list_to_string(param_list);
|
||||||
log_verbose(LOG_DEBUG, "is_server_available_params(): ping status for %s is %i", conninfo_str, (int)status);
|
log_verbose(LOG_DEBUG, "is_server_available_params(): ping status for \"%s\" is %s", conninfo_str, print_pqping_status(status));
|
||||||
|
|
||||||
|
if (status != PQPING_OK)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to ping \"%s\""), conninfo_str);
|
||||||
|
log_detail(_("PQping() returned \"%s\""), print_pqping_status(status));
|
||||||
|
}
|
||||||
|
|
||||||
pfree(conninfo_str);
|
pfree(conninfo_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4355,7 +4400,7 @@ connection_ping_reconnect(PGconn *conn)
|
|||||||
if (PQstatus(conn) != CONNECTION_OK)
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("connection error, attempting to reset"));
|
log_warning(_("connection error, attempting to reset"));
|
||||||
log_detail("%s", PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
PQreset(conn);
|
PQreset(conn);
|
||||||
ping_result = connection_ping(conn);
|
ping_result = connection_ping(conn);
|
||||||
}
|
}
|
||||||
@@ -4887,6 +4932,7 @@ void
|
|||||||
init_replication_info(ReplInfo *replication_info)
|
init_replication_info(ReplInfo *replication_info)
|
||||||
{
|
{
|
||||||
memset(replication_info->current_timestamp, 0, sizeof(replication_info->current_timestamp));
|
memset(replication_info->current_timestamp, 0, sizeof(replication_info->current_timestamp));
|
||||||
|
replication_info->in_recovery = false;
|
||||||
replication_info->last_wal_receive_lsn = InvalidXLogRecPtr;
|
replication_info->last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||||
replication_info->last_wal_replay_lsn = InvalidXLogRecPtr;
|
replication_info->last_wal_replay_lsn = InvalidXLogRecPtr;
|
||||||
memset(replication_info->last_xact_replay_timestamp, 0, sizeof(replication_info->last_xact_replay_timestamp));
|
memset(replication_info->last_xact_replay_timestamp, 0, sizeof(replication_info->last_xact_replay_timestamp));
|
||||||
@@ -4907,6 +4953,7 @@ get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replicatio
|
|||||||
initPQExpBuffer(&query);
|
initPQExpBuffer(&query);
|
||||||
appendPQExpBufferStr(&query,
|
appendPQExpBufferStr(&query,
|
||||||
" SELECT ts, "
|
" SELECT ts, "
|
||||||
|
" in_recovery, "
|
||||||
" last_wal_receive_lsn, "
|
" last_wal_receive_lsn, "
|
||||||
" last_wal_replay_lsn, "
|
" last_wal_replay_lsn, "
|
||||||
" last_xact_replay_timestamp, "
|
" last_xact_replay_timestamp, "
|
||||||
@@ -4924,6 +4971,7 @@ get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replicatio
|
|||||||
" upstream_last_seen "
|
" upstream_last_seen "
|
||||||
" FROM ( "
|
" FROM ( "
|
||||||
" SELECT CURRENT_TIMESTAMP AS ts, "
|
" SELECT CURRENT_TIMESTAMP AS ts, "
|
||||||
|
" pg_catalog.pg_is_in_recovery() AS in_recovery, "
|
||||||
" pg_catalog.pg_last_xact_replay_timestamp() AS last_xact_replay_timestamp, ");
|
" pg_catalog.pg_last_xact_replay_timestamp() AS last_xact_replay_timestamp, ");
|
||||||
|
|
||||||
|
|
||||||
@@ -4989,14 +5037,19 @@ get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replicatio
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
strncpy(replication_info->current_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
|
snprintf(replication_info->current_timestamp,
|
||||||
replication_info->last_wal_receive_lsn = parse_lsn(PQgetvalue(res, 0, 1));
|
sizeof(replication_info->current_timestamp),
|
||||||
replication_info->last_wal_replay_lsn = parse_lsn(PQgetvalue(res, 0, 2));
|
"%s", PQgetvalue(res, 0, 0));
|
||||||
strncpy(replication_info->last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
replication_info->in_recovery = atobool(PQgetvalue(res, 0, 1));
|
||||||
replication_info->replication_lag_time = atoi(PQgetvalue(res, 0, 4));
|
replication_info->last_wal_receive_lsn = parse_lsn(PQgetvalue(res, 0, 2));
|
||||||
replication_info->receiving_streamed_wal = atobool(PQgetvalue(res, 0, 5));
|
replication_info->last_wal_replay_lsn = parse_lsn(PQgetvalue(res, 0, 3));
|
||||||
replication_info->wal_replay_paused = atobool(PQgetvalue(res, 0, 6));
|
snprintf(replication_info->last_xact_replay_timestamp,
|
||||||
replication_info->upstream_last_seen = atoi(PQgetvalue(res, 0, 7));
|
sizeof(replication_info->last_xact_replay_timestamp),
|
||||||
|
"%s", PQgetvalue(res, 0, 4));
|
||||||
|
replication_info->replication_lag_time = atoi(PQgetvalue(res, 0, 5));
|
||||||
|
replication_info->receiving_streamed_wal = atobool(PQgetvalue(res, 0, 6));
|
||||||
|
replication_info->wal_replay_paused = atobool(PQgetvalue(res, 0, 7));
|
||||||
|
replication_info->upstream_last_seen = atoi(PQgetvalue(res, 0, 8));
|
||||||
}
|
}
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
@@ -5042,13 +5095,12 @@ get_replication_lag_seconds(PGconn *conn)
|
|||||||
log_warning("%s", PQerrorMessage(conn));
|
log_warning("%s", PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* XXX magic number */
|
return UNKNOWN_REPLICATION_LAG;
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!PQntuples(res))
|
if (!PQntuples(res))
|
||||||
{
|
{
|
||||||
return -1;
|
return UNKNOWN_REPLICATION_LAG;
|
||||||
}
|
}
|
||||||
|
|
||||||
lag_seconds = atoi(PQgetvalue(res, 0, 0));
|
lag_seconds = atoi(PQgetvalue(res, 0, 0));
|
||||||
@@ -5506,7 +5558,9 @@ get_default_bdr_replication_set(PGconn *conn)
|
|||||||
/* For BDR2, we use a custom replication set */
|
/* For BDR2, we use a custom replication set */
|
||||||
namelen = strlen(BDR2_REPLICATION_SET_NAME);
|
namelen = strlen(BDR2_REPLICATION_SET_NAME);
|
||||||
default_replication_set = pg_malloc0(namelen + 1);
|
default_replication_set = pg_malloc0(namelen + 1);
|
||||||
strncpy(default_replication_set, BDR2_REPLICATION_SET_NAME, namelen);
|
snprintf(default_replication_set,
|
||||||
|
namelen + 1,
|
||||||
|
"%s", BDR2_REPLICATION_SET_NAME);
|
||||||
|
|
||||||
return default_replication_set;
|
return default_replication_set;
|
||||||
}
|
}
|
||||||
@@ -5536,7 +5590,9 @@ get_default_bdr_replication_set(PGconn *conn)
|
|||||||
namelen = strlen(PQgetvalue(res, 0, 0));
|
namelen = strlen(PQgetvalue(res, 0, 0));
|
||||||
default_replication_set = pg_malloc0(namelen + 1);
|
default_replication_set = pg_malloc0(namelen + 1);
|
||||||
|
|
||||||
strncpy(default_replication_set, PQgetvalue(res, 0, 0), namelen);
|
snprintf(default_replication_set,
|
||||||
|
namelen,
|
||||||
|
"%s", PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
@@ -5757,7 +5813,9 @@ get_bdr_other_node_name(PGconn *conn, int node_id, char *node_name)
|
|||||||
|
|
||||||
if (PQresultStatus(res) == PGRES_TUPLES_OK)
|
if (PQresultStatus(res) == PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
strncpy(node_name, PQgetvalue(res, 0, 0), MAXLEN);
|
snprintf(node_name,
|
||||||
|
NAMEDATALEN,
|
||||||
|
"%s", PQgetvalue(res, 0, 0));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -5940,12 +5998,12 @@ _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list)
|
|||||||
static void
|
static void
|
||||||
_populate_bdr_node_record(PGresult *res, t_bdr_node_info *node_info, int row)
|
_populate_bdr_node_record(PGresult *res, t_bdr_node_info *node_info, int row)
|
||||||
{
|
{
|
||||||
strncpy(node_info->node_sysid, PQgetvalue(res, row, 0), MAXLEN);
|
snprintf(node_info->node_sysid, sizeof(node_info->node_sysid), "%s", PQgetvalue(res, row, 0));
|
||||||
node_info->node_timeline = atoi(PQgetvalue(res, row, 1));
|
node_info->node_timeline = atoi(PQgetvalue(res, row, 1));
|
||||||
node_info->node_dboid = atoi(PQgetvalue(res, row, 2));
|
node_info->node_dboid = atoi(PQgetvalue(res, row, 2));
|
||||||
strncpy(node_info->node_name, PQgetvalue(res, row, 3), MAXLEN);
|
snprintf(node_info->node_name, sizeof(node_info->node_name), "%s", PQgetvalue(res, row, 3));
|
||||||
strncpy(node_info->node_local_dsn, PQgetvalue(res, row, 4), MAXLEN);
|
snprintf(node_info->node_local_dsn, sizeof(node_info->node_local_dsn), "%s", PQgetvalue(res, row, 4));
|
||||||
strncpy(node_info->peer_state_name, PQgetvalue(res, row, 5), MAXLEN);
|
snprintf(node_info->peer_state_name, sizeof(node_info->peer_state_name), "%s", PQgetvalue(res, row, 5));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -134,8 +134,8 @@ typedef struct s_node_info
|
|||||||
int node_id;
|
int node_id;
|
||||||
int upstream_node_id;
|
int upstream_node_id;
|
||||||
t_server_type type;
|
t_server_type type;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
char upstream_node_name[MAXLEN];
|
char upstream_node_name[NAMEDATALEN];
|
||||||
char conninfo[MAXLEN];
|
char conninfo[MAXLEN];
|
||||||
char repluser[NAMEDATALEN];
|
char repluser[NAMEDATALEN];
|
||||||
char location[MAXLEN];
|
char location[MAXLEN];
|
||||||
@@ -302,6 +302,7 @@ typedef struct BdrNodeInfoList
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
char current_timestamp[MAXLEN];
|
char current_timestamp[MAXLEN];
|
||||||
|
bool in_recovery;
|
||||||
XLogRecPtr last_wal_receive_lsn;
|
XLogRecPtr last_wal_receive_lsn;
|
||||||
XLogRecPtr last_wal_replay_lsn;
|
XLogRecPtr last_wal_replay_lsn;
|
||||||
char last_xact_replay_timestamp[MAXLEN];
|
char last_xact_replay_timestamp[MAXLEN];
|
||||||
@@ -517,6 +518,7 @@ int wait_connection_availability(PGconn *conn, int timeout);
|
|||||||
|
|
||||||
/* node availability functions */
|
/* node availability functions */
|
||||||
bool is_server_available(const char *conninfo);
|
bool is_server_available(const char *conninfo);
|
||||||
|
bool is_server_available_quiet(const char *conninfo);
|
||||||
bool is_server_available_params(t_conninfo_param_list *param_list);
|
bool is_server_available_params(t_conninfo_param_list *param_list);
|
||||||
ExecStatusType connection_ping(PGconn *conn);
|
ExecStatusType connection_ping(PGconn *conn);
|
||||||
ExecStatusType connection_ping_reconnect(PGconn *conn);
|
ExecStatusType connection_ping_reconnect(PGconn *conn);
|
||||||
|
|||||||
20
dirutil.c
20
dirutil.c
@@ -276,6 +276,8 @@ is_pg_running(const char *path)
|
|||||||
log_warning(_("invalid data in PostgreSQL PID file \"%s\""), path);
|
log_warning(_("invalid data in PostgreSQL PID file \"%s\""), path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fclose(pidf);
|
||||||
|
|
||||||
return PG_DIR_NOT_RUNNING;
|
return PG_DIR_NOT_RUNNING;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -334,6 +336,15 @@ create_pg_dir(const char *path, bool force)
|
|||||||
{
|
{
|
||||||
log_notice(_("-F/--force provided - deleting existing data directory \"%s\""), path);
|
log_notice(_("-F/--force provided - deleting existing data directory \"%s\""), path);
|
||||||
nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||||
|
|
||||||
|
/* recreate the directory ourselves to ensure permissions are correct */
|
||||||
|
if (!create_dir(path))
|
||||||
|
{
|
||||||
|
log_error(_("unable to create directory \"%s\"..."),
|
||||||
|
path);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -345,6 +356,15 @@ create_pg_dir(const char *path, bool force)
|
|||||||
{
|
{
|
||||||
log_notice(_("deleting existing directory \"%s\""), path);
|
log_notice(_("deleting existing directory \"%s\""), path);
|
||||||
nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||||
|
|
||||||
|
/* recreate the directory ourselves to ensure permissions are correct */
|
||||||
|
if (!create_dir(path))
|
||||||
|
{
|
||||||
|
log_error(_("unable to create directory \"%s\"..."),
|
||||||
|
path);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -481,28 +481,6 @@ repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
|
|||||||
|
|
||||||
<sect2 id="packages-old-versions-rhel-centos" xreflabel="old RHEL/CentOS package versions">
|
<sect2 id="packages-old-versions-rhel-centos" xreflabel="old RHEL/CentOS package versions">
|
||||||
<title>RHEL/CentOS</title>
|
<title>RHEL/CentOS</title>
|
||||||
<para>
|
|
||||||
Old RPM packages (<literal>3.2</literal> and later) can be retrieved from the
|
|
||||||
(deprecated) 2ndQuadrant repository at
|
|
||||||
<ulink url="http://packages.2ndquadrant.com/">http://packages.2ndquadrant.com/</ulink>
|
|
||||||
by installing the appropriate repository RPM:
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Old versions can be located with e.g.:
|
Old versions can be located with e.g.:
|
||||||
@@ -520,6 +498,32 @@ repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
|
|||||||
yum install repmgr96-4.0.6-1.rhel6</programlisting>
|
yum install repmgr96-4.0.6-1.rhel6</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<sect3 id="packages-old-versions-rhel-centos-repmgr3">
|
||||||
|
<title>repmgr 3 packages</title>
|
||||||
|
<para>
|
||||||
|
Old &repmgr; 3 RPM packages (<literal>3.2</literal> and later) can be retrieved from the
|
||||||
|
(deprecated) 2ndQuadrant repository at
|
||||||
|
<ulink url="http://packages.2ndquadrant.com/repmgr/yum/">http://packages.2ndquadrant.com/repmgr/yum/</ulink>
|
||||||
|
by installing the appropriate repository RPM:
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</sect3>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
|||||||
@@ -14,13 +14,47 @@
|
|||||||
<para>
|
<para>
|
||||||
See also: <xref linkend="upgrading-repmgr">
|
See also: <xref linkend="upgrading-repmgr">
|
||||||
</para>
|
</para>
|
||||||
|
<sect1 id="release-4.3.1">
|
||||||
|
<title>Release 4.3.1</title>
|
||||||
|
<para><emphasis>??? December ??, 2019</emphasis></para>
|
||||||
|
<para>
|
||||||
|
&repmgr; 4.3.1 is a minor release.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Bug fixes</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>:
|
||||||
|
ensure an existing replication slot is not deleted if the
|
||||||
|
follow target is the node's current upstream.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="release-4.3">
|
<sect1 id="release-4.3">
|
||||||
<title>Release 4.3</title>
|
<title>Release 4.3</title>
|
||||||
<para><emphasis>Mar ???, 2019</emphasis></para>
|
<para><emphasis>Tue April 2, 2019</emphasis></para>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 4.3 is a major release.
|
&repmgr; 4.3 is a major release.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
For details on how to upgrade an existing &repmgr; installation, see
|
||||||
|
documentation section <link linkend="upgrading-major-version">Upgrading a major version release</link>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If <application>repmgrd</application> is in use, a PostgreSQL restart <emphasis>is</emphasis> required;
|
||||||
|
in that case we suggest combining this &repmgr; upgrade with the next PostgreSQL
|
||||||
|
minor release, which will require a PostgreSQL restart in any case.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
|
||||||
<important>
|
<important>
|
||||||
<para>
|
<para>
|
||||||
@@ -37,7 +71,7 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
</important>
|
</important>
|
||||||
|
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>repmgr enhancements</title>
|
<title>repmgr client enhancements</title>
|
||||||
<para>
|
<para>
|
||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
|
|
||||||
@@ -152,13 +186,13 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application> will no longer consider nodes where <application>repmgrd</application>
|
<application>repmgrd</application> will no longer consider nodes where <application>repmgrd</application>
|
||||||
is not running as promotion candidates.
|
is not running as promotion candidates.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Previously, if <application>repmgrd</application> was not running on a node, but
|
||||||
|
that node qualified as the promotion candidate, it would never be promoted due to
|
||||||
|
the absence of a running <application>repmgrd</application>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
|
||||||
Previously, if <application>repmgrd</application> was not running on a node, but
|
|
||||||
that node qualified as the promotion candidate, it would never be promoted due to
|
|
||||||
the absence of a running <application>repmgrd</application>.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
@@ -188,6 +222,14 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
In a failover situation, <application>repmgrd</application> will not attempt to promote a
|
||||||
|
node if another primary has already appeared (e.g. by being promoted manually).
|
||||||
|
GitHub #420.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
@@ -197,6 +239,35 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
<para>
|
<para>
|
||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>:
|
||||||
|
fix display of node IDs with multiple digits.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
ensure <command><link linkend="repmgr-primary-unregister">repmgr primary unregister</link></command>
|
||||||
|
behaves correctly when executed on a witness server. GitHub #548.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
ensure <command><link linkend="repmgr-standby-register">repmgr standby register</link></command>
|
||||||
|
fails when <option>--upstream-node-id</option> is the same as the local node ID.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr;: when executing <link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>,
|
||||||
|
recheck primary/upstream connection(s) after the data copy operation is complete, as these may
|
||||||
|
have gone away.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
&repmgr;: when executing <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>,
|
&repmgr;: when executing <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>,
|
||||||
@@ -207,16 +278,8 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
&repmgr;: when executing <command><link linkend="repmgr-witness-register">repmgr witness register</link></command>,
|
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||||
chech the node to connected is actually the primary (i.e. not the witness server). GitHub #528.
|
verify the standby (promotion candidate) is currently attached to the primary (demotion candidate). GitHub #519.
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
&repmgr;: when executing <link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>,
|
|
||||||
recheck primary/upstream connection(s) after the data copy operation is complete, as these may
|
|
||||||
have gone away.
|
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
@@ -224,18 +287,26 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
<para>
|
<para>
|
||||||
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||||
avoid a potential race condition when comparing received WAL on the standby to the primary's shutdown location,
|
avoid a potential race condition when comparing received WAL on the standby to the primary's shutdown location,
|
||||||
as the standby's walreceiver may not have yet flushed all received WAL to disk. GitHub #518.
|
as the standby's walreceiver may not have yet flushed all received WAL to disk. GitHub #518.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
<listitem>
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
&repmgr;: when executing <command><link linkend="repmgr-witness-register">repmgr witness register</link></command>,
|
||||||
verify the standby (promotion candidate) is currently attached to the primary (demotion candidate). GitHub #519.
|
check that the node connected to is actually the primary (i.e. not the witness server). GitHub #528.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<command><link linkend="repmgr-node-check">repmgr node check</link></command>
|
||||||
|
will only consider physical replication slots, as the purpose
|
||||||
|
of slot checks is to warn about potential issues with
|
||||||
|
streaming replication standbys which are no longer attached.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<application>repmgrd</application>: on a cascaded standby, don't fail over if
|
<application>repmgrd</application>: on a cascaded standby, don't fail over if
|
||||||
@@ -243,29 +314,6 @@ REPMGRD_OPTS="--daemonize=false"</programlisting>
|
|||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>:
|
|
||||||
fix display of node IDs with multiple digits.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
ensure <command><link linkend="repmgr-primary-unregister">repmgr primary unregister</link></command>
|
|
||||||
behaves correctly when executed on a witness server. GitHub #548.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<command><link linkend="repmgr-node-check">repmgr node check</link></command>
|
|
||||||
will only consider physical replication slots, as the purpose
|
|
||||||
of slot checks is to warn about potential issues with
|
|
||||||
streaming replication standbys which are no longer attached.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|||||||
@@ -39,6 +39,10 @@
|
|||||||
called <varname>standby1</varname> (for example), things will be confusing
|
called <varname>standby1</varname> (for example), things will be confusing
|
||||||
to say the least.
|
to say the least.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
The string's maximum length is 63 characters and it should
|
||||||
|
contain only printable ASCII characters.
|
||||||
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
|||||||
@@ -1,93 +0,0 @@
|
|||||||
<chapter id="using-witness-server">
|
|
||||||
<indexterm>
|
|
||||||
<primary>witness server</primary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
|
|
||||||
<title>Using a witness server</title>
|
|
||||||
<para>
|
|
||||||
A <xref linkend="witness-server"> is a normal PostgreSQL instance which
|
|
||||||
is not part of the streaming replication cluster; its purpose is, if a
|
|
||||||
failover situation occurs, to provide proof that it is the primary server
|
|
||||||
itself which is unavailable, rather than e.g. a network split between
|
|
||||||
different physical locations.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
A typical use case for a witness server is a two-node streaming replication
|
|
||||||
setup, where the primary and standby are in different locations (data centres).
|
|
||||||
By creating a witness server in the same location (data centre) as the primary,
|
|
||||||
if the primary becomes unavailable it's possible for the standby to decide whether
|
|
||||||
it can promote itself without risking a "split brain" scenario: if it can't see either the
|
|
||||||
witness or the primary server, it's likely there's a network-level interruption
|
|
||||||
and it should not promote itself. If it can see the witness but not the primary,
|
|
||||||
this proves there is no network interruption and the primary itself is unavailable,
|
|
||||||
and it can therefore promote itself (and ideally take action to fence the
|
|
||||||
former primary).
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
<emphasis>Never</emphasis> install a witness server on the same physical host
|
|
||||||
as another node in the replication cluster managed by &repmgr; - it's essential
|
|
||||||
the witness is not affected in any way by failure of another node.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
For more complex replication scenarios,e.g. with multiple datacentres, it may
|
|
||||||
be preferable to use location-based failover, which ensures that only nodes
|
|
||||||
in the same location as the primary will ever be promotion candidates;
|
|
||||||
see <xref linkend="repmgrd-network-split"> for more details.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
A witness server will only be useful if <application>repmgrd</application>
|
|
||||||
is in use.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<sect1 id="creating-witness-server">
|
|
||||||
<title>Creating a witness server</title>
|
|
||||||
<para>
|
|
||||||
To create a witness server, set up a normal PostgreSQL instance on a server
|
|
||||||
in the same physical location as the cluster's primary server.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This instance should <emphasis>not</emphasis> be on the same physical host as the primary server,
|
|
||||||
as otherwise if the primary server fails due to hardware issues, the witness
|
|
||||||
server will be lost too.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
&repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
|
|
||||||
command, which would automatically create a PostgreSQL instance. However
|
|
||||||
this often resulted in an unsatisfactory, hard-to-customise instance.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
The witness server should be configured in the same way as a normal
|
|
||||||
&repmgr; node; see section <xref linkend="configuration">.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Register the witness server with <xref linkend="repmgr-witness-register">.
|
|
||||||
This will create the &repmgr; extension on the witness server, and make
|
|
||||||
a copy of the &repmgr; metadata.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
As the witness server is not part of the replication cluster, further
|
|
||||||
changes to the &repmgr; metadata will be synchronised by
|
|
||||||
<application>repmgrd</application>.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
Once the witness server has been configured, <application>repmgrd</application>
|
|
||||||
should be started; for more details see <xref linkend="repmgrd-witness-server">.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
|
||||||
@@ -45,7 +45,6 @@
|
|||||||
<!ENTITY promoting-standby SYSTEM "promoting-standby.sgml">
|
<!ENTITY promoting-standby SYSTEM "promoting-standby.sgml">
|
||||||
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.sgml">
|
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.sgml">
|
||||||
<!ENTITY switchover SYSTEM "switchover.sgml">
|
<!ENTITY switchover SYSTEM "switchover.sgml">
|
||||||
<!ENTITY configuring-witness-server SYSTEM "configuring-witness-server.sgml">
|
|
||||||
|
|
||||||
<!ENTITY event-notifications SYSTEM "event-notifications.sgml">
|
<!ENTITY event-notifications SYSTEM "event-notifications.sgml">
|
||||||
<!ENTITY upgrading-repmgr SYSTEM "upgrading-repmgr.sgml">
|
<!ENTITY upgrading-repmgr SYSTEM "upgrading-repmgr.sgml">
|
||||||
|
|||||||
@@ -61,28 +61,28 @@ deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisti
|
|||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibedit-dev</literal></simpara>
|
<simpara><literal>libedit-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibkrb5-dev</literal></simpara>
|
<simpara><literal>libkrb5-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibpam0g-dev</literal></simpara>
|
<simpara><literal>libpam0g-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibreadline-dev</literal></simpara>
|
<simpara><literal>libreadline-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibselinux1-dev</literal></simpara>
|
<simpara><literal>libselinux1-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibssl-dev</literal></simpara>
|
<simpara><literal>libssl-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibxml2-dev</literal></simpara>
|
<simpara><literal>libxml2-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara><literal>llibxslt1-dev</literal></simpara>
|
<simpara><literal>libxslt1-dev</literal></simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
@@ -136,6 +136,16 @@ deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisti
|
|||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
If building against PostgreSQL 11 or later configured with the <option>--with-llvm</option> option
|
||||||
|
(this is the case with the PGDG-provided packages) you'll also need to install the
|
||||||
|
<literal>llvm-toolset-7-clang</literal> package. This is available via the
|
||||||
|
<ulink url="https://wiki.centos.org/AdditionalResources/Repositories/SCL">Software Collections (SCL) Repository</ulink>.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
|
|||||||
@@ -76,19 +76,25 @@
|
|||||||
</para>
|
</para>
|
||||||
<programlisting>
|
<programlisting>
|
||||||
|
|
||||||
# Enable replication connections; set this figure to at least one more
|
# Enable replication connections; set this value to at least one more
|
||||||
# than the number of standbys which will connect to this server
|
# than the number of standbys which will connect to this server
|
||||||
# (note that repmgr will execute `pg_basebackup` in WAL streaming mode,
|
# (note that repmgr will execute "pg_basebackup" in WAL streaming mode,
|
||||||
# which requires two free WAL senders)
|
# which requires two free WAL senders).
|
||||||
|
#
|
||||||
|
# See: https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-WAL-SENDERS
|
||||||
|
|
||||||
max_wal_senders = 10
|
max_wal_senders = 10
|
||||||
|
|
||||||
# Enable replication slots; set this figure to at least one more
|
# If using replication slots, set this value to at least one more
|
||||||
# than the number of standbys which will connect to this server.
|
# than the number of standbys which will connect to this server.
|
||||||
# Note that repmgr will only make use of replication slots if
|
# Note that repmgr will only make use of replication slots if
|
||||||
# "use_replication_slots" is set to "true" in repmgr.conf
|
# "use_replication_slots" is set to "true" in "repmgr.conf".
|
||||||
|
# (If you are not intending to use replication slots, this value
|
||||||
|
# can be set to "0").
|
||||||
|
#
|
||||||
|
# See: https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-REPLICATION-SLOTS
|
||||||
|
|
||||||
max_replication_slots = 0
|
max_replication_slots = 10
|
||||||
|
|
||||||
# Ensure WAL files contain enough information to enable read-only queries
|
# Ensure WAL files contain enough information to enable read-only queries
|
||||||
# on the standby.
|
# on the standby.
|
||||||
@@ -103,24 +109,31 @@
|
|||||||
|
|
||||||
# Enable read-only queries on a standby
|
# Enable read-only queries on a standby
|
||||||
# (Note: this will be ignored on a primary but we recommend including
|
# (Note: this will be ignored on a primary but we recommend including
|
||||||
# it anyway)
|
# it anyway, in case the primary later becomes a standby)
|
||||||
|
#
|
||||||
|
# See: https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-HOT-STANDBY
|
||||||
|
|
||||||
hot_standby = on
|
hot_standby = on
|
||||||
|
|
||||||
# Enable WAL file archiving
|
# Enable WAL file archiving
|
||||||
|
#
|
||||||
|
# See: https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-MODE
|
||||||
|
|
||||||
archive_mode = on
|
archive_mode = on
|
||||||
|
|
||||||
# Set archive command to a script or application that will safely store
|
# Set archive command to a dummy command; this can later be changed without
|
||||||
# you WALs in a secure place. /bin/true is an example of a command that
|
# needing to restart the PostgreSQL instance.
|
||||||
# ignores archiving. Use something more sensible.
|
#
|
||||||
|
# See: https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND
|
||||||
|
|
||||||
archive_command = '/bin/true'
|
archive_command = '/bin/true'
|
||||||
</programlisting>
|
</programlisting>
|
||||||
<tip>
|
<tip>
|
||||||
<simpara>
|
<simpara>
|
||||||
Rather than editing these settings in the default <filename>postgresql.conf</filename>
|
Rather than editing these settings in the default <filename>postgresql.conf</filename>
|
||||||
file, create a separate file such as <filename>postgresql.replication.conf</filename> and
|
file, create a separate file such as <filename>postgresql.replication.conf</filename> and
|
||||||
include it from the end of the main configuration file with:
|
include it from the end of the main configuration file with:
|
||||||
<command>include 'postgresql.replication.conf</command>.
|
<command>include 'postgresql.replication.conf'</command>.
|
||||||
</simpara>
|
</simpara>
|
||||||
</tip>
|
</tip>
|
||||||
<para>
|
<para>
|
||||||
@@ -129,7 +142,8 @@
|
|||||||
<varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
<varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
See also the <link linkend="configuration-postgresql">PostgreSQL configuration</link> section in the <link linkend="configuration">repmgr configuaration guide</link>.
|
See also the <link linkend="configuration-postgresql">PostgreSQL configuration</link> section in the
|
||||||
|
<link linkend="configuration">repmgr configuration guide</link>.
|
||||||
</para>
|
</para>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
|||||||
@@ -196,11 +196,31 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
An issue was encountered while attempting to retrieve
|
||||||
|
&repmgr; metadata.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>ERR_DB_CONN (6)</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgr; was unable to connect to the local PostgreSQL instance.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
One or more issues were detected.
|
One or more issues were detected with the replication configuration,
|
||||||
|
e.g. a node was not in its expected state.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|||||||
@@ -22,10 +22,10 @@
|
|||||||
passwordless SSH connection to the current primary.
|
passwordless SSH connection to the current primary.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If other standbys are connected to the demotion candidate, &repmgr; can instruct
|
If other nodes are connected to the demotion candidate, &repmgr; can instruct
|
||||||
these to follow the new primary if the option <literal>--siblings-follow</literal>
|
these to follow the new primary if the option <literal>--siblings-follow</literal>
|
||||||
is specified. This requires a passwordless SSH connection between the promotion
|
is specified. This requires a passwordless SSH connection between the promotion
|
||||||
candidate (new primary) and the standbys attached to the demotion candidate
|
candidate (new primary) and the nodes attached to the demotion candidate
|
||||||
(existing primary).
|
(existing primary).
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
@@ -150,8 +150,18 @@
|
|||||||
<term><option>--siblings-follow</option></term>
|
<term><option>--siblings-follow</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Have standbys attached to the old primary follow the new primary.
|
Have nodes attached to the old primary follow the new primary.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
This will also ensure that a witness node, if in use, is updated
|
||||||
|
with the new primary's data.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
In a future &repmgr; release, <option>--siblings-follow</option> will be applied
|
||||||
|
by default.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
</variablelist>
|
</variablelist>
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
%filelist;
|
%filelist;
|
||||||
|
|
||||||
<!ENTITY repmgr "<productname>repmgr</productname>">
|
<!ENTITY repmgr "<productname>repmgr</productname>">
|
||||||
|
<!ENTITY repmgrd "<productname>repmgrd</productname>">
|
||||||
<!ENTITY postgres "<productname>PostgreSQL</productname>">
|
<!ENTITY postgres "<productname>PostgreSQL</productname>">
|
||||||
]>
|
]>
|
||||||
|
|
||||||
@@ -25,7 +26,13 @@
|
|||||||
<para>
|
<para>
|
||||||
This is the official documentation of &repmgr; &repmgrversion; for
|
This is the official documentation of &repmgr; &repmgrversion; for
|
||||||
use with PostgreSQL 9.3 - PostgreSQL 11.
|
use with PostgreSQL 9.3 - PostgreSQL 11.
|
||||||
It describes the functionality supported by the current version of &repmgr;.
|
</para>
|
||||||
|
<para>
|
||||||
|
&repmgr; is being continually developed and we strongly recommend using the
|
||||||
|
latest version. Please check the
|
||||||
|
<ulink url="https://repmgr.org/">repmgr website</ulink> for details
|
||||||
|
about the current &repmgr; version as well as the
|
||||||
|
<ulink url="https://repmgr.org/docs/current/index.html">current repmgr documentation</ulink>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
@@ -73,7 +80,6 @@
|
|||||||
&promoting-standby;
|
&promoting-standby;
|
||||||
&follow-new-primary;
|
&follow-new-primary;
|
||||||
&switchover;
|
&switchover;
|
||||||
&configuring-witness-server;
|
|
||||||
&event-notifications;
|
&event-notifications;
|
||||||
&upgrading-repmgr;
|
&upgrading-repmgr;
|
||||||
</part>
|
</part>
|
||||||
|
|||||||
@@ -23,37 +23,92 @@
|
|||||||
<primary>witness server</primary>
|
<primary>witness server</primary>
|
||||||
<secondary>repmgrd</secondary>
|
<secondary>repmgrd</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
<title>Using a witness server</title>
|
||||||
|
<para>
|
||||||
|
A <xref linkend="witness-server"> is a normal PostgreSQL instance which
|
||||||
|
is not part of the streaming replication cluster; its purpose is, if a
|
||||||
|
failover situation occurs, to provide proof that it is the primary server
|
||||||
|
itself which is unavailable, rather than e.g. a network split between
|
||||||
|
different physical locations.
|
||||||
|
</para>
|
||||||
|
|
||||||
<title>Using a witness server with repmgrd</title>
|
|
||||||
<para>
|
<para>
|
||||||
In a situation caused e.g. by a network interruption between two
|
A typical use case for a witness server is a two-node streaming replication
|
||||||
data centres, it's important to avoid a "split-brain" situation where
|
setup, where the primary and standby are in different locations (data centres).
|
||||||
both sides of the network assume they are the active segment and the
|
By creating a witness server in the same location (data centre) as the primary,
|
||||||
side without an active primary unilaterally promotes one of its standbys.
|
if the primary becomes unavailable it's possible for the standby to decide whether
|
||||||
</para>
|
it can promote itself without risking a "split brain" scenario: if it can't see either the
|
||||||
<para>
|
witness or the primary server, it's likely there's a network-level interruption
|
||||||
To prevent this situation happening, it's essential to ensure that one
|
and it should not promote itself. If it can see the witness but not the primary,
|
||||||
network segment has a "voting majority", so other segments will know
|
this proves there is no network interruption and the primary itself is unavailable,
|
||||||
they're in the minority and not attempt to promote a new primary. Where
|
and it can therefore promote itself (and ideally take action to fence the
|
||||||
an odd number of servers exists, this is not an issue. However, if each
|
former primary).
|
||||||
network has an even number of nodes, it's necessary to provide some way
|
|
||||||
of ensuring a majority, which is where the witness server becomes useful.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This is not a fully-fledged standby node and is not integrated into
|
|
||||||
replication, but it effectively represents the "casting vote" when
|
|
||||||
deciding which network segment has a majority. A witness server can
|
|
||||||
be set up using <link linkend="repmgr-witness-register"><command>repmgr witness register</command></link>;
|
|
||||||
see also section <link linkend="using-witness-server">Using a witness server</link>.
|
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
It only
|
<emphasis>Never</emphasis> install a witness server on the same physical host
|
||||||
makes sense to create a witness server in conjunction with running
|
as another node in the replication cluster managed by &repmgr; - it's essential
|
||||||
<application>repmgrd</application>; the witness server will require its own
|
the witness is not affected in any way by failure of another node.
|
||||||
<application>repmgrd</application> instance.
|
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
<para>
|
||||||
|
For more complex replication scenarios, e.g. with multiple data centres, it may
|
||||||
|
be preferable to use location-based failover, which ensures that only nodes
|
||||||
|
in the same location as the primary will ever be promotion candidates;
|
||||||
|
see <xref linkend="repmgrd-network-split"> for more details.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
A witness server will only be useful if <application>repmgrd</application>
|
||||||
|
is in use.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
<sect2 id="creating-witness-server">
|
||||||
|
<title>Creating a witness server</title>
|
||||||
|
<para>
|
||||||
|
To create a witness server, set up a normal PostgreSQL instance on a server
|
||||||
|
in the same physical location as the cluster's primary server.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This instance should <emphasis>not</emphasis> be on the same physical host as the primary server,
|
||||||
|
as otherwise if the primary server fails due to hardware issues, the witness
|
||||||
|
server will be lost too.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
&repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
|
||||||
|
command, which would automatically create a PostgreSQL instance. However
|
||||||
|
this often resulted in an unsatisfactory, hard-to-customise instance.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
The witness server should be configured in the same way as a normal
|
||||||
|
&repmgr; node; see section <xref linkend="configuration">.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Register the witness server with <xref linkend="repmgr-witness-register">.
|
||||||
|
This will create the &repmgr; extension on the witness server, and make
|
||||||
|
a copy of the &repmgr; metadata.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
As the witness server is not part of the replication cluster, further
|
||||||
|
changes to the &repmgr; metadata will be synchronised by
|
||||||
|
<application>repmgrd</application>.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
Once the witness server has been configured, <application>repmgrd</application>
|
||||||
|
should be started.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
@@ -150,10 +205,6 @@
|
|||||||
<para>
|
<para>
|
||||||
Following the failover operation, no matter what the outcome, each node will reconnect its WAL receiver.
|
Following the failover operation, no matter what the outcome, each node will reconnect its WAL receiver.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
|
||||||
If using <option>standby_disconnect_on_failover</option>, we recommend that the
|
|
||||||
<option>primary_visibility_consensus</option> option is also used.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
<title>BDR failover with repmgrd</title>
|
<title>BDR failover with repmgrd</title>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 4.x provides support for monitoring BDR nodes and taking action in
|
&repmgr; 4.x provides support for monitoring a pair of BDR 2.x nodes and taking action in
|
||||||
case one of the nodes fails.
|
case one of the nodes fails.
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
@@ -31,8 +31,21 @@
|
|||||||
reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
|
reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<simpara>
|
||||||
|
This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
|
||||||
|
It is <emphasis>not</emphasis> required for later BDR versions.
|
||||||
|
</simpara>
|
||||||
|
</note>
|
||||||
|
|
||||||
<sect1 id="bdr-prerequisites" xreflabel="BDR prerequisites">
|
<sect1 id="bdr-prerequisites" xreflabel="BDR prerequisites">
|
||||||
<title>Prerequisites</title>
|
<title>Prerequisites</title>
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
|
||||||
|
It is <emphasis>not</emphasis> required for later BDR versions.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension
|
&repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension
|
||||||
enabled and configured for a two-node BDR network. &repmgr; 4 packages
|
enabled and configured for a two-node BDR network. &repmgr; 4 packages
|
||||||
|
|||||||
@@ -53,7 +53,7 @@
|
|||||||
|
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry id="connection-check-type">
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>connection_check_type</primary>
|
<primary>connection_check_type</primary>
|
||||||
@@ -217,7 +217,7 @@
|
|||||||
<command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command> command.
|
<command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command> command.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
It is also possible to provide e.g. a shell script to e.g. perform user-defined tasks
|
It is also possible to provide a shell script to e.g. perform user-defined tasks
|
||||||
before promoting the current node. In this case the script <emphasis>must</emphasis>
|
before promoting the current node. In this case the script <emphasis>must</emphasis>
|
||||||
at some point execute <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>
|
at some point execute <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>
|
||||||
to promote the node; if this is not done, &repmgr; metadata will not be updated and
|
to promote the node; if this is not done, &repmgr; metadata will not be updated and
|
||||||
@@ -257,7 +257,7 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Normally <option>follow_command</option> is set as &repmgr;'s
|
Normally <option>follow_command</option> is set as &repmgr;'s
|
||||||
<command><link linkend="repmgr-standby-follow">repmgr standby promote</link></command> command.
|
<command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command> command.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The <option>follow_command</option> parameter
|
The <option>follow_command</option> parameter
|
||||||
@@ -270,7 +270,7 @@
|
|||||||
the original primary.
|
the original primary.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
It is also possible to provide e.g. a shell script to e.g. perform user-defined tasks
|
It is also possible to provide a shell script to e.g. perform user-defined tasks
|
||||||
before the current node follows the new primary. In this case the script <emphasis>must</emphasis>
|
before the current node follows the new primary. In this case the script <emphasis>must</emphasis>
|
||||||
at some point execute <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>
|
at some point execute <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>
|
||||||
to attach the node to the new primary; if this is not done, &repmgr; metadata will not be updated and
|
to attach the node to the new primary; if this is not done, &repmgr; metadata will not be updated and
|
||||||
@@ -364,27 +364,6 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
<varlistentry>
|
|
||||||
|
|
||||||
<indexterm>
|
|
||||||
<primary>primary_visibility_consensus</primary>
|
|
||||||
</indexterm>
|
|
||||||
<term><option>primary_visibility_consensus</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
If <literal>true</literal>, only continue with failover if no standbys have seen
|
|
||||||
the primary node recently.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
This option <emphasis>must</emphasis> be identically configured
|
|
||||||
on all nodes.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
@@ -676,12 +655,6 @@ repmgrd_service_stop_command='sudo systemctl repmgr11 stop'
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<varname>primary_visibility_consensus</varname>
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<varname>promote_command</varname>
|
<varname>promote_command</varname>
|
||||||
|
|||||||
@@ -13,6 +13,66 @@
|
|||||||
failover and updating standbys to follow the new primary, as well as
|
failover and updating standbys to follow the new primary, as well as
|
||||||
providing monitoring information about the state of each standby.
|
providing monitoring information about the state of each standby.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application> is designed to be straightforward to set up
|
||||||
|
and does not require additional external infrastructure.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Functionality provided by <application>repmgrd</application> includes:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
wide range of <link linkend="repmgrd-basic-configuration">configuration options</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
option to execute custom scripts ("<link linkend="event-notifications">event notifications</link>")
|
||||||
|
at different points in the failover sequence
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
ability to <link linkend="repmgrd-pausing">pause repmgrd</link>
|
||||||
|
operation on all nodes with a
|
||||||
|
<link linkend="repmgr-daemon-pause"><command>single command</command></link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
optional <link linkend="repmgrd-witness-server">witness server</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
"location" configuration option to restrict
|
||||||
|
potential promotion candidates to a single location
|
||||||
|
(e.g. when nodes are spread over multiple data centres)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="connection-check-type">choice of method</link> to determine node availability
|
||||||
|
(PostgreSQL ping, query execution or new connection)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
retention of monitoring statistics (optional)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
<sect1 id="repmgrd-demonstration">
|
<sect1 id="repmgrd-demonstration">
|
||||||
|
|
||||||
@@ -22,12 +82,12 @@
|
|||||||
and two standbys streaming directly from the primary) so that the cluster looks
|
and two standbys streaming directly from the primary) so that the cluster looks
|
||||||
something like this:
|
something like this:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
$ repmgr -f /etc/repmgr.conf cluster show --compact
|
||||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
ID | Name | Role | Status | Upstream | Location | Prio.
|
||||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
----+-------+---------+-----------+----------+----------+-------
|
||||||
1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr
|
1 | node1 | primary | * running | | default | 100
|
||||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr
|
2 | node2 | standby | running | node1 | default | 100
|
||||||
3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
3 | node3 | standby | running | node1 | default | 100</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<tip>
|
<tip>
|
||||||
@@ -40,10 +100,11 @@
|
|||||||
Start <application>repmgrd</application> on each standby and verify that it's running by examining the
|
Start <application>repmgrd</application> on each standby and verify that it's running by examining the
|
||||||
log output, which at log level <literal>INFO</literal> will look like this:
|
log output, which at log level <literal>INFO</literal> will look like this:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
[2017-08-24 17:31:00] [NOTICE] using configuration file "/etc/repmgr.conf"
|
[2019-03-15 06:32:05] [NOTICE] repmgrd (repmgrd 4.3) starting up
|
||||||
[2017-08-24 17:31:00] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr"
|
[2019-03-15 06:32:05] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr connect_timeout=2"
|
||||||
[2017-08-24 17:31:00] [NOTICE] starting monitoring of node <literal>node2</literal> (ID: 2)
|
INFO: set_repmgrd_pid(): provided pidfile is /var/run/repmgr/repmgrd-11.pid
|
||||||
[2017-08-24 17:31:00] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
[2019-03-15 06:32:05] [NOTICE] starting monitoring of node "node2" (ID: 2)
|
||||||
|
[2019-03-15 06:32:05] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Each <application>repmgrd</application> should also have recorded its successful startup as an event:
|
Each <application>repmgrd</application> should also have recorded its successful startup as an event:
|
||||||
@@ -51,9 +112,9 @@
|
|||||||
$ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
|
$ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
|
||||||
Node ID | Name | Event | OK | Timestamp | Details
|
Node ID | Name | Event | OK | Timestamp | Details
|
||||||
---------+-------+---------------+----+---------------------+-------------------------------------------------------------
|
---------+-------+---------------+----+---------------------+-------------------------------------------------------------
|
||||||
3 | node3 | repmgrd_start | t | 2017-08-24 17:35:54 | monitoring connection to upstream node "node1" (node ID: 1)
|
3 | node3 | repmgrd_start | t | 2019-03-14 04:17:30 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||||
2 | node2 | repmgrd_start | t | 2017-08-24 17:35:50 | monitoring connection to upstream node "node1" (node ID: 1)
|
2 | node2 | repmgrd_start | t | 2019-03-14 04:11:47 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||||
1 | node1 | repmgrd_start | t | 2017-08-24 17:35:46 | monitoring cluster primary "node1" (node ID: 1) </programlisting>
|
1 | node1 | repmgrd_start | t | 2019-03-14 04:04:31 | monitoring cluster primary "node1" (node ID: 1)</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Now stop the current primary server with e.g.:
|
Now stop the current primary server with e.g.:
|
||||||
@@ -67,55 +128,59 @@
|
|||||||
decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
|
decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
|
||||||
which has promoted to new primary after failure of the original primary (<literal>node1</literal>).
|
which has promoted to new primary after failure of the original primary (<literal>node1</literal>).
|
||||||
<programlisting>
|
<programlisting>
|
||||||
[2017-08-24 23:32:01] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state
|
[2019-03-15 06:37:50] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||||
[2017-08-24 23:32:08] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
[2019-03-15 06:37:50] [INFO] checking state of node 1, 1 of 3 attempts
|
||||||
[2017-08-24 23:32:08] [INFO] checking state of node 1, 1 of 5 attempts
|
[2019-03-15 06:37:50] [INFO] sleeping 5 seconds until next reconnection attempt
|
||||||
[2017-08-24 23:32:08] [INFO] sleeping 1 seconds until next reconnection attempt
|
[2019-03-15 06:37:55] [INFO] checking state of node 1, 2 of 3 attempts
|
||||||
[2017-08-24 23:32:09] [INFO] checking state of node 1, 2 of 5 attempts
|
[2019-03-15 06:37:55] [INFO] sleeping 5 seconds until next reconnection attempt
|
||||||
[2017-08-24 23:32:09] [INFO] sleeping 1 seconds until next reconnection attempt
|
[2019-03-15 06:38:00] [INFO] checking state of node 1, 3 of 3 attempts
|
||||||
[2017-08-24 23:32:10] [INFO] checking state of node 1, 3 of 5 attempts
|
[2019-03-15 06:38:00] [WARNING] unable to reconnect to node 1 after 3 attempts
|
||||||
[2017-08-24 23:32:10] [INFO] sleeping 1 seconds until next reconnection attempt
|
[2019-03-15 06:38:00] [INFO] primary and this node have the same location ("default")
|
||||||
[2017-08-24 23:32:11] [INFO] checking state of node 1, 4 of 5 attempts
|
[2019-03-15 06:38:00] [INFO] local node's last receive lsn: 0/900CBF8
|
||||||
[2017-08-24 23:32:11] [INFO] sleeping 1 seconds until next reconnection attempt
|
[2019-03-15 06:38:00] [INFO] node 3 last saw primary node 12 second(s) ago
|
||||||
[2017-08-24 23:32:12] [INFO] checking state of node 1, 5 of 5 attempts
|
[2019-03-15 06:38:00] [INFO] last receive LSN for sibling node "node3" (ID: 3) is: 0/900CBF8
|
||||||
[2017-08-24 23:32:12] [WARNING] unable to reconnect to node 1 after 5 attempts
|
[2019-03-15 06:38:00] [INFO] node "node3" (ID: 3) has same LSN as current candidate "node2" (ID: 2)
|
||||||
INFO: setting voting term to 1
|
[2019-03-15 06:38:00] [INFO] visible nodes: 2; total nodes: 2; no nodes have seen the primary within the last 4 seconds
|
||||||
INFO: node 2 is candidate
|
[2019-03-15 06:38:00] [NOTICE] promotion candidate is "node2" (ID: 2)
|
||||||
INFO: node 3 has received request from node 2 for electoral term 1 (our term: 0)
|
[2019-03-15 06:38:00] [NOTICE] this node is the winner, will now promote itself and inform other nodes
|
||||||
[2017-08-24 23:32:12] [NOTICE] this node is the winner, will now promote self and inform other nodes
|
[2019-03-15 06:38:00] [INFO] promote_command is:
|
||||||
INFO: connecting to standby database
|
"/usr/pgsql-11/bin/repmgr -f /etc/repmgr/11/repmgr.conf standby promote"
|
||||||
NOTICE: promoting standby
|
NOTICE: promoting standby to primary
|
||||||
DETAIL: promoting server using 'pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' promote'
|
DETAIL: promoting server "node2" (ID: 2) using "/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote"
|
||||||
INFO: reconnecting to promoted server
|
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
|
||||||
NOTICE: STANDBY PROMOTE successful
|
NOTICE: STANDBY PROMOTE successful
|
||||||
DETAIL: node 2 was successfully promoted to primary
|
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
|
||||||
|
[2019-03-15 06:38:01] [INFO] 3 followers to notify
|
||||||
|
[2019-03-15 06:38:01] [NOTICE] notifying node "node3" (node ID: 3) to follow node 2
|
||||||
INFO: node 3 received notification to follow node 2
|
INFO: node 3 received notification to follow node 2
|
||||||
[2017-08-24 23:32:13] [INFO] switching to primary monitoring mode</programlisting>
|
[2019-03-15 06:38:01] [INFO] switching to primary monitoring mode
|
||||||
|
[2019-03-15 06:38:01] [NOTICE] monitoring cluster primary "node2" (node ID: 2)</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The cluster status will now look like this, with the original primary (<literal>node1</literal>)
|
The cluster status will now look like this, with the original primary (<literal>node1</literal>)
|
||||||
marked as inactive, and standby <literal>node3</literal> now following the new primary
|
marked as inactive, and standby <literal>node3</literal> now following the new primary
|
||||||
(<literal>node2</literal>):
|
(<literal>node2</literal>):
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
$ repmgr -f /etc/repmgr.conf cluster show --compact
|
||||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
ID | Name | Role | Status | Upstream | Location | Prio.
|
||||||
----+-------+---------+-----------+----------+----------+----------------------------------------------------
|
----+-------+---------+-----------+----------+----------+-------
|
||||||
1 | node1 | primary | - failed | | default | host=node1 dbname=repmgr user=repmgr
|
1 | node1 | primary | - failed | | default | 100
|
||||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
2 | node2 | primary | * running | | default | 100
|
||||||
3 | node3 | standby | running | node2 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
3 | node3 | standby | running | node2 | default | 100</programlisting>
|
||||||
|
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
<command>repmgr cluster event</command> will display a summary of what happened to each server
|
<link linkend="repmgr-cluster-event"><command>repmgr cluster event</command></link> will display a summary of
|
||||||
during the failover:
|
what happened to each server during the failover:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster event
|
$ repmgr -f /etc/repmgr.conf cluster event
|
||||||
Node ID | Name | Event | OK | Timestamp | Details
|
Node ID | Name | Event | OK | Timestamp | Details
|
||||||
---------+-------+--------------------------+----+---------------------+-----------------------------------------------------------------------------------
|
---------+-------+----------------------------+----+---------------------+-------------------------------------------------------------
|
||||||
3 | node3 | repmgrd_failover_follow | t | 2017-08-24 23:32:16 | node 3 now following new upstream node 2
|
3 | node3 | repmgrd_failover_follow | t | 2019-03-15 06:38:03 | node 3 now following new upstream node 2
|
||||||
3 | node3 | standby_follow | t | 2017-08-24 23:32:16 | node 3 is now attached to node 2
|
3 | node3 | standby_follow | t | 2019-03-15 06:38:02 | standby attached to upstream node "node2" (node ID: 2)
|
||||||
2 | node2 | repmgrd_failover_promote | t | 2017-08-24 23:32:13 | node 2 promoted to primary; old primary 1 marked as failed
|
2 | node2 | repmgrd_reload | t | 2019-03-15 06:38:01 | monitoring cluster primary "node2" (node ID: 2)
|
||||||
2 | node2 | standby_promote | t | 2017-08-24 23:32:13 | node 2 was successfully promoted to primary</programlisting>
|
2 | node2 | repmgrd_failover_promote | t | 2019-03-15 06:38:01 | node 2 promoted to primary; old primary 1 marked as failed
|
||||||
|
2 | node2 | standby_promote | t | 2019-03-15 06:38:01 | server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
@@ -72,7 +72,8 @@
|
|||||||
Ensure that a passwordless SSH connection is possible from the promotion candidate
|
Ensure that a passwordless SSH connection is possible from the promotion candidate
|
||||||
(standby) to the demotion candidate (current primary). If <literal>--siblings-follow</literal>
|
(standby) to the demotion candidate (current primary). If <literal>--siblings-follow</literal>
|
||||||
will be used, ensure that passwordless SSH connections are possible from the
|
will be used, ensure that passwordless SSH connections are possible from the
|
||||||
promotion candidate to all standbys attached to the demotion candidate.
|
promotion candidate to all nodes attached to the demotion candidate
|
||||||
|
(including the witness server, if in use).
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
|
|||||||
2
log.c
2
log.c
@@ -85,7 +85,7 @@ _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_li
|
|||||||
|
|
||||||
time(&t);
|
time(&t);
|
||||||
tm = localtime(&t);
|
tm = localtime(&t);
|
||||||
strftime(buf, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
strftime(buf, sizeof(buf), "[%Y-%m-%d %H:%M:%S]", tm);
|
||||||
fprintf(stderr, "%s [%s] ", buf, level_name);
|
fprintf(stderr, "%s [%s] ", buf, level_name);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -93,6 +93,15 @@ do_bdr_register(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (get_bdr_version_num() > 2)
|
||||||
|
{
|
||||||
|
log_error(_("\"repmgr bdr register\" is for BDR 2.x only"));
|
||||||
|
PQfinish(conn);
|
||||||
|
pfree(dbname);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* check for a matching BDR node */
|
/* check for a matching BDR node */
|
||||||
{
|
{
|
||||||
PQExpBufferData bdr_local_node_name;
|
PQExpBufferData bdr_local_node_name;
|
||||||
@@ -216,7 +225,7 @@ do_bdr_register(void)
|
|||||||
ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;
|
ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;
|
||||||
|
|
||||||
/* skip the local node */
|
/* skip the local node */
|
||||||
if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, MAXLEN) == 0)
|
if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -304,9 +313,9 @@ do_bdr_register(void)
|
|||||||
node_info.active = true;
|
node_info.active = true;
|
||||||
node_info.priority = config_file_options.priority;
|
node_info.priority = config_file_options.priority;
|
||||||
|
|
||||||
strncpy(node_info.node_name, config_file_options.node_name, MAXLEN);
|
strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name));
|
||||||
strncpy(node_info.location, config_file_options.location, MAXLEN);
|
strncpy(node_info.location, config_file_options.location, sizeof(node_info.location));
|
||||||
strncpy(node_info.conninfo, config_file_options.conninfo, MAXLEN);
|
strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo));
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
if (record_status == RECORD_FOUND)
|
||||||
{
|
{
|
||||||
@@ -330,7 +339,7 @@ do_bdr_register(void)
|
|||||||
* name set when the node was registered.
|
* name set when the node was registered.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (strncmp(node_info.node_name, config_file_options.node_name, MAXLEN) != 0)
|
if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0)
|
||||||
{
|
{
|
||||||
log_error(_("a record for node %i is already registered with node_name \"%s\""),
|
log_error(_("a record for node %i is already registered with node_name \"%s\""),
|
||||||
config_file_options.node_id, node_info.node_name);
|
config_file_options.node_id, node_info.node_name);
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ do_cluster_show(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* check if node is reachable, but just not letting us in */
|
/* check if node is reachable, but just not letting us in */
|
||||||
if (is_server_available(cell->node_info->conninfo))
|
if (is_server_available_quiet(cell->node_info->conninfo))
|
||||||
cell->node_info->node_status = NODE_STATUS_REJECTED;
|
cell->node_info->node_status = NODE_STATUS_REJECTED;
|
||||||
else
|
else
|
||||||
cell->node_info->node_status = NODE_STATUS_DOWN;
|
cell->node_info->node_status = NODE_STATUS_DOWN;
|
||||||
@@ -1063,7 +1063,9 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
||||||
|
|
||||||
matrix_rec_list[i]->node_id = cell->node_info->node_id;
|
matrix_rec_list[i]->node_id = cell->node_info->node_id;
|
||||||
strncpy(matrix_rec_list[i]->node_name, cell->node_info->node_name, MAXLEN);
|
strncpy(matrix_rec_list[i]->node_name,
|
||||||
|
cell->node_info->node_name,
|
||||||
|
sizeof(matrix_rec_list[i]->node_name));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the maximum length of a node name
|
* Find the maximum length of a node name
|
||||||
@@ -1278,7 +1280,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, Item
|
|||||||
|
|
||||||
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
||||||
cube[h]->node_id = cell->node_info->node_id;
|
cube[h]->node_id = cell->node_info->node_id;
|
||||||
strncpy(cube[h]->node_name, cell->node_info->node_name, MAXLEN);
|
strncpy(cube[h]->node_name, cell->node_info->node_name, sizeof(cube[h]->node_name));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the maximum length of a node name
|
* Find the maximum length of a node name
|
||||||
@@ -1300,7 +1302,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, Item
|
|||||||
/* we don't need the name here */
|
/* we don't need the name here */
|
||||||
cube[h]->matrix_list_rec[i]->node_name[0] = '\0';
|
cube[h]->matrix_list_rec[i]->node_name[0] = '\0';
|
||||||
|
|
||||||
cube[h]->matrix_list_rec[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * nodes.node_count);
|
cube[h]->matrix_list_rec[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec *) * nodes.node_count);
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
|
|
||||||
|
|||||||
@@ -30,14 +30,14 @@ typedef struct
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
t_node_status_rec **node_status_list;
|
t_node_status_rec **node_status_list;
|
||||||
} t_node_matrix_rec;
|
} t_node_matrix_rec;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
t_node_matrix_rec **matrix_list_rec;
|
t_node_matrix_rec **matrix_list_rec;
|
||||||
} t_node_status_cube;
|
} t_node_status_cube;
|
||||||
|
|
||||||
|
|||||||
@@ -1408,7 +1408,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (lag_seconds < 0)
|
else if (lag_seconds == UNKNOWN_REPLICATION_LAG)
|
||||||
{
|
{
|
||||||
status = CHECK_STATUS_UNKNOWN;
|
status = CHECK_STATUS_UNKNOWN;
|
||||||
|
|
||||||
@@ -2476,6 +2476,8 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
termPQExpBuffer(&slotdir_ent_path);
|
termPQExpBuffer(&slotdir_ent_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
closedir(slotdir);
|
||||||
}
|
}
|
||||||
termPQExpBuffer(&slotdir_path);
|
termPQExpBuffer(&slotdir_path);
|
||||||
}
|
}
|
||||||
@@ -2784,6 +2786,7 @@ _do_node_archive_config(void)
|
|||||||
|
|
||||||
arcdir = opendir(archive_dir.data);
|
arcdir = opendir(archive_dir.data);
|
||||||
|
|
||||||
|
/* always attempt to open the directory */
|
||||||
if (arcdir == NULL)
|
if (arcdir == NULL)
|
||||||
{
|
{
|
||||||
log_error(_("unable to open archive directory \"%s\""),
|
log_error(_("unable to open archive directory \"%s\""),
|
||||||
@@ -2829,10 +2832,11 @@ _do_node_archive_config(void)
|
|||||||
|
|
||||||
termPQExpBuffer(&arcdir_ent_path);
|
termPQExpBuffer(&arcdir_ent_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
closedir(arcdir);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
closedir(arcdir);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* extract list of config files from --config-files
|
* extract list of config files from --config-files
|
||||||
*/
|
*/
|
||||||
@@ -3104,11 +3108,12 @@ copy_file(const char *src_file, const char *dest_file)
|
|||||||
int a = 0;
|
int a = 0;
|
||||||
|
|
||||||
ptr_old = fopen(src_file, "r");
|
ptr_old = fopen(src_file, "r");
|
||||||
ptr_new = fopen(dest_file, "w");
|
|
||||||
|
|
||||||
if (ptr_old == NULL)
|
if (ptr_old == NULL)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
ptr_new = fopen(dest_file, "w");
|
||||||
|
|
||||||
if (ptr_new == NULL)
|
if (ptr_new == NULL)
|
||||||
{
|
{
|
||||||
fclose(ptr_old);
|
fclose(ptr_old);
|
||||||
|
|||||||
@@ -96,28 +96,6 @@ do_primary_register(void)
|
|||||||
|
|
||||||
initialize_voting_term(conn);
|
initialize_voting_term(conn);
|
||||||
|
|
||||||
/* Ensure there isn't another registered node which is primary */
|
|
||||||
primary_conn = get_primary_connection(conn, &current_primary_id, NULL);
|
|
||||||
|
|
||||||
if (primary_conn != NULL)
|
|
||||||
{
|
|
||||||
if (current_primary_id != config_file_options.node_id)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* it's impossible to add a second primary to a streaming
|
|
||||||
* replication cluster
|
|
||||||
*/
|
|
||||||
log_error(_("there is already an active registered primary (node ID: %i) in this cluster"), current_primary_id);
|
|
||||||
PQfinish(primary_conn);
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we've probably connected to ourselves */
|
|
||||||
PQfinish(primary_conn);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
begin_transaction(conn);
|
begin_transaction(conn);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -128,12 +106,32 @@ do_primary_register(void)
|
|||||||
current_primary_id = get_primary_node_id(conn);
|
current_primary_id = get_primary_node_id(conn);
|
||||||
if (current_primary_id != NODE_NOT_FOUND && current_primary_id != config_file_options.node_id)
|
if (current_primary_id != NODE_NOT_FOUND && current_primary_id != config_file_options.node_id)
|
||||||
{
|
{
|
||||||
log_error(_("another node with id %i is already registered as primary"), current_primary_id);
|
log_debug("XXX %i", current_primary_id);
|
||||||
log_detail(_("a streaming replication cluster can have only one primary node"));
|
primary_conn = establish_primary_db_connection(conn, false);
|
||||||
|
|
||||||
rollback_transaction(conn);
|
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||||
PQfinish(conn);
|
{
|
||||||
exit(ERR_BAD_CONFIG);
|
if (get_recovery_type(primary_conn) == RECTYPE_PRIMARY)
|
||||||
|
{
|
||||||
|
log_error(_("there is already an active registered primary (node ID: %i) in this cluster"),
|
||||||
|
current_primary_id);
|
||||||
|
log_detail(_("a streaming replication cluster can have only one primary node"));
|
||||||
|
|
||||||
|
log_hint(_("ensure this node is shut down before registering a new primary"));
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
rollback_transaction(conn);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_warning(_("node %is is registered as primary but running as a standby"),
|
||||||
|
current_primary_id);
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_notice(_("setting node %i's node record to inactive"),
|
||||||
|
current_primary_id);
|
||||||
|
update_node_record_set_active(conn, current_primary_id, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -213,7 +213,7 @@ do_standby_clone(void)
|
|||||||
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
|
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
|
||||||
|
|
||||||
get_conninfo_value(config_file_options.conninfo, "application_name", application_name);
|
get_conninfo_value(config_file_options.conninfo, "application_name", application_name);
|
||||||
if (strlen(application_name) && strncmp(application_name, config_file_options.node_name, MAXLEN) != 0)
|
if (strlen(application_name) && strncmp(application_name, config_file_options.node_name, sizeof(config_file_options.node_name)) != 0)
|
||||||
{
|
{
|
||||||
log_notice(_("\"application_name\" is set in repmgr.conf but will be replaced by the node name"));
|
log_notice(_("\"application_name\" is set in repmgr.conf but will be replaced by the node name"));
|
||||||
}
|
}
|
||||||
@@ -770,56 +770,73 @@ do_standby_clone(void)
|
|||||||
void
|
void
|
||||||
check_barman_config(void)
|
check_barman_config(void)
|
||||||
{
|
{
|
||||||
char command[MAXLEN];
|
PQExpBufferData command;
|
||||||
bool command_ok = false;
|
bool command_ok = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that there is at least one valid backup
|
* Check that there is at least one valid backup
|
||||||
*/
|
*/
|
||||||
|
|
||||||
log_info(_("connecting to Barman server to verify backup for %s"), config_file_options.barman_server);
|
log_info(_("connecting to Barman server to verify backup for \"%s\""), config_file_options.barman_server);
|
||||||
|
|
||||||
maxlen_snprintf(command, "%s show-backup %s latest > /dev/null",
|
initPQExpBuffer(&command);
|
||||||
make_barman_ssh_command(barman_command_buf),
|
|
||||||
config_file_options.barman_server);
|
|
||||||
|
|
||||||
command_ok = local_command(command, NULL);
|
appendPQExpBuffer(&command, "%s show-backup %s latest > /dev/null",
|
||||||
|
make_barman_ssh_command(barman_command_buf),
|
||||||
|
config_file_options.barman_server);
|
||||||
|
|
||||||
|
command_ok = local_command(command.data, NULL);
|
||||||
|
|
||||||
if (command_ok == false)
|
if (command_ok == false)
|
||||||
{
|
{
|
||||||
log_error(_("no valid backup for server %s was found in the Barman catalogue"),
|
log_error(_("no valid backup for server \"%s\" was found in the Barman catalogue"),
|
||||||
config_file_options.barman_server);
|
config_file_options.barman_server);
|
||||||
|
log_detail(_("command executed was:\n %s"), command.data),
|
||||||
log_hint(_("refer to the Barman documentation for more information"));
|
log_hint(_("refer to the Barman documentation for more information"));
|
||||||
|
|
||||||
|
termPQExpBuffer(&command);
|
||||||
exit(ERR_BARMAN);
|
exit(ERR_BARMAN);
|
||||||
}
|
}
|
||||||
|
else if (runtime_options.dry_run == true)
|
||||||
|
|
||||||
if (!create_pg_dir(local_data_directory, runtime_options.force))
|
|
||||||
{
|
{
|
||||||
log_error(_("unable to use directory %s"),
|
log_info(_("valid backup for server \"%s\" found in the Barman catalogue"),
|
||||||
local_data_directory);
|
config_file_options.barman_server);
|
||||||
log_hint(_("use -F/--force option to force this directory to be overwritten"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&command);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create the local repmgr subdirectory
|
* Attempt to create data directory (unless --dry-run specified,
|
||||||
|
* in which case do nothing; warnings will be emitted elsewhere about
|
||||||
|
* any issues with the data directory)
|
||||||
*/
|
*/
|
||||||
|
if (runtime_options.dry_run == false)
|
||||||
maxlen_snprintf(local_repmgr_tmp_directory,
|
|
||||||
"%s/repmgr", local_data_directory);
|
|
||||||
|
|
||||||
maxlen_snprintf(datadir_list_filename,
|
|
||||||
"%s/data.txt", local_repmgr_tmp_directory);
|
|
||||||
|
|
||||||
if (!create_pg_dir(local_repmgr_tmp_directory, runtime_options.force))
|
|
||||||
{
|
{
|
||||||
log_error(_("unable to create directory \"%s\""),
|
if (!create_pg_dir(local_data_directory, runtime_options.force))
|
||||||
local_repmgr_tmp_directory);
|
{
|
||||||
|
log_error(_("unable to use directory %s"),
|
||||||
|
local_data_directory);
|
||||||
|
log_hint(_("use -F/--force option to force this directory to be overwritten"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
/*
|
||||||
|
* Create the local repmgr subdirectory
|
||||||
|
*/
|
||||||
|
|
||||||
|
maxlen_snprintf(local_repmgr_tmp_directory,
|
||||||
|
"%s/repmgr", local_data_directory);
|
||||||
|
|
||||||
|
maxlen_snprintf(datadir_list_filename,
|
||||||
|
"%s/data.txt", local_repmgr_tmp_directory);
|
||||||
|
|
||||||
|
if (!create_pg_dir(local_repmgr_tmp_directory, runtime_options.force))
|
||||||
|
{
|
||||||
|
log_error(_("unable to create directory \"%s\""),
|
||||||
|
local_repmgr_tmp_directory);
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -827,20 +844,37 @@ check_barman_config(void)
|
|||||||
*/
|
*/
|
||||||
log_info(_("connecting to Barman server to fetch server parameters"));
|
log_info(_("connecting to Barman server to fetch server parameters"));
|
||||||
|
|
||||||
maxlen_snprintf(command, "%s show-server %s > %s/show-server.txt",
|
initPQExpBuffer(&command);
|
||||||
make_barman_ssh_command(barman_command_buf),
|
|
||||||
config_file_options.barman_server,
|
|
||||||
local_repmgr_tmp_directory);
|
|
||||||
|
|
||||||
command_ok = local_command(command, NULL);
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&command, "%s show-server %s > /dev/null",
|
||||||
|
make_barman_ssh_command(barman_command_buf),
|
||||||
|
config_file_options.barman_server);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&command, "%s show-server %s > %s/show-server.txt",
|
||||||
|
make_barman_ssh_command(barman_command_buf),
|
||||||
|
config_file_options.barman_server,
|
||||||
|
local_repmgr_tmp_directory);
|
||||||
|
}
|
||||||
|
|
||||||
|
command_ok = local_command(command.data, NULL);
|
||||||
|
|
||||||
if (command_ok == false)
|
if (command_ok == false)
|
||||||
{
|
{
|
||||||
log_error(_("unable to fetch server parameters from Barman server"));
|
log_error(_("unable to fetch server parameters from Barman server"));
|
||||||
|
log_detail(_("command executed was:\n %s"), command.data),
|
||||||
|
termPQExpBuffer(&command);
|
||||||
exit(ERR_BARMAN);
|
exit(ERR_BARMAN);
|
||||||
}
|
}
|
||||||
|
else if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("server parameters were successfully fetched from Barman server"));
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&command);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -872,7 +906,7 @@ _do_create_recovery_conf(void)
|
|||||||
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
char recovery_file_path[MAXPGPATH] = "";
|
char recovery_file_path[MAXPGPATH + sizeof(RECOVERY_COMMAND_FILE)] = "";
|
||||||
struct stat st;
|
struct stat st;
|
||||||
bool node_is_running = false;
|
bool node_is_running = false;
|
||||||
bool slot_creation_required = false;
|
bool slot_creation_required = false;
|
||||||
@@ -1117,7 +1151,10 @@ _do_create_recovery_conf(void)
|
|||||||
|
|
||||||
/* check if recovery.conf exists */
|
/* check if recovery.conf exists */
|
||||||
|
|
||||||
snprintf(recovery_file_path, MAXPGPATH, "%s/%s", local_data_directory, RECOVERY_COMMAND_FILE);
|
snprintf(recovery_file_path, sizeof(recovery_file_path),
|
||||||
|
"%s/%s",
|
||||||
|
local_data_directory,
|
||||||
|
RECOVERY_COMMAND_FILE);
|
||||||
|
|
||||||
if (stat(recovery_file_path, &st) == -1)
|
if (stat(recovery_file_path, &st) == -1)
|
||||||
{
|
{
|
||||||
@@ -1305,8 +1342,7 @@ do_standby_register(void)
|
|||||||
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s",
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
PQerrorMessage(conn));
|
|
||||||
log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
|
log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -1436,6 +1472,17 @@ do_standby_register(void)
|
|||||||
RecordStatus upstream_record_status = RECORD_NOT_FOUND;
|
RecordStatus upstream_record_status = RECORD_NOT_FOUND;
|
||||||
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
|
if (runtime_options.upstream_node_id == config_file_options.node_id)
|
||||||
|
{
|
||||||
|
log_error(_("provided node ID for --upstream-node-id (%i) is the same as the configured local node ID (%i)"),
|
||||||
|
runtime_options.upstream_node_id,
|
||||||
|
config_file_options.node_id);
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
if (PQstatus(conn) == CONNECTION_OK)
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
upstream_record_status = get_node_record(primary_conn,
|
upstream_record_status = get_node_record(primary_conn,
|
||||||
runtime_options.upstream_node_id,
|
runtime_options.upstream_node_id,
|
||||||
&upstream_node_record);
|
&upstream_node_record);
|
||||||
@@ -1887,7 +1934,7 @@ do_standby_unregister(void)
|
|||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to connect to primary server"));
|
log_error(_("unable to connect to primary server"));
|
||||||
log_detail("%s", PQerrorMessage(conn));
|
log_detail("\n%s", PQerrorMessage(conn));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2269,6 +2316,7 @@ void
|
|||||||
do_standby_follow(void)
|
do_standby_follow(void)
|
||||||
{
|
{
|
||||||
PGconn *local_conn = NULL;
|
PGconn *local_conn = NULL;
|
||||||
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
PGconn *primary_conn = NULL;
|
PGconn *primary_conn = NULL;
|
||||||
int primary_node_id = UNKNOWN_NODE_ID;
|
int primary_node_id = UNKNOWN_NODE_ID;
|
||||||
@@ -2307,6 +2355,19 @@ do_standby_follow(void)
|
|||||||
if (PQserverVersion(local_conn) < 90400)
|
if (PQserverVersion(local_conn) < 90400)
|
||||||
check_93_config();
|
check_93_config();
|
||||||
|
|
||||||
|
/* attempt to retrieve local node record */
|
||||||
|
record_status = get_node_record(local_conn,
|
||||||
|
config_file_options.node_id,
|
||||||
|
&local_node_record);
|
||||||
|
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve record for local node %i"),
|
||||||
|
config_file_options.node_id);
|
||||||
|
PQfinish(local_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* --upstream-node-id provided - attempt to follow that node
|
* --upstream-node-id provided - attempt to follow that node
|
||||||
*/
|
*/
|
||||||
@@ -2551,6 +2612,9 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
conn_to_param_list(local_conn, &local_repl_conninfo);
|
conn_to_param_list(local_conn, &local_repl_conninfo);
|
||||||
|
|
||||||
|
/* Set the replication user from the node record */
|
||||||
|
param_set(&local_repl_conninfo, "user", local_node_record.repluser);
|
||||||
|
|
||||||
param_set(&local_repl_conninfo, "replication", "1");
|
param_set(&local_repl_conninfo, "replication", "1");
|
||||||
|
|
||||||
local_repl_conn = establish_db_connection_by_params(&local_repl_conninfo, false);
|
local_repl_conn = establish_db_connection_by_params(&local_repl_conninfo, false);
|
||||||
@@ -2837,8 +2901,8 @@ do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_n
|
|||||||
free_conninfo_params(&local_node_conninfo);
|
free_conninfo_params(&local_node_conninfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* store the original upstream node id so we can delete the
|
* Store the original upstream node id so we can delete the
|
||||||
* replication slot, if exists
|
* replication slot, if it exists.
|
||||||
*/
|
*/
|
||||||
if (local_node_record.upstream_node_id != UNKNOWN_NODE_ID)
|
if (local_node_record.upstream_node_id != UNKNOWN_NODE_ID)
|
||||||
{
|
{
|
||||||
@@ -2850,9 +2914,17 @@ do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_n
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false && original_upstream_node_id != UNKNOWN_NODE_ID)
|
if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false)
|
||||||
{
|
{
|
||||||
remove_old_replication_slot = true;
|
/*
|
||||||
|
* Only attempt to delete the old replication slot if the old upstream
|
||||||
|
* node is known and is different to the follow target node.
|
||||||
|
*/
|
||||||
|
if (original_upstream_node_id != UNKNOWN_NODE_ID
|
||||||
|
&& original_upstream_node_id != follow_target_node_record->node_id)
|
||||||
|
{
|
||||||
|
remove_old_replication_slot = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2999,8 +3071,6 @@ do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_n
|
|||||||
* Note that if this function is called by do_standby_switchover(), the
|
* Note that if this function is called by do_standby_switchover(), the
|
||||||
* "repmgr node rejoin" command executed on the demotion candidate may already
|
* "repmgr node rejoin" command executed on the demotion candidate may already
|
||||||
* have removed the slot, so there may be nothing to do.
|
* have removed the slot, so there may be nothing to do.
|
||||||
*
|
|
||||||
* XXX check if former upstream is current primary?
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (remove_old_replication_slot == true)
|
if (remove_old_replication_slot == true)
|
||||||
@@ -3534,9 +3604,26 @@ do_standby_switchover(void)
|
|||||||
{
|
{
|
||||||
if (sibling_nodes.node_count > 0)
|
if (sibling_nodes.node_count > 0)
|
||||||
{
|
{
|
||||||
|
PQExpBufferData nodes;
|
||||||
|
NodeInfoListCell *cell;
|
||||||
|
|
||||||
|
initPQExpBuffer(&nodes);
|
||||||
|
|
||||||
|
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&nodes,
|
||||||
|
" %s (node ID: %i)",
|
||||||
|
cell->node_info->node_name,
|
||||||
|
cell->node_info->node_id);
|
||||||
|
if (cell->next)
|
||||||
|
appendPQExpBufferStr(&nodes, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
log_warning(_("%i sibling nodes found, but option \"--siblings-follow\" not specified"),
|
log_warning(_("%i sibling nodes found, but option \"--siblings-follow\" not specified"),
|
||||||
sibling_nodes.node_count);
|
sibling_nodes.node_count);
|
||||||
log_detail(_("these nodes will remain attached to the current primary"));
|
log_detail(_("these nodes will remain attached to the current primary:\n%s"), nodes.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(&nodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -3884,7 +3971,7 @@ do_standby_switchover(void)
|
|||||||
log_detail(_("lag is %i seconds (warning threshold: %i)"),
|
log_detail(_("lag is %i seconds (warning threshold: %i)"),
|
||||||
lag_seconds, config_file_options.replication_lag_warning);
|
lag_seconds, config_file_options.replication_lag_warning);
|
||||||
}
|
}
|
||||||
else if (lag_seconds < 0)
|
else if (lag_seconds == UNKNOWN_REPLICATION_LAG)
|
||||||
{
|
{
|
||||||
if (runtime_options.force == false)
|
if (runtime_options.force == false)
|
||||||
{
|
{
|
||||||
@@ -3986,13 +4073,14 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
for (cell = all_nodes.head; cell; cell = cell->next)
|
for (cell = all_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
|
||||||
|
|
||||||
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
||||||
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
||||||
repmgrd_info[i]->pid = UNKNOWN_PID;
|
repmgrd_info[i]->pid = UNKNOWN_PID;
|
||||||
repmgrd_info[i]->paused = false;
|
repmgrd_info[i]->paused = false;
|
||||||
repmgrd_info[i]->running = false;
|
repmgrd_info[i]->running = false;
|
||||||
|
repmgrd_info[i]->pg_running = true;
|
||||||
|
|
||||||
|
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -4002,12 +4090,25 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
repmgrd_info[i]->pg_running = false;
|
repmgrd_info[i]->pg_running = false;
|
||||||
|
|
||||||
item_list_append_format(&repmgrd_connection_errors,
|
/*
|
||||||
_("unable to connect to node \"%s\" (ID %i)"),
|
* Only worry about unreachable nodes if they're marked as active
|
||||||
cell->node_info->node_name,
|
* in the repmgr metadata.
|
||||||
cell->node_info->node_id);
|
*/
|
||||||
|
if (cell->node_info->active == true)
|
||||||
|
{
|
||||||
|
unreachable_node_count++;
|
||||||
|
|
||||||
unreachable_node_count++;
|
item_list_append_format(&repmgrd_connection_errors,
|
||||||
|
_("unable to connect to node \"%s\" (ID %i):\n%s"),
|
||||||
|
cell->node_info->node_name,
|
||||||
|
cell->node_info->node_id,
|
||||||
|
PQerrorMessage(cell->node_info->conn));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(cell->node_info->conn);
|
||||||
|
cell->node_info->conn = NULL;
|
||||||
|
|
||||||
|
i++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4069,11 +4170,37 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* pause repmgrd on all reachable nodes */
|
||||||
if (repmgrd_running_count > 0)
|
if (repmgrd_running_count > 0)
|
||||||
{
|
{
|
||||||
i = 0;
|
i = 0;
|
||||||
for (cell = all_nodes.head; cell; cell = cell->next)
|
for (cell = all_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Skip if node was unreachable
|
||||||
|
*/
|
||||||
|
if (repmgrd_info[i]->pg_running == false)
|
||||||
|
{
|
||||||
|
log_warning(_("node %s (ID %i) unreachable, unable to pause repmgrd"),
|
||||||
|
cell->node_info->node_name,
|
||||||
|
cell->node_info->node_id);
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Skip if repmgrd not running on node
|
||||||
|
*/
|
||||||
|
if (repmgrd_info[i]->running == false)
|
||||||
|
{
|
||||||
|
log_warning(_("repmgrd not running on node %s (ID %i)"),
|
||||||
|
cell->node_info->node_name,
|
||||||
|
cell->node_info->node_id);
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Skip if node is already paused. Note we won't unpause these, to
|
* Skip if node is already paused. Note we won't unpause these, to
|
||||||
* leave the repmgrd instances in the cluster in the same state they
|
* leave the repmgrd instances in the cluster in the same state they
|
||||||
@@ -4113,8 +4240,11 @@ do_standby_switchover(void)
|
|||||||
/* close all connections - we'll reestablish later */
|
/* close all connections - we'll reestablish later */
|
||||||
for (cell = all_nodes.head; cell; cell = cell->next)
|
for (cell = all_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
PQfinish(cell->node_info->conn);
|
if (cell->node_info->conn != NULL)
|
||||||
cell->node_info->conn = NULL;
|
{
|
||||||
|
PQfinish(cell->node_info->conn);
|
||||||
|
cell->node_info->conn = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -4188,6 +4318,7 @@ do_standby_switchover(void)
|
|||||||
*/
|
*/
|
||||||
if (runtime_options.dry_run == true)
|
if (runtime_options.dry_run == true)
|
||||||
{
|
{
|
||||||
|
/* we use a buffer here as it will be modified by string_remove_trailing_newlines() */
|
||||||
char shutdown_command[MAXLEN] = "";
|
char shutdown_command[MAXLEN] = "";
|
||||||
|
|
||||||
strncpy(shutdown_command, command_output.data, MAXLEN);
|
strncpy(shutdown_command, command_output.data, MAXLEN);
|
||||||
@@ -4304,6 +4435,9 @@ do_standby_switchover(void)
|
|||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("connection to local node lost, reconnecting..."));
|
log_warning(_("connection to local node lost, reconnecting..."));
|
||||||
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
|
PQfinish(local_conn);
|
||||||
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
||||||
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
@@ -4719,9 +4853,10 @@ do_standby_switchover(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
item_list_append_format(&repmgrd_unpause_errors,
|
item_list_append_format(&repmgrd_unpause_errors,
|
||||||
_("unable to connect to node \"%s\" (ID %i)"),
|
_("unable to connect to node \"%s\" (ID %i):\n%s"),
|
||||||
cell->node_info->node_name,
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id);
|
cell->node_info->node_id,
|
||||||
|
PQerrorMessage(cell->node_info->conn));
|
||||||
error_node_count++;
|
error_node_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4733,6 +4868,8 @@ do_standby_switchover(void)
|
|||||||
PQExpBufferData detail;
|
PQExpBufferData detail;
|
||||||
ItemListCell *cell;
|
ItemListCell *cell;
|
||||||
|
|
||||||
|
initPQExpBuffer(&detail);
|
||||||
|
|
||||||
for (cell = repmgrd_unpause_errors.head; cell; cell = cell->next)
|
for (cell = repmgrd_unpause_errors.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(&detail,
|
appendPQExpBuffer(&detail,
|
||||||
@@ -4913,19 +5050,41 @@ check_source_server()
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In the default pg_basebackup mode, we'll cowardly refuse to overwrite
|
* Check the local directory to see if it appears to be a PostgreSQL
|
||||||
* an existing data directory
|
* data directory.
|
||||||
|
*
|
||||||
|
* Note: a previous call to check_dir() will have checked whether it contains
|
||||||
|
* a running PostgreSQL instance.
|
||||||
*/
|
*/
|
||||||
if (mode == pg_basebackup)
|
if (is_pg_dir(local_data_directory))
|
||||||
{
|
{
|
||||||
if (is_pg_dir(local_data_directory) && runtime_options.force != true)
|
const char *msg = _("target data directory appears to be a PostgreSQL data directory");
|
||||||
|
const char *hint = _("use -F/--force to overwrite the existing data directory");
|
||||||
|
|
||||||
|
if (runtime_options.force == false && runtime_options.dry_run == false)
|
||||||
{
|
{
|
||||||
log_error(_("target data directory appears to be a PostgreSQL data directory"));
|
log_error("%s", msg);
|
||||||
log_detail(_("target data directory is \"%s\""), local_data_directory);
|
log_detail(_("target data directory is \"%s\""), local_data_directory);
|
||||||
log_hint(_("use -F/--force to overwrite the existing data directory"));
|
log_hint("%s", hint);
|
||||||
PQfinish(source_conn);
|
PQfinish(source_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
if (runtime_options.force == true)
|
||||||
|
{
|
||||||
|
log_warning("%s and will be overwritten", msg);
|
||||||
|
log_detail(_("target data directory is \"%s\""), local_data_directory);
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning("%s", msg);
|
||||||
|
log_detail(_("target data directory is \"%s\""), local_data_directory);
|
||||||
|
log_hint("%s", hint);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -6015,10 +6174,11 @@ run_file_backup(t_node_info *node_record)
|
|||||||
* Remove prefix
|
* Remove prefix
|
||||||
*/
|
*/
|
||||||
p = string_skip_prefix(prefix, output);
|
p = string_skip_prefix(prefix, output);
|
||||||
|
|
||||||
if (p == NULL)
|
if (p == NULL)
|
||||||
{
|
{
|
||||||
log_error("unexpected output from \"barman list-files\": %s",
|
log_error("unexpected output from \"barman list-files\"");
|
||||||
output);
|
log_detail("%s", output);
|
||||||
exit(ERR_BARMAN);
|
exit(ERR_BARMAN);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6036,6 +6196,14 @@ run_file_backup(t_node_info *node_record)
|
|||||||
strncat(prefix, backup_id, MAXLEN - 1);
|
strncat(prefix, backup_id, MAXLEN - 1);
|
||||||
strncat(prefix, "/", MAXLEN - 1);
|
strncat(prefix, "/", MAXLEN - 1);
|
||||||
p = string_skip_prefix(backup_id, p);
|
p = string_skip_prefix(backup_id, p);
|
||||||
|
|
||||||
|
if (p == NULL)
|
||||||
|
{
|
||||||
|
log_error("unexpected output from \"barman list-files\"");
|
||||||
|
log_detail("%s", output);
|
||||||
|
exit(ERR_BARMAN);
|
||||||
|
}
|
||||||
|
|
||||||
p = string_skip_prefix("/", p);
|
p = string_skip_prefix("/", p);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -6047,8 +6215,8 @@ run_file_backup(t_node_info *node_record)
|
|||||||
basebackups_directory,
|
basebackups_directory,
|
||||||
backup_id,
|
backup_id,
|
||||||
local_repmgr_tmp_directory);
|
local_repmgr_tmp_directory);
|
||||||
(void) local_command(
|
|
||||||
command,
|
(void) local_command(command,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -6372,6 +6540,8 @@ run_file_backup(t_node_info *node_record)
|
|||||||
|
|
||||||
if (fputs(tablespace_map.data, tablespace_map_file) == EOF)
|
if (fputs(tablespace_map.data, tablespace_map_file) == EOF)
|
||||||
{
|
{
|
||||||
|
fclose(tablespace_map_file);
|
||||||
|
|
||||||
log_error(_("unable to write to tablespace_map file \"%s\""), tablespace_map_filename.data);
|
log_error(_("unable to write to tablespace_map file \"%s\""), tablespace_map_filename.data);
|
||||||
|
|
||||||
r = ERR_BAD_BASEBACKUP;
|
r = ERR_BAD_BASEBACKUP;
|
||||||
|
|||||||
@@ -56,8 +56,7 @@ do_witness_register(void)
|
|||||||
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s",
|
log_detail("\n%s", PQerrorMessage(witness_conn));
|
||||||
PQerrorMessage(witness_conn));
|
|
||||||
log_hint(_("the witness node must be running before it can be registered"));
|
log_hint(_("the witness node must be running before it can be registered"));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -411,7 +410,7 @@ do_witness_unregister(void)
|
|||||||
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
|
log_error(_("unable to connect to node \"%s\" (ID: %i)"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
config_file_options.node_id);
|
config_file_options.node_id);
|
||||||
log_detail("%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -437,7 +436,7 @@ do_witness_unregister(void)
|
|||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to connect to primary"));
|
log_error(_("unable to connect to primary"));
|
||||||
log_detail("%s", PQerrorMessage(primary_conn));
|
log_detail("\n%s", PQerrorMessage(primary_conn));
|
||||||
|
|
||||||
if (local_node_available == true)
|
if (local_node_available == true)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ typedef struct
|
|||||||
|
|
||||||
/* general node options */
|
/* general node options */
|
||||||
int node_id;
|
int node_id;
|
||||||
char node_name[MAXLEN];
|
char node_name[NAMEDATALEN];
|
||||||
char data_dir[MAXPGPATH];
|
char data_dir[MAXPGPATH];
|
||||||
int remote_node_id;
|
int remote_node_id;
|
||||||
|
|
||||||
|
|||||||
@@ -356,9 +356,15 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
/* --node-name */
|
/* --node-name */
|
||||||
case OPT_NODE_NAME:
|
case OPT_NODE_NAME:
|
||||||
strncpy(runtime_options.node_name, optarg, MAXLEN);
|
{
|
||||||
|
if (strlen(optarg) < sizeof(runtime_options.node_name))
|
||||||
|
strncpy(runtime_options.node_name, optarg, sizeof(runtime_options.node_name));
|
||||||
|
else
|
||||||
|
item_list_append_format(&cli_errors,
|
||||||
|
_("value for \"--node-name\" must contain fewer than %lu characters"),
|
||||||
|
sizeof(runtime_options.node_name));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
/* --remote-node-id */
|
/* --remote-node-id */
|
||||||
case OPT_REMOTE_NODE_ID:
|
case OPT_REMOTE_NODE_ID:
|
||||||
runtime_options.remote_node_id = repmgr_atoi(optarg, "--remote-node-id", &cli_errors, MIN_NODE_ID);
|
runtime_options.remote_node_id = repmgr_atoi(optarg, "--remote-node-id", &cli_errors, MIN_NODE_ID);
|
||||||
@@ -1674,6 +1680,8 @@ check_cli_parameters(const int action)
|
|||||||
item_list_append_format(&cli_warnings,
|
item_list_append_format(&cli_warnings,
|
||||||
_("--replication-user ignored when executing %s"),
|
_("--replication-user ignored when executing %s"),
|
||||||
action_name(action));
|
action_name(action));
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
item_list_append_format(&cli_warnings,
|
item_list_append_format(&cli_warnings,
|
||||||
_("--replication-user not required when executing %s"),
|
_("--replication-user not required when executing %s"),
|
||||||
@@ -2211,7 +2219,7 @@ create_repmgr_extension(PGconn *conn)
|
|||||||
log_detail(_("version %s is installed but newer version %s is available"),
|
log_detail(_("version %s is installed but newer version %s is available"),
|
||||||
extversions.installed_version,
|
extversions.installed_version,
|
||||||
extversions.default_version);
|
extversions.default_version);
|
||||||
log_hint(_("execute \"ALTER EXTENSION repmgr UPGRADE\""));
|
log_hint(_("update the installed extension version by executing \"ALTER EXTENSION repmgr UPDATE\""));
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
case REPMGR_INSTALLED:
|
case REPMGR_INSTALLED:
|
||||||
@@ -2457,6 +2465,7 @@ get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privil
|
|||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("no database connection available"));
|
log_error(_("no database connection available"));
|
||||||
|
log_detail("\n%s", PQerrorMessage(*conn));
|
||||||
exit(ERR_INTERNAL);
|
exit(ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3000,7 +3009,7 @@ init_node_record(t_node_info *node_record)
|
|||||||
strncpy(node_record->location, "default", MAXLEN);
|
strncpy(node_record->location, "default", MAXLEN);
|
||||||
|
|
||||||
|
|
||||||
strncpy(node_record->node_name, config_file_options.node_name, MAXLEN);
|
strncpy(node_record->node_name, config_file_options.node_name, sizeof(node_record->node_name));
|
||||||
strncpy(node_record->conninfo, config_file_options.conninfo, MAXLEN);
|
strncpy(node_record->conninfo, config_file_options.conninfo, MAXLEN);
|
||||||
strncpy(node_record->config_file, config_file_path, MAXPGPATH);
|
strncpy(node_record->config_file, config_file_path, MAXPGPATH);
|
||||||
|
|
||||||
@@ -3054,9 +3063,6 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea
|
|||||||
/* "full_page_writes" must be on in any case */
|
/* "full_page_writes" must be on in any case */
|
||||||
if (guc_set(conn, "full_page_writes", "=", "off"))
|
if (guc_set(conn, "full_page_writes", "=", "off"))
|
||||||
{
|
{
|
||||||
if (can_use == false)
|
|
||||||
appendPQExpBuffer(reason, "; ");
|
|
||||||
|
|
||||||
appendPQExpBuffer(reason,
|
appendPQExpBuffer(reason,
|
||||||
_("\"full_page_writes\" must be set to \"on\""));
|
_("\"full_page_writes\" must be set to \"on\""));
|
||||||
|
|
||||||
@@ -3143,6 +3149,8 @@ drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
|
|||||||
/*
|
/*
|
||||||
* Here we'll perform some timeline sanity checks to ensure the follow target
|
* Here we'll perform some timeline sanity checks to ensure the follow target
|
||||||
* can actually be followed.
|
* can actually be followed.
|
||||||
|
*
|
||||||
|
* See also comment for check_node_can_follow() in repmgrd-physical.c .
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin)
|
check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin)
|
||||||
@@ -3233,6 +3241,7 @@ check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *fo
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* timelines are the same - check relative positions */
|
||||||
if (follow_target_identification.timeline == local_tli)
|
if (follow_target_identification.timeline == local_tli)
|
||||||
{
|
{
|
||||||
XLogRecPtr follow_target_xlogpos = get_node_current_lsn(follow_target_conn);
|
XLogRecPtr follow_target_xlogpos = get_node_current_lsn(follow_target_conn);
|
||||||
@@ -3244,7 +3253,6 @@ check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *fo
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* timeline is the same - check relative positions */
|
|
||||||
if (local_xlogpos <= follow_target_xlogpos)
|
if (local_xlogpos <= follow_target_xlogpos)
|
||||||
{
|
{
|
||||||
log_info(_("timelines are same, this server is not ahead"));
|
log_info(_("timelines are same, this server is not ahead"));
|
||||||
|
|||||||
@@ -7,7 +7,8 @@
|
|||||||
# parameter will be treated as empty or false.
|
# parameter will be treated as empty or false.
|
||||||
#
|
#
|
||||||
# IMPORTANT: string values can be provided as-is, or enclosed in single quotes
|
# IMPORTANT: string values can be provided as-is, or enclosed in single quotes
|
||||||
# (but not double-quotes, which will be interpreted as part of the string), e.g.:
|
# (but not double-quotes, which will be interpreted as part of the string),
|
||||||
|
# e.g.:
|
||||||
#
|
#
|
||||||
# node_name=foo
|
# node_name=foo
|
||||||
# node_name = 'foo'
|
# node_name = 'foo'
|
||||||
@@ -24,9 +25,11 @@
|
|||||||
# using the server's hostname or another identifier
|
# using the server's hostname or another identifier
|
||||||
# unambiguously associated with the server to avoid
|
# unambiguously associated with the server to avoid
|
||||||
# confusion. Avoid choosing names which reflect the
|
# confusion. Avoid choosing names which reflect the
|
||||||
# node's current role, e.g. "primary" or "standby1",
|
# node's current role, e.g. 'primary' or 'standby1',
|
||||||
# as roles can change and it will be confusing if
|
# as roles can change and it will be confusing if
|
||||||
# the current primary is called "standby1".
|
# the current primary is called 'standby1'.
|
||||||
|
# The string's maximum length is 63 characters and it should
|
||||||
|
# contain only printable ASCII characters.
|
||||||
|
|
||||||
#conninfo='' # Database connection information as a conninfo string.
|
#conninfo='' # Database connection information as a conninfo string.
|
||||||
# All servers in the cluster must be able to connect to
|
# All servers in the cluster must be able to connect to
|
||||||
@@ -69,6 +72,7 @@
|
|||||||
# to the user defined in "conninfo".
|
# to the user defined in "conninfo".
|
||||||
|
|
||||||
#replication_type=physical # Must be one of 'physical' or 'bdr'.
|
#replication_type=physical # Must be one of 'physical' or 'bdr'.
|
||||||
|
# NOTE: "bdr" can only be used with BDR 2.x
|
||||||
|
|
||||||
#location=default # arbitrary string defining the location of the node; this
|
#location=default # arbitrary string defining the location of the node; this
|
||||||
# is used during failover to check visibilty of the
|
# is used during failover to check visibilty of the
|
||||||
@@ -334,8 +338,6 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
#sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover" is true, the maximum length of time
|
#sibling_nodes_disconnect_timeout=30 # If "standby_disconnect_on_failover" is true, the maximum length of time
|
||||||
# (in seconds) to wait for other standbys to confirm they have disconnected their
|
# (in seconds) to wait for other standbys to confirm they have disconnected their
|
||||||
# WAL receivers
|
# WAL receivers
|
||||||
#primary_visibility_consensus=false # If "true", only continue with failover if no standbys have seen
|
|
||||||
# the primary node recently. *Must* be the same on all nodes.
|
|
||||||
#failover_validation_command= # Script to execute for an external mechanism to validate the failover
|
#failover_validation_command= # Script to execute for an external mechanism to validate the failover
|
||||||
# decision made by repmgrd. One or both of the following parameter placeholders
|
# decision made by repmgrd. One or both of the following parameter placeholders
|
||||||
# should be provided, which will be replaced by repmgrd with the appropriate
|
# should be provided, which will be replaced by repmgrd with the appropriate
|
||||||
|
|||||||
2
repmgr.h
2
repmgr.h
@@ -55,6 +55,7 @@
|
|||||||
#define UNKNOWN_TIMELINE_ID -1
|
#define UNKNOWN_TIMELINE_ID -1
|
||||||
#define UNKNOWN_SYSTEM_IDENTIFIER 0
|
#define UNKNOWN_SYSTEM_IDENTIFIER 0
|
||||||
#define UNKNOWN_PID -1
|
#define UNKNOWN_PID -1
|
||||||
|
#define UNKNOWN_REPLICATION_LAG -1
|
||||||
|
|
||||||
#define NODE_NOT_FOUND -1
|
#define NODE_NOT_FOUND -1
|
||||||
#define NO_UPSTREAM_NODE -1
|
#define NO_UPSTREAM_NODE -1
|
||||||
@@ -63,6 +64,7 @@
|
|||||||
#define ELECTION_RERUN_NOTIFICATION -2
|
#define ELECTION_RERUN_NOTIFICATION -2
|
||||||
#define VOTING_TERM_NOT_SET -1
|
#define VOTING_TERM_NOT_SET -1
|
||||||
#define ARCHIVE_STATUS_DIR_ERROR -1
|
#define ARCHIVE_STATUS_DIR_ERROR -1
|
||||||
|
#define NO_DEGRADED_MONITORING_ELAPSED -1
|
||||||
|
|
||||||
#define BDR2_REPLICATION_SET_NAME "repmgr"
|
#define BDR2_REPLICATION_SET_NAME "repmgr"
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
#define REPMGR_VERSION_DATE ""
|
#define REPMGR_VERSION_DATE ""
|
||||||
#define REPMGR_VERSION "4.3rc1"
|
#define REPMGR_VERSION "4.3.1"
|
||||||
#define REPMGR_VERSION_NUM 40300
|
#define REPMGR_VERSION_NUM 40301
|
||||||
|
|||||||
157
repmgrd-bdr.c
157
repmgrd-bdr.c
@@ -68,7 +68,6 @@ monitor_bdr(void)
|
|||||||
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
|
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
|
||||||
RecordStatus record_status;
|
RecordStatus record_status;
|
||||||
NodeInfoListCell *cell;
|
NodeInfoListCell *cell;
|
||||||
PQExpBufferData event_details;
|
|
||||||
instr_time log_status_interval_start;
|
instr_time log_status_interval_start;
|
||||||
|
|
||||||
/* sanity check local database */
|
/* sanity check local database */
|
||||||
@@ -97,9 +96,21 @@ monitor_bdr(void)
|
|||||||
if (!is_bdr_db(local_conn, NULL))
|
if (!is_bdr_db(local_conn, NULL))
|
||||||
{
|
{
|
||||||
log_error(_("database is not BDR-enabled"));
|
log_error(_("database is not BDR-enabled"));
|
||||||
|
PQfinish(local_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check this is a supported BDR version (basically BDR 2.x)
|
||||||
|
*/
|
||||||
|
if (get_bdr_version_num() > 2)
|
||||||
|
{
|
||||||
|
log_error(_("\"bdr\" mode is for BDR 2.x only"));
|
||||||
|
log_hint(_("for BDR 3 and later, use \"replication_type=physical\""));
|
||||||
|
log_error(_("database is not BDR-enabled"));
|
||||||
|
exit(ERR_DB_CONN);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_table_in_bdr_replication_set(local_conn, "nodes", "repmgr") == false)
|
if (is_table_in_bdr_replication_set(local_conn, "nodes", "repmgr") == false)
|
||||||
{
|
{
|
||||||
log_error(_("repmgr metadata table 'repmgr.%s' is not in the 'repmgr' replication set"),
|
log_error(_("repmgr metadata table 'repmgr.%s' is not in the 'repmgr' replication set"),
|
||||||
@@ -229,6 +240,7 @@ monitor_bdr(void)
|
|||||||
if (cell->node_info->node_status == NODE_STATUS_UP)
|
if (cell->node_info->node_status == NODE_STATUS_UP)
|
||||||
{
|
{
|
||||||
int node_unreachable_elapsed = calculate_elapsed(node_unreachable_start);
|
int node_unreachable_elapsed = calculate_elapsed(node_unreachable_start);
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
@@ -366,7 +378,6 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
|
|||||||
{
|
{
|
||||||
PGconn *next_node_conn = NULL;
|
PGconn *next_node_conn = NULL;
|
||||||
NodeInfoListCell *cell;
|
NodeInfoListCell *cell;
|
||||||
PQExpBufferData event_details;
|
|
||||||
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
||||||
t_node_info target_node = T_NODE_INFO_INITIALIZER;
|
t_node_info target_node = T_NODE_INFO_INITIALIZER;
|
||||||
t_node_info failed_node = T_NODE_INFO_INITIALIZER;
|
t_node_info failed_node = T_NODE_INFO_INITIALIZER;
|
||||||
@@ -460,45 +471,49 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
|
|||||||
|
|
||||||
log_debug("this node is the failover handler");
|
log_debug("this node is the failover handler");
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
{
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
event_info.conninfo_str = target_node.conninfo;
|
initPQExpBuffer(&event_details);
|
||||||
event_info.node_name = target_node.node_name;
|
|
||||||
|
|
||||||
/* update node record on the active node */
|
event_info.conninfo_str = target_node.conninfo;
|
||||||
update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
|
event_info.node_name = target_node.node_name;
|
||||||
|
|
||||||
log_notice(_("setting node record for node %i to inactive"), monitored_node->node_id);
|
/* update node record on the active node */
|
||||||
|
update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
log_notice(_("setting node record for node %i to inactive"), monitored_node->node_id);
|
||||||
_("node \"%s\" (ID: %i) detected as failed; next available node is \"%s\" (ID: %i)"),
|
|
||||||
monitored_node->node_name,
|
|
||||||
monitored_node->node_id,
|
|
||||||
target_node.node_name,
|
|
||||||
target_node.node_id);
|
|
||||||
|
|
||||||
/*
|
appendPQExpBuffer(&event_details,
|
||||||
* Create an event record
|
_("node \"%s\" (ID: %i) detected as failed; next available node is \"%s\" (ID: %i)"),
|
||||||
*
|
monitored_node->node_name,
|
||||||
* If we were able to connect to another node, we'll update the event log
|
monitored_node->node_id,
|
||||||
* there.
|
target_node.node_name,
|
||||||
*
|
target_node.node_id);
|
||||||
* In any case the event notification command will be triggered with the
|
|
||||||
* event "bdr_failover"
|
/*
|
||||||
*/
|
* Create an event record
|
||||||
|
*
|
||||||
|
* If we were able to connect to another node, we'll update the event log
|
||||||
|
* there.
|
||||||
|
*
|
||||||
|
* In any case the event notification command will be triggered with the
|
||||||
|
* event "bdr_failover"
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
create_event_notification_extended(next_node_conn,
|
create_event_notification_extended(next_node_conn,
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
monitored_node->node_id,
|
monitored_node->node_id,
|
||||||
"bdr_failover",
|
"bdr_failover",
|
||||||
true,
|
true,
|
||||||
event_details.data,
|
event_details.data,
|
||||||
&event_info);
|
&event_info);
|
||||||
|
|
||||||
log_info("%s", event_details.data);
|
log_info("%s", event_details.data);
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
|
}
|
||||||
|
|
||||||
unset_bdr_failover_handler(next_node_conn);
|
unset_bdr_failover_handler(next_node_conn);
|
||||||
|
|
||||||
@@ -513,7 +528,6 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
|
|||||||
{
|
{
|
||||||
PGconn *recovered_node_conn;
|
PGconn *recovered_node_conn;
|
||||||
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
||||||
int i;
|
int i;
|
||||||
bool slot_reactivated = false;
|
bool slot_reactivated = false;
|
||||||
@@ -543,6 +557,8 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
|
|||||||
*/
|
*/
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
local_conn = NULL;
|
local_conn = NULL;
|
||||||
log_warning(_("unable to reconnect to local node"));
|
log_warning(_("unable to reconnect to local node"));
|
||||||
|
|
||||||
@@ -613,49 +629,50 @@ do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
|
|||||||
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
|
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
|
||||||
monitored_node->monitoring_state = MS_NORMAL;
|
monitored_node->monitoring_state = MS_NORMAL;
|
||||||
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("node \"%s\" (ID: %i) has recovered after %i seconds"),
|
|
||||||
monitored_node->node_name,
|
|
||||||
monitored_node->node_id,
|
|
||||||
node_recovery_elapsed);
|
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
|
||||||
|
|
||||||
|
|
||||||
/* other node will generate the event */
|
|
||||||
if (monitored_node->node_id == local_node_info.node_id)
|
|
||||||
{
|
{
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("node \"%s\" (ID: %i) has recovered after %i seconds"),
|
||||||
|
monitored_node->node_name,
|
||||||
|
monitored_node->node_id,
|
||||||
|
node_recovery_elapsed);
|
||||||
|
|
||||||
|
log_notice("%s", event_details.data);
|
||||||
|
|
||||||
|
|
||||||
|
/* other node will generate the event */
|
||||||
|
if (monitored_node->node_id == local_node_info.node_id)
|
||||||
|
{
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
PQfinish(recovered_node_conn);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* generate the event on the currently active node only */
|
||||||
|
if (monitored_node->node_id != local_node_info.node_id)
|
||||||
|
{
|
||||||
|
event_info.conninfo_str = monitored_node->conninfo;
|
||||||
|
event_info.node_name = monitored_node->node_name;
|
||||||
|
|
||||||
|
create_event_notification_extended(local_conn,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"bdr_recovery",
|
||||||
|
true,
|
||||||
|
event_details.data,
|
||||||
|
&event_info);
|
||||||
|
}
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
PQfinish(recovered_node_conn);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* generate the event on the currently active node only */
|
|
||||||
if (monitored_node->node_id != local_node_info.node_id)
|
|
||||||
{
|
|
||||||
event_info.conninfo_str = monitored_node->conninfo;
|
|
||||||
event_info.node_name = monitored_node->node_name;
|
|
||||||
|
|
||||||
create_event_notification_extended(
|
|
||||||
local_conn,
|
|
||||||
&config_file_options,
|
|
||||||
config_file_options.node_id,
|
|
||||||
"bdr_recovery",
|
|
||||||
true,
|
|
||||||
event_details.data,
|
|
||||||
&event_info);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
update_node_record_set_active(local_conn, monitored_node->node_id, true);
|
update_node_record_set_active(local_conn, monitored_node->node_id, true);
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
PQfinish(recovered_node_conn);
|
PQfinish(recovered_node_conn);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|||||||
1003
repmgrd-physical.c
1003
repmgrd-physical.c
File diff suppressed because it is too large
Load Diff
@@ -409,8 +409,8 @@ main(int argc, char **argv)
|
|||||||
log_detail(_("\"repmgr\" version %s is installed but extension is version %s"),
|
log_detail(_("\"repmgr\" version %s is installed but extension is version %s"),
|
||||||
REPMGR_VERSION,
|
REPMGR_VERSION,
|
||||||
extversions.installed_version);
|
extversions.installed_version);
|
||||||
|
log_hint(_("update the repmgr binaries to match the installed extension version"));
|
||||||
|
|
||||||
log_hint(_("verify the repmgr installation on this server is updated properly before continuing"));
|
|
||||||
close_connection(&local_conn);
|
close_connection(&local_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -421,8 +421,8 @@ main(int argc, char **argv)
|
|||||||
log_detail(_("\"repmgr\" version %s is installed but extension is version %s"),
|
log_detail(_("\"repmgr\" version %s is installed but extension is version %s"),
|
||||||
REPMGR_VERSION,
|
REPMGR_VERSION,
|
||||||
extversions.installed_version);
|
extversions.installed_version);
|
||||||
|
log_hint(_("update the installed extension version by executing \"ALTER EXTENSION repmgr UPDATE\""));
|
||||||
|
|
||||||
log_hint(_("verify the repmgr extension is updated properly before continuing"));
|
|
||||||
close_connection(&local_conn);
|
close_connection(&local_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -433,7 +433,7 @@ main(int argc, char **argv)
|
|||||||
if (extension_status == REPMGR_UNKNOWN)
|
if (extension_status == REPMGR_UNKNOWN)
|
||||||
{
|
{
|
||||||
log_error(_("unable to determine status of \"repmgr\" extension"));
|
log_error(_("unable to determine status of \"repmgr\" extension"));
|
||||||
log_detail("%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
close_connection(&local_conn);
|
close_connection(&local_conn);
|
||||||
exit(ERR_DB_QUERY);
|
exit(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
@@ -561,6 +561,8 @@ start_monitoring(void)
|
|||||||
local_node_info.node_name,
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
|
|
||||||
|
log_info(_("\"connection_check_type\" set to \"%s\""), print_connection_check_type(config_file_options.connection_check_type));
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
switch (local_node_info.type)
|
switch (local_node_info.type)
|
||||||
@@ -846,6 +848,7 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
if (PQstatus(test_conn) != CONNECTION_OK)
|
if (PQstatus(test_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("unable to connect to \"%s\""), conninfo);
|
log_warning(_("unable to connect to \"%s\""), conninfo);
|
||||||
|
log_detail("\n%s", PQerrorMessage(test_conn));
|
||||||
success = false;
|
success = false;
|
||||||
}
|
}
|
||||||
PQfinish(test_conn);
|
PQfinish(test_conn);
|
||||||
|
|||||||
18
sysutils.c
18
sysutils.c
@@ -310,18 +310,26 @@ enable_wal_receiver(PGconn *conn, bool wait_startup)
|
|||||||
if (wal_retrieve_retry_interval > WALRECEIVER_DISABLE_TIMEOUT_VALUE)
|
if (wal_retrieve_retry_interval > WALRECEIVER_DISABLE_TIMEOUT_VALUE)
|
||||||
{
|
{
|
||||||
int new_wal_retrieve_retry_interval = wal_retrieve_retry_interval - WALRECEIVER_DISABLE_TIMEOUT_VALUE;
|
int new_wal_retrieve_retry_interval = wal_retrieve_retry_interval - WALRECEIVER_DISABLE_TIMEOUT_VALUE;
|
||||||
|
bool success;
|
||||||
|
|
||||||
log_notice(_("setting \"wal_retrieve_retry_interval\" to %i ms"),
|
log_notice(_("setting \"wal_retrieve_retry_interval\" to %i ms"),
|
||||||
new_wal_retrieve_retry_interval);
|
new_wal_retrieve_retry_interval);
|
||||||
|
|
||||||
// XXX handle error
|
success = alter_system_int(conn,
|
||||||
alter_system_int(conn,
|
"wal_retrieve_retry_interval",
|
||||||
"wal_retrieve_retry_interval",
|
new_wal_retrieve_retry_interval);
|
||||||
new_wal_retrieve_retry_interval);
|
|
||||||
|
if (success == false)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to change \"wal_retrieve_retry_interval\""));
|
||||||
|
return UNKNOWN_PID;
|
||||||
|
}
|
||||||
|
|
||||||
pg_reload_conf(conn);
|
pg_reload_conf(conn);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// XXX add threshold sanity check
|
/* TODO: add threshold sanity check */
|
||||||
log_info(_("\"wal_retrieve_retry_interval\" is %i, not changing"),
|
log_info(_("\"wal_retrieve_retry_interval\" is %i, not changing"),
|
||||||
wal_retrieve_retry_interval);
|
wal_retrieve_retry_interval);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user