mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 16:16:29 +00:00
Write events of note to a log table
This makes keeping track of events such as failovers
much easier. Note that this is for convenience and is
not a foolproof auditing log.
Sample output:
repmgr_db=# SELECT * from repmgr_test.repl_events ;
node_id | event | successful | event_timestamp | details
---------+--------------------------+------------+-------------------------------+----------------------------------------------------------
1 | master_register | t | 2015-03-06 14:14:08.196636+09 |
2 | standby_clone | t | 2015-03-06 14:14:17.660768+09 | Backup method: pg_basebackup; --force: N
2 | standby_register | t | 2015-03-06 14:14:18.762222+09 |
4 | witness_create | t | 2015-03-06 14:14:22.072815+09 |
3 | standby_clone | t | 2015-03-06 14:14:23.524673+09 | Backup method: pg_basebackup; --force: N
3 | standby_register | t | 2015-03-06 14:14:24.620161+09 |
2 | repmgrd_start | t | 2015-03-06 14:14:29.639096+09 |
3 | repmgrd_start | t | 2015-03-06 14:14:29.641489+09 |
4 | repmgrd_start | t | 2015-03-06 14:14:29.648002+09 |
2 | standby_promote | t | 2015-03-06 14:15:01.956737+09 | Node 2 was successfully be promoted to master
2 | repmgrd_failover_promote | t | 2015-03-06 14:15:01.964771+09 | Node 2 promoted to master; old master 1 marked as failed
3 | repmgrd_failover_follow | t | 2015-03-06 14:15:07.228493+09 | Node 3 now following new upstream node 2
(12 rows)
This commit is contained in:
8
FAQ.md
8
FAQ.md
@@ -26,6 +26,14 @@ General
|
|||||||
(However this does mean that if a standby is no longer connected to the
|
(However this does mean that if a standby is no longer connected to the
|
||||||
master, the master will retain WAL files indefinitely).
|
master, the master will retain WAL files indefinitely).
|
||||||
|
|
||||||
|
- What's the difference between the `repl_status` view and `repl_events`
|
||||||
|
table?
|
||||||
|
|
||||||
|
The `repl_status` view shows the latest node status as reported by
|
||||||
|
`repmgrd` and is useful for monitoring current lag.
|
||||||
|
`repl_events` is a record of significant events which have
|
||||||
|
taken place, such as a node being registered or a failover event.
|
||||||
|
|
||||||
`repmgr`
|
`repmgr`
|
||||||
--------
|
--------
|
||||||
|
|
||||||
|
|||||||
52
dbutils.c
52
dbutils.c
@@ -1016,3 +1016,55 @@ delete_node_record(PGconn *conn, int node, char *action)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
create_event_record(PGconn *conn, int node_id, char *event, bool successful, char *details)
|
||||||
|
{
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
PGresult *res;
|
||||||
|
|
||||||
|
int n_node_id = htonl(node_id);
|
||||||
|
char *t_successful = successful ? "TRUE" : "FALSE";
|
||||||
|
|
||||||
|
const char *values[4] = { (char *)&n_node_id,
|
||||||
|
event,
|
||||||
|
t_successful,
|
||||||
|
details
|
||||||
|
};
|
||||||
|
|
||||||
|
int lengths[4] = { sizeof(n_node_id),
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0
|
||||||
|
};
|
||||||
|
int binary[4] = {1, 0, 0, 0};
|
||||||
|
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
" INSERT INTO %s.repl_events ( "
|
||||||
|
" node_id, "
|
||||||
|
" event, "
|
||||||
|
" successful, "
|
||||||
|
" details "
|
||||||
|
" ) "
|
||||||
|
" VALUES ($1, $2, $3, $4) ",
|
||||||
|
get_repmgr_schema_quoted(conn));
|
||||||
|
|
||||||
|
res = PQexecParams(conn,
|
||||||
|
sqlquery,
|
||||||
|
4,
|
||||||
|
NULL,
|
||||||
|
values,
|
||||||
|
lengths,
|
||||||
|
binary,
|
||||||
|
0);
|
||||||
|
|
||||||
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_warning(_("Cannot insert event details, %s\n"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
|||||||
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
|
||||||
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
|
||||||
bool delete_node_record(PGconn *conn, int node, char *action);
|
bool delete_node_record(PGconn *conn, int node, char *action);
|
||||||
|
bool create_event_record(PGconn *conn, int node_id, char *event, bool successful, char *details);
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
365
repmgr.c
365
repmgr.c
@@ -592,7 +592,6 @@ do_cluster_show(void)
|
|||||||
printf("Role | Connection String \n");
|
printf("Role | Connection String \n");
|
||||||
for (i = 0; i < PQntuples(res); i++)
|
for (i = 0; i < PQntuples(res); i++)
|
||||||
{
|
{
|
||||||
// ZZZ witness
|
|
||||||
conn = establish_db_connection(PQgetvalue(res, i, 0), false);
|
conn = establish_db_connection(PQgetvalue(res, i, 0), false);
|
||||||
if (PQstatus(conn) != CONNECTION_OK)
|
if (PQstatus(conn) != CONNECTION_OK)
|
||||||
strcpy(node_role, " FAILED");
|
strcpy(node_role, " FAILED");
|
||||||
@@ -683,7 +682,7 @@ do_master_register(void)
|
|||||||
bool schema_exists = false;
|
bool schema_exists = false;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
bool node_record_created;
|
bool record_created;
|
||||||
|
|
||||||
conn = establish_db_connection(options.conninfo, true);
|
conn = establish_db_connection(options.conninfo, true);
|
||||||
|
|
||||||
@@ -755,22 +754,39 @@ do_master_register(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Now register the master */
|
/* Now register the master */
|
||||||
node_record_created = create_node_record(conn,
|
record_created = create_node_record(conn,
|
||||||
"master register",
|
"master register",
|
||||||
options.node,
|
options.node,
|
||||||
"primary",
|
"primary",
|
||||||
NO_UPSTREAM_NODE,
|
NO_UPSTREAM_NODE,
|
||||||
options.cluster_name,
|
options.cluster_name,
|
||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
repmgr_slot_name_ptr);
|
repmgr_slot_name_ptr);
|
||||||
|
|
||||||
|
if(record_created == false)
|
||||||
|
{
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Log the event */
|
||||||
|
record_created = create_event_record(conn,
|
||||||
|
options.node,
|
||||||
|
"master_register",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if(record_created == false)
|
||||||
|
{
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
|
|
||||||
if(node_record_created == false)
|
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
|
|
||||||
log_notice(_("Master node correctly registered for cluster %s with id %d (conninfo: %s)\n"),
|
log_notice(_("Master node correctly registered for cluster %s with id %d (conninfo: %s)\n"),
|
||||||
options.cluster_name, options.node, options.conninfo);
|
options.cluster_name, options.node, options.conninfo);
|
||||||
return;
|
return;
|
||||||
@@ -790,7 +806,7 @@ do_standby_register(void)
|
|||||||
char standby_version[MAXVERSIONSTR];
|
char standby_version[MAXVERSIONSTR];
|
||||||
int standby_version_num = 0;
|
int standby_version_num = 0;
|
||||||
|
|
||||||
bool node_record_created;
|
bool record_created;
|
||||||
|
|
||||||
log_info(_("%s connecting to standby database\n"), progname);
|
log_info(_("%s connecting to standby database\n"), progname);
|
||||||
conn = establish_db_connection(options.conninfo, true);
|
conn = establish_db_connection(options.conninfo, true);
|
||||||
@@ -864,23 +880,43 @@ do_standby_register(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
node_record_created = create_node_record(master_conn,
|
record_created = create_node_record(master_conn,
|
||||||
"standby register",
|
"standby register",
|
||||||
options.node,
|
options.node,
|
||||||
"standby",
|
"standby",
|
||||||
options.upstream_node,
|
options.upstream_node,
|
||||||
options.cluster_name,
|
options.cluster_name,
|
||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
repmgr_slot_name_ptr);
|
repmgr_slot_name_ptr);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if(record_created == false)
|
||||||
|
{
|
||||||
|
PQfinish(master_conn);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Log the event */
|
||||||
|
record_created = create_event_record(master_conn,
|
||||||
|
options.node,
|
||||||
|
"standby_register",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if(record_created == false)
|
||||||
|
{
|
||||||
|
PQfinish(master_conn);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
PQfinish(master_conn);
|
PQfinish(master_conn);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
|
|
||||||
if(node_record_created == false)
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
|
|
||||||
log_info(_("%s registering the standby complete\n"), progname);
|
log_info(_("%s registering the standby complete\n"), progname);
|
||||||
log_notice(_("Standby node correctly registered for cluster %s with id %d (conninfo: %s)\n"),
|
log_notice(_("Standby node correctly registered for cluster %s with id %d (conninfo: %s)\n"),
|
||||||
options.cluster_name, options.node, options.conninfo);
|
options.cluster_name, options.node, options.conninfo);
|
||||||
@@ -927,6 +963,9 @@ do_standby_clone(void)
|
|||||||
char *first_wal_segment = NULL;
|
char *first_wal_segment = NULL;
|
||||||
char *last_wal_segment = NULL;
|
char *last_wal_segment = NULL;
|
||||||
|
|
||||||
|
bool record_created = FALSE;
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If dest_dir (-D/--pgdata) was provided, this will become the new data
|
* If dest_dir (-D/--pgdata) was provided, this will become the new data
|
||||||
* directory (otherwise repmgr will default to the same directory as on the
|
* directory (otherwise repmgr will default to the same directory as on the
|
||||||
@@ -1219,7 +1258,8 @@ do_standby_clone(void)
|
|||||||
{
|
{
|
||||||
for (cell = options.tablespace_mapping.head; cell; cell = cell->next)
|
for (cell = options.tablespace_mapping.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
if(strcmp( tblspc_dir_src.data, cell->old_dir) == 0) {
|
if(strcmp( tblspc_dir_src.data, cell->old_dir) == 0)
|
||||||
|
{
|
||||||
mapping_found = true;
|
mapping_found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1471,6 +1511,30 @@ stop_backup:
|
|||||||
log_notice("for example : /etc/init.d/postgresql start\n");
|
log_notice("for example : /etc/init.d/postgresql start\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Log the event */
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
/* Add details about relevant runtime options used */
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Backup method: %s"),
|
||||||
|
runtime_options.rsync_only ? "rsync" : "pg_basebackup");
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("; --force: %s"),
|
||||||
|
runtime_options.force ? "Y" : "N");
|
||||||
|
|
||||||
|
record_created = create_event_record(primary_conn,
|
||||||
|
options.node,
|
||||||
|
"standby_clone",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
//destroyPQExpBuffer(&event_details);
|
||||||
|
if(record_created == false)
|
||||||
|
{
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
PQfinish(primary_conn);
|
PQfinish(primary_conn);
|
||||||
exit(retval);
|
exit(retval);
|
||||||
}
|
}
|
||||||
@@ -1494,6 +1558,7 @@ do_standby_promote(void)
|
|||||||
promote_check_interval = 2;
|
promote_check_interval = 2;
|
||||||
bool promote_sucess = false;
|
bool promote_sucess = false;
|
||||||
bool success;
|
bool success;
|
||||||
|
bool record_created;
|
||||||
|
|
||||||
/* We need to connect to check configuration */
|
/* We need to connect to check configuration */
|
||||||
log_info(_("%s connecting to standby database\n"), progname);
|
log_info(_("%s connecting to standby database\n"), progname);
|
||||||
@@ -1575,6 +1640,17 @@ do_standby_promote(void)
|
|||||||
|
|
||||||
if (promote_sucess == false)
|
if (promote_sucess == false)
|
||||||
{
|
{
|
||||||
|
PQExpBufferData details;
|
||||||
|
initPQExpBuffer(&details);
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
"Node %i could not be promoted to master",
|
||||||
|
options.node);
|
||||||
|
|
||||||
|
record_created = create_event_record(old_master_conn,
|
||||||
|
options.node,
|
||||||
|
"standby_promote",
|
||||||
|
false,
|
||||||
|
details.data);
|
||||||
/* XXX exit with error? */
|
/* XXX exit with error? */
|
||||||
log_err(_(retval == 1 ?
|
log_err(_(retval == 1 ?
|
||||||
"%s: STANDBY PROMOTE failed, this is still a standby node.\n" :
|
"%s: STANDBY PROMOTE failed, this is still a standby node.\n" :
|
||||||
@@ -1582,10 +1658,30 @@ do_standby_promote(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
PQExpBufferData details;
|
||||||
|
initPQExpBuffer(&details);
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
"Node %i was successfully be promoted to master",
|
||||||
|
options.node);
|
||||||
|
|
||||||
log_notice(_("%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"),
|
log_notice(_("%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n"),
|
||||||
progname);
|
progname);
|
||||||
|
/* Log the event */
|
||||||
|
record_created = create_event_record(conn,
|
||||||
|
options.node,
|
||||||
|
"standby_promote",
|
||||||
|
true,
|
||||||
|
details.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PQfinish(old_master_conn);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
|
|
||||||
|
if(record_created == false)
|
||||||
|
{
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1749,7 +1845,7 @@ do_witness_create(void)
|
|||||||
|
|
||||||
char master_hba_file[MAXLEN];
|
char master_hba_file[MAXLEN];
|
||||||
bool success;
|
bool success;
|
||||||
bool node_record_created;
|
bool record_created;
|
||||||
|
|
||||||
/* Connection parameters for master only */
|
/* Connection parameters for master only */
|
||||||
keywords[0] = "host";
|
keywords[0] = "host";
|
||||||
@@ -1761,6 +1857,7 @@ do_witness_create(void)
|
|||||||
masterconn = establish_db_connection_by_params(keywords, values, true);
|
masterconn = establish_db_connection_by_params(keywords, values, true);
|
||||||
if (!masterconn)
|
if (!masterconn)
|
||||||
{
|
{
|
||||||
|
/* No event logging possible as we can't connect to the master */
|
||||||
log_err(_("%s: could not connect to master\n"), progname);
|
log_err(_("%s: could not connect to master\n"), progname);
|
||||||
exit(ERR_DB_CON);
|
exit(ERR_DB_CON);
|
||||||
}
|
}
|
||||||
@@ -1772,10 +1869,21 @@ do_witness_create(void)
|
|||||||
retval = is_standby(masterconn);
|
retval = is_standby(masterconn);
|
||||||
if (retval)
|
if (retval)
|
||||||
{
|
{
|
||||||
log_err(_(retval == 1 ?
|
PQExpBufferData errmsg;
|
||||||
"The command should not run on a standby node\n" :
|
initPQExpBuffer(&errmsg);
|
||||||
"Connection to node lost!\n"));
|
appendPQExpBuffer(&errmsg,
|
||||||
|
"%s",
|
||||||
|
_(retval == 1 ?
|
||||||
|
"Provided upstream node is not a master" :
|
||||||
|
"Connection to upstream node lost"));
|
||||||
|
|
||||||
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg.data);
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1785,8 +1893,18 @@ do_witness_create(void)
|
|||||||
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
|
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("%s: Aborting, remote host %s is not reachable.\n"),
|
PQExpBufferData errmsg;
|
||||||
progname, runtime_options.host);
|
initPQExpBuffer(&errmsg);
|
||||||
|
appendPQExpBuffer(&errmsg,
|
||||||
|
_("%s: unable to connect to remote host '%s' via SSH"),
|
||||||
|
progname, runtime_options.host);
|
||||||
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg.data);
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_SSH);
|
exit(ERR_BAD_SSH);
|
||||||
}
|
}
|
||||||
@@ -1794,8 +1912,17 @@ do_witness_create(void)
|
|||||||
/* Check this directory could be used as a PGDATA dir */
|
/* Check this directory could be used as a PGDATA dir */
|
||||||
if (!create_pg_dir(runtime_options.dest_dir, runtime_options.force))
|
if (!create_pg_dir(runtime_options.dest_dir, runtime_options.force))
|
||||||
{
|
{
|
||||||
log_err(_("witness create: couldn't create data directory (\"%s\") for witness"),
|
PQExpBufferData errmsg;
|
||||||
runtime_options.dest_dir);
|
initPQExpBuffer(&errmsg);
|
||||||
|
appendPQExpBuffer(&errmsg,
|
||||||
|
_("%s: ouldn't create witness server data directory (\"%s\")"),
|
||||||
|
progname, runtime_options.host);
|
||||||
|
log_err("%s\n", errmsg.data);
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg.data);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1819,7 +1946,13 @@ do_witness_create(void)
|
|||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err("Can't initialize cluster for witness server\n");
|
char *errmsg = "Unable to initialize cluster for witness server";
|
||||||
|
log_err("%s\n", errmsg);
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg);
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1832,8 +1965,21 @@ do_witness_create(void)
|
|||||||
pg_conf = fopen(buf, "a");
|
pg_conf = fopen(buf, "a");
|
||||||
if (pg_conf == NULL)
|
if (pg_conf == NULL)
|
||||||
{
|
{
|
||||||
log_err(_("%s: could not open \"%s\" for adding extra config: %s\n"),
|
PQExpBufferData errmsg;
|
||||||
progname, buf, strerror(errno));
|
initPQExpBuffer(&errmsg);
|
||||||
|
appendPQExpBuffer(&errmsg,
|
||||||
|
_("%s: could not open \"%s\" for adding extra config: %s\n"),
|
||||||
|
progname,
|
||||||
|
buf,
|
||||||
|
strerror(errno));
|
||||||
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg.data);
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1863,7 +2009,15 @@ do_witness_create(void)
|
|||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("Can't start cluster for witness server\n"));
|
char *errmsg = _("Unable to start cluster for witness server");
|
||||||
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg);
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1880,7 +2034,14 @@ do_witness_create(void)
|
|||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("Can't create user for witness server\n"));
|
char *errmsg = _("Unable to create user for witness server");
|
||||||
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg);
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1898,7 +2059,15 @@ do_witness_create(void)
|
|||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err("Can't create database for witness server\n");
|
char *errmsg = _("Unable to create database for witness server");
|
||||||
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg);
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1909,7 +2078,15 @@ do_witness_create(void)
|
|||||||
|
|
||||||
if (success == false)
|
if (success == false)
|
||||||
{
|
{
|
||||||
log_err(_("Can't get info about pg_hba.conf\n"));
|
char *errmsg = _("Unable to retrieve location of pg_hba.conf");
|
||||||
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg);
|
||||||
|
|
||||||
exit(ERR_DB_QUERY);
|
exit(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1917,7 +2094,15 @@ do_witness_create(void)
|
|||||||
master_hba_file, runtime_options.dest_dir, false, -1);
|
master_hba_file, runtime_options.dest_dir, false, -1);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("Can't rsync the pg_hba.conf file from master\n"));
|
char *errmsg = _("Unable to copy pg_hba.conf from master");
|
||||||
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg);
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1930,26 +2115,40 @@ do_witness_create(void)
|
|||||||
r = system(script);
|
r = system(script);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
log_err(_("Can't reload cluster for witness server\n"));
|
char *errmsg = _("Unable reload witness server");
|
||||||
|
log_err("%s\n", errmsg);
|
||||||
|
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
errmsg);
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* register ourselves in the master */
|
/* register ourselves in the master */
|
||||||
|
|
||||||
node_record_created = create_node_record(masterconn,
|
record_created = create_node_record(masterconn,
|
||||||
"witness create",
|
"witness create",
|
||||||
options.node,
|
options.node,
|
||||||
"witness",
|
"witness",
|
||||||
NO_UPSTREAM_NODE,
|
NO_UPSTREAM_NODE,
|
||||||
options.cluster_name,
|
options.cluster_name,
|
||||||
options.node_name,
|
options.node_name,
|
||||||
options.conninfo,
|
options.conninfo,
|
||||||
options.priority,
|
options.priority,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if(node_record_created == false)
|
if(record_created == false)
|
||||||
{
|
{
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
"Unable to create witness node record on master");
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
exit(ERR_DB_QUERY);
|
exit(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
@@ -1961,6 +2160,11 @@ do_witness_create(void)
|
|||||||
|
|
||||||
if (!create_schema(witnessconn))
|
if (!create_schema(witnessconn))
|
||||||
{
|
{
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
"Unable to create schema on witness");
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
PQfinish(witnessconn);
|
PQfinish(witnessconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -1969,6 +2173,11 @@ do_witness_create(void)
|
|||||||
/* copy configuration from master, only repl_nodes is needed */
|
/* copy configuration from master, only repl_nodes is needed */
|
||||||
if (!copy_configuration(masterconn, witnessconn, options.cluster_name))
|
if (!copy_configuration(masterconn, witnessconn, options.cluster_name))
|
||||||
{
|
{
|
||||||
|
create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
false,
|
||||||
|
"Unable to copy configuration from master");
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
PQfinish(witnessconn);
|
PQfinish(witnessconn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -1991,6 +2200,20 @@ do_witness_create(void)
|
|||||||
exit(ERR_DB_QUERY);
|
exit(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Log the event */
|
||||||
|
record_created = create_event_record(masterconn,
|
||||||
|
options.node,
|
||||||
|
"witness_create",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if(record_created == false)
|
||||||
|
{
|
||||||
|
PQfinish(masterconn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
PQfinish(masterconn);
|
PQfinish(masterconn);
|
||||||
PQfinish(witnessconn);
|
PQfinish(witnessconn);
|
||||||
|
|
||||||
@@ -2574,6 +2797,29 @@ create_schema(PGconn *conn)
|
|||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
/* repl_events */
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"CREATE TABLE %s.repl_events ( "
|
||||||
|
" node_id INTEGER NOT NULL, "
|
||||||
|
" event TEXT NOT NULL, "
|
||||||
|
" successful BOOLEAN NOT NULL DEFAULT TRUE, "
|
||||||
|
" event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, "
|
||||||
|
" details TEXT NULL "
|
||||||
|
" ) ",
|
||||||
|
get_repmgr_schema_quoted(conn));
|
||||||
|
|
||||||
|
log_debug(_("master register: %s\n"), sqlquery);
|
||||||
|
res = PQexec(conn, sqlquery);
|
||||||
|
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Cannot create the table %s.repl_events: %s\n"),
|
||||||
|
get_repmgr_schema_quoted(conn), PQerrorMessage(conn));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX Here we MUST try to load the repmgr_function.sql not hardcode it
|
* XXX Here we MUST try to load the repmgr_function.sql not hardcode it
|
||||||
* here
|
* here
|
||||||
@@ -2590,6 +2836,8 @@ create_schema(PGconn *conn)
|
|||||||
{
|
{
|
||||||
fprintf(stderr, "Cannot create the function repmgr_update_standby_location: %s\n",
|
fprintf(stderr, "Cannot create the function repmgr_update_standby_location: %s\n",
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
@@ -2606,6 +2854,7 @@ create_schema(PGconn *conn)
|
|||||||
{
|
{
|
||||||
fprintf(stderr, "Cannot create the function repmgr_get_last_standby_location: %s\n",
|
fprintf(stderr, "Cannot create the function repmgr_get_last_standby_location: %s\n",
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|||||||
103
repmgrd.c
103
repmgrd.c
@@ -17,7 +17,7 @@
|
|||||||
*
|
*
|
||||||
* You should have received a copy of the GNU General Public License
|
* You should have received a copy of the GNU General Public License
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
@@ -33,6 +33,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "repmgr.h"
|
#include "repmgr.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
@@ -41,6 +42,7 @@
|
|||||||
|
|
||||||
/* Required PostgreSQL headers */
|
/* Required PostgreSQL headers */
|
||||||
#include "access/xlogdefs.h"
|
#include "access/xlogdefs.h"
|
||||||
|
#include "pqexpbuffer.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Struct to store node information
|
* Struct to store node information
|
||||||
@@ -154,6 +156,7 @@ main(int argc, char **argv)
|
|||||||
int optindex;
|
int optindex;
|
||||||
int c;
|
int c;
|
||||||
bool daemonize = false;
|
bool daemonize = false;
|
||||||
|
bool startup_event_logged = false;
|
||||||
|
|
||||||
FILE *fd;
|
FILE *fd;
|
||||||
|
|
||||||
@@ -318,6 +321,17 @@ main(int argc, char **argv)
|
|||||||
update_registration();
|
update_registration();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Log startup event */
|
||||||
|
if(startup_event_logged == false)
|
||||||
|
{
|
||||||
|
create_event_record(primary_conn,
|
||||||
|
local_options.node,
|
||||||
|
"repmgrd_start",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
startup_event_logged = true;
|
||||||
|
}
|
||||||
|
|
||||||
log_info(_("%s Starting continuous primary connection check\n"),
|
log_info(_("%s Starting continuous primary connection check\n"),
|
||||||
progname);
|
progname);
|
||||||
|
|
||||||
@@ -332,7 +346,7 @@ main(int argc, char **argv)
|
|||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
log_debug("primary check loop...\n");
|
log_debug("primary check loop...\n"); // ZZZ
|
||||||
if (check_connection(primary_conn, "master"))
|
if (check_connection(primary_conn, "master"))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -406,6 +420,16 @@ main(int argc, char **argv)
|
|||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
update_registration();
|
update_registration();
|
||||||
}
|
}
|
||||||
|
/* Log startup event */
|
||||||
|
if(startup_event_logged == false)
|
||||||
|
{
|
||||||
|
create_event_record(primary_conn,
|
||||||
|
local_options.node,
|
||||||
|
"repmgrd_start",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
startup_event_logged = true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Every local_options.monitor_interval_secs seconds, do
|
* Every local_options.monitor_interval_secs seconds, do
|
||||||
@@ -1316,6 +1340,9 @@ do_primary_failover(void)
|
|||||||
/* if local node is the best candidate, promote it */
|
/* if local node is the best candidate, promote it */
|
||||||
if (best_candidate.node_id == local_options.node)
|
if (best_candidate.node_id == local_options.node)
|
||||||
{
|
{
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
/* wait */
|
/* wait */
|
||||||
sleep(5);
|
sleep(5);
|
||||||
|
|
||||||
@@ -1335,6 +1362,12 @@ do_primary_failover(void)
|
|||||||
{
|
{
|
||||||
log_err(_("%s: promote command failed. You could check and try it manually.\n"),
|
log_err(_("%s: promote command failed. You could check and try it manually.\n"),
|
||||||
progname);
|
progname);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At this point there's no valid primary we can write to,
|
||||||
|
* so can't add an entry to the event table.
|
||||||
|
*/
|
||||||
|
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1344,16 +1377,42 @@ do_primary_failover(void)
|
|||||||
/* update node information to reflect new status */
|
/* update node information to reflect new status */
|
||||||
if(update_node_record_set_primary(my_local_conn, node_info.node_id, failed_primary.node_id) == false)
|
if(update_node_record_set_primary(my_local_conn, node_info.node_id, failed_primary.node_id) == false)
|
||||||
{
|
{
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Unable to update node record for node %i (promoted to master following failure of node %i)"),
|
||||||
|
node_info.node_id,
|
||||||
|
failed_primary.node_id);
|
||||||
|
|
||||||
|
create_event_record(my_local_conn,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_promote",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update internal record for this node*/
|
/* update internal record for this node*/
|
||||||
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Node %i promoted to master; old master %i marked as failed"),
|
||||||
|
node_info.node_id,
|
||||||
|
failed_primary.node_id);
|
||||||
|
|
||||||
|
create_event_record(my_local_conn,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_promote",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
}
|
}
|
||||||
/* local node not promotion candidate - find the new primary */
|
/* local node not promotion candidate - find the new primary */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PGconn *new_primary_conn;
|
PGconn *new_primary_conn;
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
/* wait */
|
/* wait */
|
||||||
sleep(10);
|
sleep(10);
|
||||||
|
|
||||||
@@ -1381,9 +1440,20 @@ do_primary_failover(void)
|
|||||||
{
|
{
|
||||||
if(create_replication_slot(new_primary_conn, node_info.slot_name) == false)
|
if(create_replication_slot(new_primary_conn, node_info.slot_name) == false)
|
||||||
{
|
{
|
||||||
log_err(_("Unable to create slot '%s' on the primary node: %s\n"),
|
|
||||||
node_info.slot_name,
|
appendPQExpBuffer(&event_details,
|
||||||
PQerrorMessage(new_primary_conn));
|
_("Unable to create slot '%s' on the primary node: %s"),
|
||||||
|
node_info.slot_name,
|
||||||
|
PQerrorMessage(new_primary_conn));
|
||||||
|
|
||||||
|
create_event_record(new_primary_conn,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_follow",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
PQfinish(new_primary_conn);
|
PQfinish(new_primary_conn);
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
@@ -1403,13 +1473,35 @@ do_primary_failover(void)
|
|||||||
/* update node information to reflect new status */
|
/* update node information to reflect new status */
|
||||||
if(update_node_record_set_upstream(new_primary_conn, node_info.node_id, best_candidate.node_id) == false)
|
if(update_node_record_set_upstream(new_primary_conn, node_info.node_id, best_candidate.node_id) == false)
|
||||||
{
|
{
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Unable to update node record for node %i (following new upstream node %i)"),
|
||||||
|
node_info.node_id,
|
||||||
|
best_candidate.node_id);
|
||||||
|
|
||||||
|
create_event_record(new_primary_conn,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_follow",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update internal record for this node*/
|
/* update internal record for this node*/
|
||||||
node_info = get_node_info(new_primary_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(new_primary_conn, local_options.cluster_name, local_options.node);
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("Node %i now following new upstream node %i"),
|
||||||
|
node_info.node_id,
|
||||||
|
best_candidate.node_id);
|
||||||
|
|
||||||
|
create_event_record(new_primary_conn,
|
||||||
|
node_info.node_id,
|
||||||
|
"repmgrd_failover_follow",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
PQfinish(new_primary_conn);
|
PQfinish(new_primary_conn);
|
||||||
|
destroyPQExpBuffer(&event_details);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("failover done\n"); // ZZZ
|
log_debug("failover done\n"); // ZZZ
|
||||||
@@ -1515,6 +1607,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
r = system(local_options.follow_command);
|
r = system(local_options.follow_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
{
|
{
|
||||||
|
// ZZZ fail
|
||||||
log_err(_("%s: follow command failed. You could check and try it manually.\n"),
|
log_err(_("%s: follow command failed. You could check and try it manually.\n"),
|
||||||
progname);
|
progname);
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
|
|||||||
Reference in New Issue
Block a user