"repmgr node ...": fixes for 9.3

Mainly to account for the lack of replication slots in PostgreSQL 9.3 (replication slots are only available from 9.4 onwards).
This commit is contained in:
Ian Barwick
2017-11-16 11:25:16 +09:00
parent b8b991398a
commit 9165d27f9f
3 changed files with 105 additions and 114 deletions

View File

@@ -2593,7 +2593,7 @@ truncate_node_records(PGconn *conn)
} }
void void
get_node_replication_stats(PGconn *conn, t_node_info *node_info) get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *node_info)
{ {
PQExpBufferData query; PQExpBufferData query;
PGresult *res = NULL; PGresult *res = NULL;
@@ -2602,13 +2602,32 @@ get_node_replication_stats(PGconn *conn, t_node_info *node_info)
appendPQExpBuffer(&query, appendPQExpBuffer(&query,
" SELECT current_setting('max_wal_senders')::INT AS max_wal_senders, " " SELECT current_setting('max_wal_senders')::INT AS max_wal_senders, "
" (SELECT COUNT(*) FROM pg_catalog.pg_stat_replication) AS attached_wal_receivers, " " (SELECT COUNT(*) FROM pg_catalog.pg_stat_replication) AS attached_wal_receivers, ");
" current_setting('max_replication_slots')::INT AS max_replication_slots, "
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots) AS total_replication_slots, " /* no replication slots in PostgreSQL 9.3 */
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = TRUE) AS active_replication_slots, " if (server_version_num < 90400)
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = FALSE) AS inactive_replication_slots, " {
appendPQExpBuffer(&query,
" 0 AS max_replication_slots, "
" 0 AS total_replication_slots, "
" 0 AS active_replication_slots, "
" 0 AS inactive_replication_slots, ");
}
else
{
appendPQExpBuffer(&query,
" current_setting('max_replication_slots')::INT AS max_replication_slots, "
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots) AS total_replication_slots, "
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = TRUE) AS active_replication_slots, "
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = FALSE) AS inactive_replication_slots, ");
}
appendPQExpBuffer(&query,
" pg_catalog.pg_is_in_recovery() AS in_recovery"); " pg_catalog.pg_is_in_recovery() AS in_recovery");
res = PQexec(conn, query.data); res = PQexec(conn, query.data);
termPQExpBuffer(&query); termPQExpBuffer(&query);

View File

@@ -487,7 +487,7 @@ XLogRecPtr get_current_wal_lsn(PGconn *conn);
XLogRecPtr get_last_wal_receive_location(PGconn *conn); XLogRecPtr get_last_wal_receive_location(PGconn *conn);
bool get_replication_info(PGconn *conn, ReplInfo *replication_info); bool get_replication_info(PGconn *conn, ReplInfo *replication_info);
int get_replication_lag_seconds(PGconn *conn); int get_replication_lag_seconds(PGconn *conn);
void get_node_replication_stats(PGconn *conn, t_node_info *node_info); void get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *node_info);
bool is_downstream_node_attached(PGconn *conn, char *node_name); bool is_downstream_node_attached(PGconn *conn, char *node_name);
/* BDR functions */ /* BDR functions */

View File

@@ -105,7 +105,7 @@ do_node_status(void)
recovery_type = get_recovery_type(conn); recovery_type = get_recovery_type(conn);
get_node_replication_stats(conn, &node_info); get_node_replication_stats(conn, server_version_num, &node_info);
key_value_list_set( key_value_list_set(
&node_status, &node_status,
@@ -264,7 +264,13 @@ do_node_status(void)
"disabled"); "disabled");
} }
if (node_info.max_replication_slots > 0) if (server_version_num < 90400)
{
key_value_list_set(&node_status,
"Replication slots",
"not available");
}
else if (node_info.max_replication_slots > 0)
{ {
PQExpBufferData slotinfo; PQExpBufferData slotinfo;
@@ -279,8 +285,7 @@ do_node_status(void)
if (node_info.inactive_replication_slots > 0) if (node_info.inactive_replication_slots > 0)
{ {
appendPQExpBuffer( appendPQExpBuffer(&slotinfo,
&slotinfo,
"; %i inactive", "; %i inactive",
node_info.inactive_replication_slots); node_info.inactive_replication_slots);
@@ -290,8 +295,7 @@ do_node_status(void)
node_info.inactive_replication_slots); node_info.inactive_replication_slots);
} }
key_value_list_set( key_value_list_set(&node_status,
&node_status,
"Replication slots", "Replication slots",
slotinfo.data); slotinfo.data);
@@ -299,8 +303,7 @@ do_node_status(void)
} }
else if (node_info.max_replication_slots == 0) else if (node_info.max_replication_slots == 0)
{ {
key_value_list_set( key_value_list_set(&node_status,
&node_status,
"Replication slots", "Replication slots",
"disabled"); "disabled");
} }
@@ -308,8 +311,7 @@ do_node_status(void)
if (node_info.type == STANDBY) if (node_info.type == STANDBY)
{ {
key_value_list_set_format( key_value_list_set_format(&node_status,
&node_status,
"Upstream node", "Upstream node",
"%s (ID: %i)", "%s (ID: %i)",
node_info.node_name, node_info.node_name,
@@ -317,46 +319,47 @@ do_node_status(void)
get_replication_info(conn, &replication_info); get_replication_info(conn, &replication_info);
key_value_list_set_format( key_value_list_set_format(&node_status,
&node_status,
"Replication lag", "Replication lag",
"%i seconds", "%i seconds",
replication_info.replication_lag_time); replication_info.replication_lag_time);
key_value_list_set_format( key_value_list_set_format(&node_status,
&node_status,
"Last received LSN", "Last received LSN",
"%X/%X", format_lsn(replication_info.last_wal_receive_lsn)); "%X/%X", format_lsn(replication_info.last_wal_receive_lsn));
key_value_list_set_format( key_value_list_set_format(&node_status,
&node_status,
"Last replayed LSN", "Last replayed LSN",
"%X/%X", format_lsn(replication_info.last_wal_replay_lsn)); "%X/%X", format_lsn(replication_info.last_wal_replay_lsn));
} }
else else
{ {
key_value_list_set( key_value_list_set(&node_status,
&node_status,
"Upstream node", "Upstream node",
"(none)"); "(none)");
key_value_list_set_output_mode(&node_status, "Upstream node", OM_CSV); key_value_list_set_output_mode(&node_status,
"Upstream node",
OM_CSV);
key_value_list_set( key_value_list_set(&node_status,
&node_status,
"Replication lag", "Replication lag",
"n/a"); "n/a");
key_value_list_set( key_value_list_set(&node_status,
&node_status,
"Last received LSN", "Last received LSN",
"(none)"); "(none)");
key_value_list_set_output_mode(&node_status, "Last received LSN", OM_CSV);
key_value_list_set( key_value_list_set_output_mode(&node_status,
&node_status, "Last received LSN",
OM_CSV);
key_value_list_set(&node_status,
"Last replayed LSN", "Last replayed LSN",
"(none)"); "(none)");
key_value_list_set_output_mode(&node_status, "Last replayed LSN", OM_CSV);
key_value_list_set_output_mode(&node_status,
"Last replayed LSN",
OM_CSV);
} }
@@ -367,64 +370,55 @@ do_node_status(void)
if (runtime_options.output_mode == OM_CSV) if (runtime_options.output_mode == OM_CSV)
{ {
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"Node name\",\"%s\"\n", "\"Node name\",\"%s\"\n",
node_info.node_name); node_info.node_name);
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"Node ID\",\"%i\"\n", "\"Node ID\",\"%i\"\n",
node_info.node_id); node_info.node_id);
for (cell = node_status.head; cell; cell = cell->next) for (cell = node_status.head; cell; cell = cell->next)
{ {
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"%s\",\"%s\"\n", "\"%s\",\"%s\"\n",
cell->key, cell->value); cell->key, cell->value);
} }
/* we'll add the raw data as well */ /* we'll add the raw data as well */
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"max_wal_senders\",%i\n", "\"max_wal_senders\",%i\n",
node_info.max_wal_senders); node_info.max_wal_senders);
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"occupied_wal_senders\",%i\n", "\"occupied_wal_senders\",%i\n",
node_info.attached_wal_receivers); node_info.attached_wal_receivers);
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"max_replication_slots\",%i\n", "\"max_replication_slots\",%i\n",
node_info.max_replication_slots); node_info.max_replication_slots);
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"active_replication_slots\",%i\n", "\"active_replication_slots\",%i\n",
node_info.active_replication_slots); node_info.active_replication_slots);
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"\"inactive_replaction_slots\",%i\n", "\"inactive_replaction_slots\",%i\n",
node_info.inactive_replication_slots); node_info.inactive_replication_slots);
} }
else else
{ {
appendPQExpBuffer( appendPQExpBuffer(&output,
&output,
"Node \"%s\":\n", "Node \"%s\":\n",
node_info.node_name); node_info.node_name);
for (cell = node_status.head; cell; cell = cell->next) for (cell = node_status.head; cell; cell = cell->next)
{ {
if (cell->output_mode == OM_NOT_SET) if (cell->output_mode == OM_NOT_SET)
appendPQExpBuffer( appendPQExpBuffer(&output,
&output, "\t%s: %s\n",
"\t%s: %s\n", cell->key, cell->value); cell->key, cell->value);
} }
} }
@@ -581,8 +575,10 @@ do_node_check(void)
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
server_version_num = get_server_version(conn, NULL);
/* add replication statistics to node record */ /* add replication statistics to node record */
get_node_replication_stats(conn, &node_info); get_node_replication_stats(conn, server_version_num, &node_info);
/* /*
* handle specific checks ====================== * handle specific checks ======================
@@ -704,8 +700,7 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
if (recovery_type == RECTYPE_STANDBY) if (recovery_type == RECTYPE_STANDBY)
{ {
status = CHECK_STATUS_CRITICAL; status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
_("node is registered as primary but running as standby")); _("node is registered as primary but running as standby"));
} }
else else
@@ -719,14 +714,12 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
if (recovery_type == RECTYPE_PRIMARY) if (recovery_type == RECTYPE_PRIMARY)
{ {
status = CHECK_STATUS_CRITICAL; status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
_("node is registered as standby but running as primary")); _("node is registered as standby but running as primary"));
} }
else else
{ {
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
_("node is standby")); _("node is standby"));
} }
break; break;
@@ -749,8 +742,7 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
if (is_active_bdr_node(conn, node_info->node_name) == false) if (is_active_bdr_node(conn, node_info->node_name) == false)
{ {
status = CHECK_STATUS_CRITICAL; status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
_("node is not an active BDR node")); _("node is not an active BDR node"));
} }
} }
@@ -798,16 +790,19 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
initPQExpBuffer(&details); initPQExpBuffer(&details);
if (node_info->total_replication_slots == 0) if (server_version_num < 90400)
{ {
appendPQExpBuffer( appendPQExpBuffer(&details,
&details, _("replication slots not available for this PostgreSQL version"));
}
else if (node_info->total_replication_slots == 0)
{
appendPQExpBuffer(&details,
_("node has no replication slots")); _("node has no replication slots"));
} }
else if (node_info->inactive_replication_slots == 0) else if (node_info->inactive_replication_slots == 0)
{ {
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
_("%i of %i replication slots are active"), _("%i of %i replication slots are active"),
node_info->total_replication_slots, node_info->total_replication_slots,
node_info->total_replication_slots); node_info->total_replication_slots);
@@ -816,8 +811,7 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
{ {
status = CHECK_STATUS_CRITICAL; status = CHECK_STATUS_CRITICAL;
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
_("%i of %i replication slots are inactive"), _("%i of %i replication slots are inactive"),
node_info->inactive_replication_slots, node_info->inactive_replication_slots,
node_info->total_replication_slots); node_info->total_replication_slots);
@@ -880,14 +874,12 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"--files=%i --threshold=%i", "--files=%i --threshold=%i",
ready_archive_files, config_file_options.archive_ready_critical); ready_archive_files, config_file_options.archive_ready_critical);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i pending archive ready files | files=%i;%i;%i", "%i pending archive ready files | files=%i;%i;%i",
ready_archive_files, ready_archive_files,
ready_archive_files, ready_archive_files,
@@ -895,8 +887,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
config_file_options.archive_ready_critical); config_file_options.archive_ready_critical);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i pending archive ready files, critical threshold: %i", "%i pending archive ready files, critical threshold: %i",
ready_archive_files, config_file_options.archive_ready_critical); ready_archive_files, config_file_options.archive_ready_critical);
break; break;
@@ -912,14 +903,12 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"--files=%i --threshold=%i", "--files=%i --threshold=%i",
ready_archive_files, config_file_options.archive_ready_warning); ready_archive_files, config_file_options.archive_ready_warning);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i pending archive ready files | files=%i;%i;%i", "%i pending archive ready files | files=%i;%i;%i",
ready_archive_files, ready_archive_files,
ready_archive_files, ready_archive_files,
@@ -928,8 +917,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i pending archive ready files (threshold: %i)", "%i pending archive ready files (threshold: %i)",
ready_archive_files, config_file_options.archive_ready_warning); ready_archive_files, config_file_options.archive_ready_warning);
break; break;
@@ -964,13 +952,11 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"--files=%i", ready_archive_files); "--files=%i", ready_archive_files);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i pending archive ready files | files=%i;%i;%i", "%i pending archive ready files | files=%i;%i;%i",
ready_archive_files, ready_archive_files,
ready_archive_files, ready_archive_files,
@@ -978,8 +964,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
config_file_options.archive_ready_critical); config_file_options.archive_ready_critical);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i pending archive ready files", ready_archive_files); "%i pending archive ready files", ready_archive_files);
break; break;
@@ -1079,14 +1064,12 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"--lag=%i --threshold=%i", "--lag=%i --threshold=%i",
lag_seconds, config_file_options.replication_lag_critical); lag_seconds, config_file_options.replication_lag_critical);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i seconds | lag=%i;%i;%i", "%i seconds | lag=%i;%i;%i",
lag_seconds, lag_seconds,
lag_seconds, lag_seconds,
@@ -1094,8 +1077,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
config_file_options.replication_lag_critical); config_file_options.replication_lag_critical);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i seconds, critical threshold: %i)", "%i seconds, critical threshold: %i)",
lag_seconds, config_file_options.replication_lag_critical); lag_seconds, config_file_options.replication_lag_critical);
break; break;
@@ -1111,14 +1093,12 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"--lag=%i --threshold=%i", "--lag=%i --threshold=%i",
lag_seconds, config_file_options.replication_lag_warning); lag_seconds, config_file_options.replication_lag_warning);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i seconds | lag=%i;%i;%i", "%i seconds | lag=%i;%i;%i",
lag_seconds, lag_seconds,
lag_seconds, lag_seconds,
@@ -1126,8 +1106,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
config_file_options.replication_lag_critical); config_file_options.replication_lag_critical);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i seconds, warning threshold: %i)", "%i seconds, warning threshold: %i)",
lag_seconds, config_file_options.replication_lag_warning); lag_seconds, config_file_options.replication_lag_warning);
break; break;
@@ -1162,14 +1141,12 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"--lag=%i", "--lag=%i",
lag_seconds); lag_seconds);
break; break;
case OM_NAGIOS: case OM_NAGIOS:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i seconds | lag=%i;%i;%i", "%i seconds | lag=%i;%i;%i",
lag_seconds, lag_seconds,
lag_seconds, lag_seconds,
@@ -1177,8 +1154,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
config_file_options.replication_lag_critical); config_file_options.replication_lag_critical);
break; break;
case OM_TEXT: case OM_TEXT:
appendPQExpBuffer( appendPQExpBuffer(&details,
&details,
"%i seconds", "%i seconds",
lag_seconds); lag_seconds);
break; break;
@@ -1313,7 +1289,6 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
{ {
case OM_NAGIOS: case OM_NAGIOS:
{ {
printf("REPMGR_DOWNSTREAM_SERVERS %s: %s | ", printf("REPMGR_DOWNSTREAM_SERVERS %s: %s | ",
output_check_status(status), output_check_status(status),
details.data); details.data);
@@ -1714,17 +1689,14 @@ do_node_rejoin(void)
/* execute pg_rewind */ /* execute pg_rewind */
initPQExpBuffer(&command); initPQExpBuffer(&command);
appendPQExpBuffer( appendPQExpBuffer(&command,
&command,
"%s -D ", "%s -D ",
make_pg_path("pg_rewind")); make_pg_path("pg_rewind"));
appendShellString( appendShellString(&command,
&command,
config_file_options.data_directory); config_file_options.data_directory);
appendPQExpBuffer( appendPQExpBuffer(&command,
&command,
" --source-server='%s'", " --source-server='%s'",
primary_node_record.conninfo); primary_node_record.conninfo);