Add --data-directory-config option to "repmgr node check"

Implements part of GitHub #523.
This commit is contained in:
Ian Barwick
2019-01-16 11:21:51 +09:00
parent 4523137bfc
commit 0b3a310802
11 changed files with 216 additions and 41 deletions

View File

@@ -2,6 +2,7 @@
repmgr: add --version-number command line option (Ian) repmgr: add --version-number command line option (Ian)
repmgr: add --terse option to "cluster show"; GitHub #521 (Ian) repmgr: add --terse option to "cluster show"; GitHub #521 (Ian)
repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian) repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian)
repmgr: add "node check --data-directory-config"; GitHub #523 (Ian)
repmgr: "standby switchover": improve handling of connection URIs when repmgr: "standby switchover": improve handling of connection URIs when
executing "node rejoin" on the demotion candidate; GitHub #525 (Ian) executing "node rejoin" on the demotion candidate; GitHub #525 (Ian)
repmgrd: check binary and extension major versions match; GitHub #515 (Ian) repmgrd: check binary and extension major versions match; GitHub #515 (Ian)

View File

@@ -198,9 +198,9 @@ mkdir_p(char *path, mode_t omode)
bool bool
is_pg_dir(char *path) is_pg_dir(const char *path)
{ {
char dirpath[MAXPGPATH]; char dirpath[MAXPGPATH] = "";
struct stat sb; struct stat sb;
/* test pgdata */ /* test pgdata */

View File

@@ -39,7 +39,7 @@ extern bool set_dir_permissions(char *path);
extern DataDirState check_dir(char *path); extern DataDirState check_dir(char *path);
extern bool create_dir(char *path); extern bool create_dir(char *path);
extern bool is_pg_dir(char *path); extern bool is_pg_dir(const char *path);
extern PgDirState is_pg_running(char *path); extern PgDirState is_pg_running(char *path);
extern bool create_pg_dir(char *path, bool force); extern bool create_pg_dir(char *path, bool force);
extern int rmdir_recursive(char *path); extern int rmdir_recursive(char *path);

View File

@@ -50,6 +50,13 @@
</para> </para>
</listitem> </listitem>
<listitem>
<para>
<link linkend="repmgr-node-check"><command>repmgr node check --data-directory-config</command></link>
option added; this is to confirm &repmgr; is correctly configured.
</para>
</listitem>
</itemizedlist> </itemizedlist>
</para> </para>
</sect2> </sect2>

View File

@@ -87,44 +87,44 @@
<varlistentry> <varlistentry>
<term><option>--csv</option></term> <term><option>--csv</option></term>
<listitem> <listitem>
<para> <para>
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which <command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
outputs the replication cluster's status in a simple CSV format, suitable for outputs the replication cluster's status in a simple CSV format, suitable for
parsing by scripts, e.g.: parsing by scripts, e.g.:
<programlisting> <programlisting>
$ repmgr -f /etc/repmgr.conf cluster show --csv $ repmgr -f /etc/repmgr.conf cluster show --csv
1,-1,-1 1,-1,-1
2,0,0 2,0,0
3,0,1</programlisting> 3,0,1</programlisting>
</para> </para>
<para> <para>
The columns have following meanings: The columns have following meanings:
<itemizedlist spacing="compact" mark="bullet"> <itemizedlist spacing="compact" mark="bullet">
<listitem> <listitem>
<simpara> <simpara>
node ID node ID
</simpara> </simpara>
</listitem> </listitem>
<listitem> <listitem>
<simpara> <simpara>
availability (0 = available, -1 = unavailable) availability (0 = available, -1 = unavailable)
</simpara> </simpara>
</listitem> </listitem>
<listitem> <listitem>
<simpara> <simpara>
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown) recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
</simpara> </simpara>
</listitem> </listitem>
</itemizedlist> </itemizedlist>
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<term><option>--terse</option></term> <term><option>--terse</option></term>
<listitem> <listitem>
<para> <para>
Suppress display of the <literal>conninfo</literal> column. Suppress display of the <literal>conninfo</literal> column.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>

View File

@@ -86,6 +86,16 @@
</simpara> </simpara>
</listitem> </listitem>
<listitem>
<simpara>
<literal>--data-directory-config</literal>: checks that the data directory configured in
<filename>repmgr.conf</filename> matches the actual data directory.
This check is not directly related to replication, but is useful to verify &repmgr;
is correctly configured.
</simpara>
</listitem>
</itemizedlist> </itemizedlist>
</para> </para>
</refsect1> </refsect1>
@@ -105,6 +115,7 @@
<listitem> <listitem>
<simpara> <simpara>
<literal>--nagios</literal>: generate output in a Nagios-compatible format <literal>--nagios</literal>: generate output in a Nagios-compatible format
(for individual checks only)
</simpara> </simpara>
</listitem> </listitem>
</itemizedlist> </itemizedlist>

View File

@@ -48,7 +48,7 @@ static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode,
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
/* /*
* NODE STATUS * NODE STATUS
* *
@@ -82,6 +82,9 @@ do_node_status(void)
int server_version_num = UNKNOWN_SERVER_VERSION_NUM; int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
char server_version_str[MAXVERSIONSTR] = ""; char server_version_str[MAXVERSIONSTR] = "";
/*
* A database connection is *not* required for this check
*/
if (runtime_options.is_shutdown_cleanly == true) if (runtime_options.is_shutdown_cleanly == true)
{ {
return _do_node_status_is_shutdown_cleanly(); return _do_node_status_is_shutdown_cleanly();
@@ -653,7 +656,8 @@ _do_node_status_is_shutdown_cleanly(void)
node_status = NODE_STATUS_DOWN; node_status = NODE_STATUS_DOWN;
} }
log_verbose(LOG_DEBUG, "node status determined as: %s", print_node_status(node_status)); log_verbose(LOG_DEBUG, "node status determined as: %s",
print_node_status(node_status));
switch (node_status) switch (node_status)
{ {
@@ -789,6 +793,16 @@ do_node_check(void)
exit(return_code); exit(return_code);
} }
if (runtime_options.data_directory_config == true)
{
return_code = do_node_check_data_directory(conn,
runtime_options.output_mode,
&node_info,
NULL);
PQfinish(conn);
exit(return_code);
}
if (runtime_options.output_mode == OM_NAGIOS) if (runtime_options.output_mode == OM_NAGIOS)
{ {
@@ -821,6 +835,9 @@ do_node_check(void)
if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK) if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
issue_detected = true; issue_detected = true;
if (do_node_check_data_directory(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
issue_detected = true;
if (runtime_options.output_mode == OM_CSV) if (runtime_options.output_mode == OM_CSV)
{ {
appendPQExpBuffer(&output, appendPQExpBuffer(&output,
@@ -1442,11 +1459,9 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
switch (mode) switch (mode)
{ {
case OM_OPTFORMAT: case OM_OPTFORMAT:
{ printf("--status=%s %s\n",
printf("--status=%s %s\n", output_check_status(status),
output_check_status(status), details.data);
details.data);
}
break; break;
case OM_NAGIOS: case OM_NAGIOS:
printf("REPMGR_REPLICATION_LAG %s: %s\n", printf("REPMGR_REPLICATION_LAG %s: %s\n",
@@ -1797,6 +1812,135 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
} }
/*
 * do_node_check_data_directory()
 *
 * Verify that the "data_directory" setting in repmgr.conf is correct.
 *
 * If "conn" is a superuser connection, the configured value is compared
 * with the "data_directory" setting reported by the server. Otherwise we
 * can only check that the configured directory looks like a PostgreSQL
 * data directory.
 *
 * Parameters:
 *   conn        - connection to the local node
 *   mode        - output format to use
 *   node_info   - local node record (currently unused by this check)
 *   list_output - if not NULL, the result is added to this list rather than
 *                 printed (applies to OM_TEXT and OM_CSV only)
 *
 * Returns the check status. Note that if CSV output is requested for this
 * check on its own (i.e. "list_output" is NULL), the connection is closed
 * and the program exits with ERR_BAD_CONFIG.
 */
CheckStatus
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
	CheckStatus status = CHECK_STATUS_OK;
	char		actual_data_directory[MAXPGPATH] = "";
	PQExpBufferData details;

	if (mode == OM_CSV && list_output == NULL)
	{
		log_error(_("--csv output not provided with --data-directory-config option"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	initPQExpBuffer(&details);

	/*
	 * Check actual data directory matches that in repmgr.conf; note this requires
	 * a superuser connection
	 */
	if (is_superuser_connection(conn, NULL) == true)
	{
		/* we expect to have a database connection */
		if (get_pg_setting(conn, "data_directory", actual_data_directory) == false)
		{
			/*
			 * The actual value could not be retrieved, so there is nothing
			 * meaningful to compare against; report "unknown" and do not
			 * fall through to the comparison below, which would otherwise
			 * compare against an empty string and report a bogus mismatch.
			 */
			appendPQExpBufferStr(&details,
								 _("unable to determine current \"data_directory\""));
			status = CHECK_STATUS_UNKNOWN;
		}
		else if (strncmp(actual_data_directory, config_file_options.data_directory, MAXPGPATH) != 0)
		{
			/*
			 * Nagios output already shows the configured directory in the
			 * main part of the status line, so don't repeat it there.
			 */
			if (mode != OM_NAGIOS)
			{
				appendPQExpBuffer(&details,
								  _("configured \"data_directory\" is \"%s\"; "),
								  config_file_options.data_directory);
			}

			appendPQExpBuffer(&details,
							  "actual data directory is \"%s\"",
							  actual_data_directory);

			status = CHECK_STATUS_CRITICAL;
		}
		else
		{
			appendPQExpBuffer(&details,
							  _("configured \"data_directory\" is \"%s\""),
							  config_file_options.data_directory);
		}
	}
	/*
	 * If no superuser connection available, sanity-check that the configuration directory looks
	 * like a PostgreSQL directory and hope it's the right one.
	 */
	else
	{
		if (mode == OM_TEXT)
		{
			log_info(_("connection is not a superuser connection, falling back to simple check"));

			/* XXX add -S/--superuser option */
			if (PQserverVersion(conn) >= 100000)
			{
				log_hint(_("add the \"%s\" user to group \"pg_read_all_settings\""),
						 PQuser(conn));
			}
		}

		if (is_pg_dir(config_file_options.data_directory) == false)
		{
			if (mode == OM_NAGIOS)
			{
				appendPQExpBufferStr(&details,
									 _("configured \"data_directory\" is not a PostgreSQL data directory"));
			}
			else
			{
				/*
				 * Report the configured directory; "actual_data_directory"
				 * is never populated in this branch.
				 */
				appendPQExpBuffer(&details,
								  _("configured \"data_directory\" \"%s\" is not a PostgreSQL data directory"),
								  config_file_options.data_directory);
			}

			status = CHECK_STATUS_CRITICAL;
		}
		else
		{
			/* ensure the detail text is never empty when the check passes */
			appendPQExpBuffer(&details,
							  _("configured \"data_directory\" is \"%s\""),
							  config_file_options.data_directory);
		}
	}

	switch (mode)
	{
		case OM_OPTFORMAT:
			printf("--configured-data-directory=%s\n",
				   output_check_status(status));
			break;

		case OM_NAGIOS:
			printf("REPMGR_DATA_DIRECTORY %s: %s",
				   output_check_status(status),
				   config_file_options.data_directory);

			if (status == CHECK_STATUS_CRITICAL)
			{
				printf(" | %s", details.data);
			}
			puts("");
			break;

		case OM_CSV:
		case OM_TEXT:
			if (list_output != NULL)
			{
				check_status_list_set(list_output,
									  "Configured data directory",
									  status,
									  details.data);
			}
			else
			{
				printf("%s (%s)\n",
					   output_check_status(status),
					   details.data);
			}
			break;

		default:
			break;
	}

	termPQExpBuffer(&details);

	return status;
}
void void
do_node_service(void) do_node_service(void)
@@ -2924,8 +3068,8 @@ do_node_help(void)
puts(""); puts("");
printf(_(" Configuration file required, runs on local node only.\n")); printf(_(" Configuration file required, runs on local node only.\n"));
puts(""); puts("");
printf(_(" --csv emit output as CSV\n")); printf(_(" --csv emit output as CSV (not available for individual check output)\n"));
printf(_(" --nagios emit output in Nagios format (individual status output only)\n")); printf(_(" --nagios emit output in Nagios format (individual check output only)\n"));
puts(""); puts("");
printf(_(" Following options check an individual status:\n")); printf(_(" Following options check an individual status:\n"));
printf(_(" --archive-ready number of WAL files ready for archiving\n")); printf(_(" --archive-ready number of WAL files ready for archiving\n"));
@@ -2934,6 +3078,7 @@ do_node_help(void)
printf(_(" --role check node has expected role\n")); printf(_(" --role check node has expected role\n"));
printf(_(" --slots check for inactive replication slots\n")); printf(_(" --slots check for inactive replication slots\n"));
printf(_(" --missing-slots check for missing replication slots\n")); printf(_(" --missing-slots check for missing replication slots\n"));
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
puts(""); puts("");

View File

@@ -3472,6 +3472,10 @@ do_standby_switchover(void)
} }
termPQExpBuffer(&command_output); termPQExpBuffer(&command_output);
/* check remote repmgr has the data directory correctly configured */
/* TODO: execute "repmgr node check --data-directory-config" on the remote node */
/* /*
* populate local node record with current state of various replication-related * populate local node record with current state of various replication-related
* values, so we can check for sufficient walsenders and replication slots * values, so we can check for sufficient walsenders and replication slots

View File

@@ -111,6 +111,7 @@ typedef struct
bool missing_slots; bool missing_slots;
bool has_passfile; bool has_passfile;
bool replication_connection; bool replication_connection;
bool data_directory_config;
/* "node rejoin" options */ /* "node rejoin" options */
char config_files[MAXLEN]; char config_files[MAXLEN];
@@ -161,7 +162,7 @@ typedef struct
/* "node status" options */ \ /* "node status" options */ \
false, \ false, \
/* "node check" options */ \ /* "node check" options */ \
false, false, false, false, false, false, false, false, \ false, false, false, false, false, false, false, false, false, \
/* "node rejoin" options */ \ /* "node rejoin" options */ \
"", \ "", \
/* "node service" options */ \ /* "node service" options */ \

View File

@@ -497,6 +497,10 @@ main(int argc, char **argv)
runtime_options.replication_connection = true; runtime_options.replication_connection = true;
break; break;
case OPT_DATA_DIRECTORY_CONFIG:
runtime_options.data_directory_config = true;
break;
/*-------------------- /*--------------------
* "node rejoin" options * "node rejoin" options
*-------------------- *--------------------

View File

@@ -93,6 +93,7 @@
#define OPT_MISSING_SLOTS 1041 #define OPT_MISSING_SLOTS 1041
#define OPT_REPMGRD_NO_PAUSE 1042 #define OPT_REPMGRD_NO_PAUSE 1042
#define OPT_VERSION_NUMBER 1043 #define OPT_VERSION_NUMBER 1043
#define OPT_DATA_DIRECTORY_CONFIG 1044
/* deprecated since 3.3 */ /* deprecated since 3.3 */
@@ -177,6 +178,7 @@ static struct option long_options[] =
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS}, {"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE}, {"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
{"replication-connection", no_argument, NULL, OPT_REPL_CONN}, {"replication-connection", no_argument, NULL, OPT_REPL_CONN},
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
/* "node rejoin" options */ /* "node rejoin" options */
{"config-files", required_argument, NULL, OPT_CONFIG_FILES}, {"config-files", required_argument, NULL, OPT_CONFIG_FILES},