Convert configuration file parsing to use flex

Previously, repmgr was using a very simple ad-hoc string-based parser,
which had various limitations and allowed configuration files to be
created in a way which could cause confusion and/or unexpected
behaviour.

For example, it accepted strings enclosed in single quotes, but treated
strings enclosed in double quotes literally. A node_name defined thusly:

    node_name="somenode"

would result in the literal value '"somenode"' being used, which could
lead to unobvious errors along the lines of:

    no record found for ""somenode""

The configuration file parser has been adapted from the one used by
PostgreSQL itself, so behaves more-or-less identically (though some
functions such as file inclusion are not supported in repmgr).

This makes configuration parsing more robust and consistent;
additionally, error reporting will be more precise.

Note this does mean that some repmgr.conf items previously accepted
as valid by repmgr will now be rejected; in particular this includes
strings containing spaces which are not enclosed in single quotes.
This commit is contained in:
Ian Barwick
2019-07-03 12:18:01 +09:00
parent ab7e527af8
commit 8d55cab25e
8 changed files with 740 additions and 363 deletions

3
.gitignore vendored
View File

@@ -53,3 +53,6 @@ repmgr
repmgrd repmgrd
repmgr4 repmgr4
repmgrd4 repmgrd4
# generated files
configfile-scan.c

View File

@@ -1,3 +1,6 @@
4.5 2019-??-??
general: parse configuration file using flex (Ian)
4.4 2019-06-27 4.4 2019-06-27
repmgr: improve "daemon status" output (Ian) repmgr: improve "daemon status" output (Ian)
repmgr: add "--siblings-follow" option to "standby promote" (Ian) repmgr: add "--siblings-follow" option to "standby promote" (Ian)

View File

@@ -2,6 +2,7 @@
# Makefile.global.in # Makefile.global.in
# @configure_input@ # @configure_input@
# Can only be built using pgxs # Can only be built using pgxs
USE_PGXS=1 USE_PGXS=1
@@ -29,3 +30,11 @@ include $(PGXS)
REPMGR_VERSION=$(shell awk '/^\#define REPMGR_VERSION / { print $3; }' ${repmgr_abs_srcdir}/repmgr_version.h.in | cut -d '"' -f 2) REPMGR_VERSION=$(shell awk '/^\#define REPMGR_VERSION / { print $3; }' ${repmgr_abs_srcdir}/repmgr_version.h.in | cut -d '"' -f 2)
REPMGR_RELEASE_DATE=$(shell awk '/^\#define REPMGR_RELEASE_DATE / { print $3; }' ${repmgr_abs_srcdir}/repmgr_version.h.in | cut -d '"' -f 2) REPMGR_RELEASE_DATE=$(shell awk '/^\#define REPMGR_RELEASE_DATE / { print $3; }' ${repmgr_abs_srcdir}/repmgr_version.h.in | cut -d '"' -f 2)
FLEX = flex
##########################################################################
#
# Global targets and rules
%.c: %.l
$(FLEX) $(FLEXFLAGS) -o'$@' $<

View File

@@ -54,14 +54,16 @@ $(info Building against PostgreSQL $(MAJORVERSION))
REPMGR_CLIENT_OBJS = repmgr-client.o \ REPMGR_CLIENT_OBJS = repmgr-client.o \
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \ repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \ repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o configfile.o configfile-scan.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o configfile-scan.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o
DATE=$(shell date "+%Y-%m-%d") DATE=$(shell date "+%Y-%m-%d")
repmgr_version.h: repmgr_version.h.in repmgr_version.h: repmgr_version.h.in
$(SED) -E 's/REPMGR_VERSION_DATE.*""/REPMGR_VERSION_DATE "$(DATE)"/' $< >$@; \ $(SED) -E 's/REPMGR_VERSION_DATE.*""/REPMGR_VERSION_DATE "$(DATE)"/' $< >$@; \
$(SED) -i -E 's/PG_ACTUAL_VERSION_NUM/PG_ACTUAL_VERSION_NUM $(VERSION_NUM)/' $@ $(SED) -i -E 's/PG_ACTUAL_VERSION_NUM/PG_ACTUAL_VERSION_NUM $(VERSION_NUM)/' $@
configfile-scan.c: configfile-scan.l
$(REPMGR_CLIENT_OBJS): repmgr-client.h repmgr_version.h $(REPMGR_CLIENT_OBJS): repmgr-client.h repmgr_version.h
repmgr: $(REPMGR_CLIENT_OBJS) repmgr: $(REPMGR_CLIENT_OBJS)

344
configfile-scan.l Normal file
View File

@@ -0,0 +1,344 @@
/*
* Scanner for the configuration file
*/
%{
#include <setjmp.h>
#include "repmgr.h"
#include "configfile.h"
/*
* flex emits a yy_fatal_error() function that it calls in response to
* critical errors like malloc failure, file I/O errors, and detection of
* internal inconsistency. That function prints a message and calls exit().
* Mutate it to instead call our handler, which jumps out of the parser.
*/
#undef fprintf
#define fprintf(file, fmt, msg) CONF_flex_fatal(msg)
enum
{
CONF_ID = 1,
CONF_STRING = 2,
CONF_INTEGER = 3,
CONF_REAL = 4,
CONF_EQUALS = 5,
CONF_UNQUOTED_STRING = 6,
CONF_QUALIFIED_ID = 7,
CONF_EOL = 99,
CONF_ERROR = 100
};
static unsigned int ConfigFileLineno;
static const char *CONF_flex_fatal_errmsg;
static sigjmp_buf *CONF_flex_fatal_jmp;
static char *CONF_scanstr(const char *s);
static int CONF_flex_fatal(const char *msg);
%}
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="CONF_yy"
SIGN ("-"|"+")
DIGIT [0-9]
HEXDIGIT [0-9a-fA-F]
UNIT_LETTER [a-zA-Z]
INTEGER {SIGN}?({DIGIT}+|0x{HEXDIGIT}+){UNIT_LETTER}*
EXPONENT [Ee]{SIGN}?{DIGIT}+
REAL {SIGN}?{DIGIT}*"."{DIGIT}*{EXPONENT}?
LETTER [A-Za-z_\200-\377]
LETTER_OR_DIGIT [A-Za-z_0-9\200-\377]
ID {LETTER}{LETTER_OR_DIGIT}*
QUALIFIED_ID {ID}"."{ID}
UNQUOTED_STRING {LETTER}({LETTER_OR_DIGIT}|[-._:/])*
STRING \'([^'\\\n]|\\.|\'\')*\'
%%
\n ConfigFileLineno++; return CONF_EOL;
[ \t\r]+ /* eat whitespace */
#.* /* eat comment (.* matches anything until newline) */
{ID} return CONF_ID;
{QUALIFIED_ID} return CONF_QUALIFIED_ID;
{STRING} return CONF_STRING;
{UNQUOTED_STRING} return CONF_UNQUOTED_STRING;
{INTEGER} return CONF_INTEGER;
{REAL} return CONF_REAL;
= return CONF_EQUALS;
. return CONF_ERROR;
%%
extern bool
ProcessConfigFile(FILE *fp, const char *config_file, t_configuration_options *options, ItemList *error_list, ItemList *warning_list)
{
volatile bool OK = true;
volatile YY_BUFFER_STATE lex_buffer = NULL;
sigjmp_buf flex_fatal_jmp;
int errorcount;
int token;
if (sigsetjmp(flex_fatal_jmp, 1) == 0)
{
CONF_flex_fatal_jmp = &flex_fatal_jmp;
}
else
{
/*
* Regain control after a fatal, internal flex error. It may have
* corrupted parser state. Consequently, abandon the file, but trust
* that the state remains sane enough for yy_delete_buffer().
*/
item_list_append_format(error_list,
"%s at file \"%s\" line %u",
CONF_flex_fatal_errmsg, config_file, ConfigFileLineno);
OK = false;
goto cleanup;
}
/*
* Parse
*/
ConfigFileLineno = 1;
errorcount = 0;
lex_buffer = yy_create_buffer(fp, YY_BUF_SIZE);
yy_switch_to_buffer(lex_buffer);
/* This loop iterates once per logical line */
while ((token = yylex()))
{
char *opt_name = NULL;
char *opt_value = NULL;
if (token == CONF_EOL) /* empty or comment line */
continue;
/* first token on line is option name */
if (token != CONF_ID && token != CONF_QUALIFIED_ID)
goto parse_error;
opt_name = pstrdup(yytext);
/* next we have an optional equal sign; discard if present */
token = yylex();
if (token == CONF_EQUALS)
token = yylex();
/* now we must have the option value */
if (token != CONF_ID &&
token != CONF_STRING &&
token != CONF_INTEGER &&
token != CONF_REAL &&
token != CONF_UNQUOTED_STRING)
goto parse_error;
if (token == CONF_STRING) /* strip quotes and escapes */
opt_value = CONF_scanstr(yytext);
else
opt_value = pstrdup(yytext);
/* now we'd like an end of line, or possibly EOF */
token = yylex();
if (token != CONF_EOL)
{
if (token != 0)
goto parse_error;
/* treat EOF like \n for line numbering purposes, cf bug 4752 */
ConfigFileLineno++;
}
/* OK, process the option name and value */
parse_configuration_item(options,
error_list,
warning_list,
opt_name,
opt_value);
/* break out of loop if read EOF, else loop for next line */
if (token == 0)
break;
continue;
parse_error:
/* release storage if we allocated any on this line */
if (opt_name)
pfree(opt_name);
if (opt_value)
pfree(opt_value);
/* report the error */
if (token == CONF_EOL || token == 0)
{
item_list_append_format(error_list,
_("syntax error in file \"%s\" line %u, near end of line"),
config_file, ConfigFileLineno - 1);
}
else
{
item_list_append_format(error_list,
_("syntax error in file \"%s\" line %u, near token \"%s\""),
config_file, ConfigFileLineno, yytext);
}
OK = false;
errorcount++;
/*
* To avoid producing too much noise when fed a totally bogus file,
* give up after 100 syntax errors per file (an arbitrary number).
* Also, if we're only logging the errors at DEBUG level anyway, might
* as well give up immediately. (This prevents postmaster children
* from bloating the logs with duplicate complaints.)
*/
if (errorcount >= 100)
{
fprintf(stderr,
_("too many syntax errors found, abandoning file \"%s\"\n"),
config_file);
break;
}
/* resync to next end-of-line or EOF */
while (token != CONF_EOL && token != 0)
token = yylex();
/* break out of loop on EOF */
if (token == 0)
break;
}
cleanup:
yy_delete_buffer(lex_buffer);
return OK;
}
/*
* scanstr
*
* Strip the quotes surrounding the given string, and collapse any embedded
* '' sequences and backslash escapes.
*
* the string returned is palloc'd and should eventually be pfree'd by the
* caller.
*/
static char *
CONF_scanstr(const char *s)
{
char *newStr;
int len,
i,
j;
Assert(s != NULL && s[0] == '\'');
len = strlen(s);
Assert(s != NULL);
Assert(len >= 2);
Assert(s[len - 1] == '\'');
/* Skip the leading quote; we'll handle the trailing quote below */
s++, len--;
/* Since len still includes trailing quote, this is enough space */
newStr = palloc(len);
for (i = 0, j = 0; i < len; i++)
{
if (s[i] == '\\')
{
i++;
switch (s[i])
{
case 'b':
newStr[j] = '\b';
break;
case 'f':
newStr[j] = '\f';
break;
case 'n':
newStr[j] = '\n';
break;
case 'r':
newStr[j] = '\r';
break;
case 't':
newStr[j] = '\t';
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
{
int k;
long octVal = 0;
for (k = 0;
s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
k++)
octVal = (octVal << 3) + (s[i + k] - '0');
i += k - 1;
newStr[j] = ((char) octVal);
}
break;
default:
newStr[j] = s[i];
break;
} /* switch */
}
else if (s[i] == '\'' && s[i + 1] == '\'')
{
/* doubled quote becomes just one quote */
newStr[j] = s[++i];
}
else
newStr[j] = s[i];
j++;
}
/* We copied the ending quote to newStr, so replace with \0 */
Assert(j > 0 && j <= len);
newStr[--j] = '\0';
return newStr;
}
/*
* Flex fatal errors bring us here. Stash the error message and jump back to
* ParseConfigFp(). Assume all msg arguments point to string constants; this
* holds for flex 2.5.31 (earliest we support) and flex 2.5.35 (latest as of
* this writing). Otherwise, we would need to copy the message.
*
* We return "int" since this takes the place of calls to fprintf().
*/
static int
CONF_flex_fatal(const char *msg)
{
CONF_flex_fatal_errmsg = msg;
siglongjmp(*CONF_flex_fatal_jmp, 1);
return 0; /* keep compiler quiet */
}

View File

@@ -266,12 +266,6 @@ static void
_parse_config(t_configuration_options *options, ItemList *error_list, ItemList *warning_list) _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *warning_list)
{ {
FILE *fp; FILE *fp;
char *s = NULL,
buf[MAXLINELENGTH] = "";
char name[MAXLEN] = "";
char value[MAXLEN] = "";
bool node_id_found = false;
/* Initialize configuration options with sensible defaults */ /* Initialize configuration options with sensible defaults */
@@ -468,27 +462,112 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
/* Read file */ (void) ProcessConfigFile(fp, config_file_path, options, error_list, warning_list);
while ((s = fgets(buf, sizeof buf, fp)) != NULL)
fclose(fp);
/* check required parameters */
if (options->node_id == UNKNOWN_NODE_ID)
{ {
item_list_append(error_list, _("\"node_id\": required parameter was not found"));
}
if (!strlen(options->node_name))
{
item_list_append(error_list, _("\"node_name\": required parameter was not found"));
}
if (!strlen(options->data_directory))
{
item_list_append(error_list, _("\"data_directory\": required parameter was not found"));
}
if (!strlen(options->conninfo))
{
item_list_append(error_list, _("\"conninfo\": required parameter was not found"));
}
else
{
/*
* Sanity check the provided conninfo string
*
* NOTE: PQconninfoParse() verifies the string format and checks for
* valid options but does not sanity check values
*/
PQconninfoOption *conninfo_options = NULL;
char *conninfo_errmsg = NULL;
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
if (conninfo_options == NULL)
{
PQExpBufferData error_message_buf;
initPQExpBuffer(&error_message_buf);
appendPQExpBuffer(&error_message_buf,
_("\"conninfo\": %s (provided: \"%s\")"),
conninfo_errmsg,
options->conninfo);
item_list_append(error_list, error_message_buf.data);
termPQExpBuffer(&error_message_buf);
}
PQconninfoFree(conninfo_options);
}
/* set values for parameters which default to other parameters */
/*
* From 4.1, "repmgrd_standby_startup_timeout" replaces "standby_reconnect_timeout"
* in repmgrd; fall back to "standby_reconnect_timeout" if no value explicitly provided
*/
if (options->repmgrd_standby_startup_timeout == -1)
{
options->repmgrd_standby_startup_timeout = options->standby_reconnect_timeout;
}
/* add warning about changed "barman_" parameter meanings */
if ((options->barman_host[0] == '\0' && options->barman_server[0] != '\0') ||
(options->barman_host[0] != '\0' && options->barman_server[0] == '\0'))
{
item_list_append(error_list,
_("use \"barman_host\" for the hostname of the Barman server"));
item_list_append(error_list,
_("use \"barman_server\" for the name of the [server] section in the Barman configuration file"));
}
/* other sanity checks */
if (options->archive_ready_warning >= options->archive_ready_critical)
{
item_list_append(error_list,
_("\"archive_ready_critical\" must be greater than \"archive_ready_warning\""));
}
if (options->replication_lag_warning >= options->replication_lag_critical)
{
item_list_append(error_list,
_("\"replication_lag_critical\" must be greater than \"replication_lag_warning\""));
}
if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
{
item_list_append(error_list,
_("\"standby_reconnect_timeout\" must be equal to or greater than \"node_rejoin_timeout\""));
}
}
void
parse_configuration_item(t_configuration_options *options, ItemList *error_list, ItemList *warning_list, const char *name, const char *value)
{
bool known_parameter = true; bool known_parameter = true;
/* Parse name/value pair from line */
_parse_line(buf, name, value);
/* Skip blank lines */
if (!strlen(name))
continue;
/* Skip comments */
if (name[0] == '#')
continue;
/* Copy into correct entry in parameters struct */
if (strcmp(name, "node_id") == 0) if (strcmp(name, "node_id") == 0)
{ {
options->node_id = repmgr_atoi(value, name, error_list, MIN_NODE_ID); options->node_id = repmgr_atoi(value, name, error_list, MIN_NODE_ID);
node_id_found = true;
} }
else if (strcmp(name, "node_name") == 0) else if (strcmp(name, "node_name") == 0)
{ {
@@ -500,7 +579,9 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
sizeof(options->node_name)); sizeof(options->node_name));
} }
else if (strcmp(name, "conninfo") == 0) else if (strcmp(name, "conninfo") == 0)
{
strncpy(options->conninfo, value, MAXLEN); strncpy(options->conninfo, value, MAXLEN);
}
else if (strcmp(name, "data_directory") == 0) else if (strcmp(name, "data_directory") == 0)
{ {
strncpy(options->data_directory, value, MAXPGPATH); strncpy(options->data_directory, value, MAXPGPATH);
@@ -821,106 +902,9 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
item_list_append(error_list, error_message_buf); item_list_append(error_list, error_message_buf);
} }
}
fclose(fp);
/* check required parameters */
if (node_id_found == false)
{
item_list_append(error_list, _("\"node_id\": required parameter was not found"));
}
if (!strlen(options->node_name))
{
item_list_append(error_list, _("\"node_name\": required parameter was not found"));
}
if (!strlen(options->data_directory))
{
item_list_append(error_list, _("\"data_directory\": required parameter was not found"));
}
if (!strlen(options->conninfo))
{
item_list_append(error_list, _("\"conninfo\": required parameter was not found"));
}
else
{
/*
* Sanity check the provided conninfo string
*
* NOTE: PQconninfoParse() verifies the string format and checks for
* valid options but does not sanity check values
*/
PQconninfoOption *conninfo_options = NULL;
char *conninfo_errmsg = NULL;
conninfo_options = PQconninfoParse(options->conninfo, &conninfo_errmsg);
if (conninfo_options == NULL)
{
PQExpBufferData error_message_buf;
initPQExpBuffer(&error_message_buf);
appendPQExpBuffer(&error_message_buf,
_("\"conninfo\": %s (provided: \"%s\")"),
conninfo_errmsg,
options->conninfo);
item_list_append(error_list, error_message_buf.data);
termPQExpBuffer(&error_message_buf);
}
PQconninfoFree(conninfo_options);
}
/* set values for parameters which default to other parameters */
/*
* From 4.1, "repmgrd_standby_startup_timeout" replaces "standby_reconnect_timeout"
* in repmgrd; fall back to "standby_reconnect_timeout" if no value explicitly provided
*/
if (options->repmgrd_standby_startup_timeout == -1)
{
options->repmgrd_standby_startup_timeout = options->standby_reconnect_timeout;
}
/* add warning about changed "barman_" parameter meanings */
if ((options->barman_host[0] == '\0' && options->barman_server[0] != '\0') ||
(options->barman_host[0] != '\0' && options->barman_server[0] == '\0'))
{
item_list_append(error_list,
_("use \"barman_host\" for the hostname of the Barman server"));
item_list_append(error_list,
_("use \"barman_server\" for the name of the [server] section in the Barman configuration file"));
}
/* other sanity checks */
if (options->archive_ready_warning >= options->archive_ready_critical)
{
item_list_append(error_list,
_("\"archive_ready_critical\" must be greater than \"archive_ready_warning\""));
}
if (options->replication_lag_warning >= options->replication_lag_critical)
{
item_list_append(error_list,
_("\"replication_lag_critical\" must be greater than \"replication_lag_warning\""));
}
if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
{
item_list_append(error_list,
_("\"standby_reconnect_timeout\" must be equal to or greater than \"node_rejoin_timeout\""));
}
} }
bool bool
parse_recovery_conf(const char *data_dir, t_recovery_conf *conf) parse_recovery_conf(const char *data_dir, t_recovery_conf *conf)
{ {

View File

@@ -317,6 +317,8 @@ const char *progname(void);
void load_config(const char *config_file, bool verbose, bool terse, t_configuration_options *options, char *argv0); void load_config(const char *config_file, bool verbose, bool terse, t_configuration_options *options, char *argv0);
bool reload_config(t_configuration_options *orig_options, t_server_type server_type); bool reload_config(t_configuration_options *orig_options, t_server_type server_type);
void parse_configuration_item(t_configuration_options *options, ItemList *error_list, ItemList *warning_list, const char *name, const char *value);
bool parse_recovery_conf(const char *data_dir, t_recovery_conf *conf); bool parse_recovery_conf(const char *data_dir, t_recovery_conf *conf);
bool parse_bool(const char *s, bool parse_bool(const char *s,
@@ -342,4 +344,7 @@ void exit_with_cli_errors(ItemList *error_list, const char *repmgr_command);
void print_item_list(ItemList *item_list); void print_item_list(ItemList *item_list);
const char *print_connection_check_type(ConnectionCheckType type); const char *print_connection_check_type(ConnectionCheckType type);
extern bool ProcessConfigFile(FILE *fp, const char *config_file, t_configuration_options *options, ItemList *error_list, ItemList *warning_list);
#endif /* _REPMGR_CONFIGFILE_H_ */ #endif /* _REPMGR_CONFIGFILE_H_ */

View File

@@ -24,6 +24,33 @@
<sect2> <sect2>
<title>General enhancements</title> <title>General enhancements</title>
<para> <para>
<itemizedlist>
<listitem>
<para>
The &repmgr; configuration file is now parsed using
<command>flex</command>, meaning it will be parsed in
the same way as PostgreSQL parses its own configuration
files.
</para>
<para>
This makes configuration file parsing more robust
and consistent.
</para>
<note>
<para>
This change makes configuration file parsing somewhat stricter
than previously. When upgrading, be sure to check your
configuration file syntax.
<!-- XXX add notes in upgrade section -->
</para>
<para>
In particular, all string values containing spaces
<emphasis>must</emphasis> be contained within single quotes.
</para>
</note>
</listitem>
</itemizedlist>
</para> </para>
</sect2> </sect2>
</sect1> </sect1>