1 /* Python brace format strings.
2 Copyright (C) 2004, 2006-2007, 2013, 2015 Free Software Foundation,
4 Written by Daiki Ueno <ueno@gnu.org>, 2013.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
30 #include "xvasprintf.h"
31 #include "format-invalid.h"
34 #define _(str) gettext (str)
36 /* Python brace format strings are defined by PEP3101 together with
37 the 'format' method of the string class.
38 A format string directive here consists of
39 - an opening brace '{',
40 - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
41 - an optional getattr ('.') or getitem ('['..']') operator with
42 an identifier as argument,
43 - an optional format specifier starting with ':', with a
44 (unnested) format string as argument,
45 - a closing brace '}'.
46 Brace characters '{' and '}' can be escaped by doubles '{{' and '}}'.
56 unsigned int directives;
57 unsigned int named_arg_count;
58 unsigned int allocated;
59 struct named_arg *named;
63 static bool parse_upto (struct spec *spec, const char **formatp,
64 bool is_toplevel, char terminator,
65 bool translated, char *fdi, char **invalid_reason);
66 static void free_named_args (struct spec *spec);
69 /* All the parse_* functions (except parse_upto) follow the same
70 calling convention. FORMATP shall point to the beginning of a token.
71 If parsing succeeds, FORMATP will point to the next character after
72 the token, and true is returned. Otherwise, FORMATP will be
73 unchanged and false is returned. */
/* Try to recognize a named field (an identifier) at *FORMATP.
   The visible tests accept a first character in [A-Za-z_] and keep going
   while the character is in [A-Za-z_0-9].  The code that advances the
   pointer and returns is not visible in this excerpt.  */
76 parse_named_field (struct spec *spec,
77 const char **formatp, bool translated, char *fdi,
78 char **invalid_reason)
80 const char *format = *formatp;
/* First character: a letter or an underscore; a digit is not accepted.  */
84 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_')
/* Subsequent characters may additionally be digits.  */
88 while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'
89 || (c >= '0' && c <= '9'));
/* Try to recognize a numeric field (a run of decimal digits) at *FORMATP.
   The visible tests require a first character in [0-9] and keep going while
   the character is in [0-9].  The code that advances the pointer and returns
   is not visible in this excerpt.  */
97 parse_numeric_field (struct spec *spec,
98 const char **formatp, bool translated, char *fdi,
99 char **invalid_reason)
101 const char *format = *formatp;
/* At least one digit is required.  */
105 if (c >= '0' && c <= '9')
/* Consume the remaining digits.  */
109 while (c >= '0' && c <= '9');
/* Parse one directive starting at *FORMATP and, once it is complete, append
   a copy of its name to SPEC->named.  Each visible error path builds a
   message (xasprintf or INVALID_UNTERMINATED_DIRECTIVE) and marks the
   offending position with FDI_SET (format, FMTDIR_ERROR).  The recursive
   call made for a nested directive passes false for IS_TOPLEVEL.  */
117 parse_directive (struct spec *spec,
118 const char **formatp, bool is_toplevel,
119 bool translated, char *fdi, char **invalid_reason)
121 const char *format = *formatp;
122 const char *const format_start = format;
123 const char *name_start;
/* The field name is either an identifier or a number.  */
134 if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)
135 && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason))
138 xasprintf (_("In the directive number %u, '%c' cannot start a field name."), spec->directives, *format);
139 FDI_SET (format, FMTDIR_ERROR);
/* getattr argument: the visible call accepts an identifier.  */
147 if (!parse_named_field (spec, &format, translated, fdi,
151 xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."), spec->directives, *format);
152 FDI_SET (format, FMTDIR_ERROR);
/* getitem argument: an identifier or a number.  */
160 if (!parse_named_field (spec, &format, translated, fdi,
162 && !parse_numeric_field (spec, &format, translated, fdi,
166 xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."), spec->directives, *format);
167 FDI_SET (format, FMTDIR_ERROR);
174 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
175 FDI_SET (format, FMTDIR_ERROR);
186 xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."), spec->directives);
187 FDI_SET (format, FMTDIR_ERROR);
191 /* Format specifiers. Although a format specifier can be any
192 string in theory, we can only recognize two types of format
193 specifiers below, because otherwise we would need to evaluate
194 Python expressions by ourselves:
196 - A nested format directive expanding to the whole string
197 - The Standard Format Specifiers, as described in PEP3101,
198 not including a nested format directive */
202 /* Nested format directive. */
203 if (!parse_directive (spec, &format, false, translated, fdi,
206 /* FDI and INVALID_REASON will be set by a recursive call of
213 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
214 FDI_SET (format, FMTDIR_ERROR);
220 /* Standard format specifiers are in the form:
221 [[fill]align][sign][#][0][minimumwidth][.precision][type] */
223 /* Look ahead two characters to skip [[fill]align]. */
229 if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^')
231 else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^')
/* Optional [sign].  */
233 if (*format == '+' || *format == '-' || *format == ' ')
239 while (c_isdigit (*format))
244 while (c_isdigit (*format))
/* Integer presentation types of PEP3101.  */
249 case 'b': case 'c': case 'd': case 'o': case 'x': case 'X':
/* Floating-point presentation types of PEP3101.  */
251 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
260 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
261 FDI_SET (format, FMTDIR_ERROR);
271 xasprintf (_("In the directive number %u, there is an unterminated format directive."), spec->directives);
272 FDI_SET (format, FMTDIR_ERROR);
/* The name is the N bytes between NAME_START and the current position.  */
279 size_t n = format - name_start;
/* Mark the character just before the name (presumably the opening brace).  */
281 FDI_SET (name_start - 1, FMTDIR_START);
/* Copy the name into a fresh buffer with room for one extra byte.  */
283 name = XNMALLOC (n + 1, char);
284 memcpy (name, name_start, n);
/* Grow the array when it is full; 2n+1 grows even from a capacity of 0.  */
289 if (spec->allocated == spec->named_arg_count)
291 spec->allocated = 2 * spec->allocated + 1;
292 spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg));
/* SPEC->named takes ownership of the copied name.  */
294 spec->named[spec->named_arg_count].name = name;
295 spec->named_arg_count++;
297 FDI_SET (format, FMTDIR_END);
/* Scan the string at *FORMATP until TERMINATOR or the terminating NUL is
   reached, handing directives to parse_directive along the way.
   IS_TOPLEVEL, TRANSLATED, FDI and INVALID_REASON are forwarded to
   parse_directive.  How plain text and the return value are handled is not
   visible in this excerpt.  */
305 parse_upto (struct spec *spec,
306 const char **formatp, bool is_toplevel, char terminator,
307 bool translated, char *fdi, char **invalid_reason)
309 const char *format = *formatp;
/* No increment in the loop header: the position is advanced in the body.  */
311 for (; *format != terminator && *format != '\0';)
315 if (!parse_directive (spec, &format, is_toplevel, translated, fdi,
/* Comparison function over two 'struct named_arg' elements P1 and P2:
   orders them by their name member using strcmp.  */
328 named_arg_compare (const void *p1, const void *p2)
330 return strcmp (((const struct named_arg *) p1)->name,
331 ((const struct named_arg *) p2)->name);
/* Parse FORMAT and build a 'struct spec' describing it.
   If parse_upto fails, the names collected so far are released with
   free_named_args.  Otherwise the collected names are sorted and
   deduplicated before the result descriptor is allocated.  */
335 format_parse (const char *format, bool translated, char *fdi,
336 char **invalid_reason)
342 spec.named_arg_count = 0;
/* Parse the whole string: top level, terminated by the NUL byte.  */
346 if (!parse_upto (&spec, &format, true, '\0', translated, fdi, invalid_reason))
348 free_named_args (&spec);
352 /* Sort the named argument array, and eliminate duplicates. */
353 if (spec.named_arg_count > 1)
357 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
360 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
361 for (i = j = 0; i < spec.named_arg_count; i++)
/* A name equal to the last kept one (slot j-1) is a duplicate: free it.  */
362 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
363 free (spec.named[i].name);
/* A kept name is moved down to slot j.  */
367 spec.named[j].name = spec.named[i].name;
/* j is the number of distinct names that remain.  */
370 spec.named_arg_count = j;
/* The result descriptor is allocated with XMALLOC.  */
373 result = XMALLOC (struct spec);
/* Free the name string of every entry in SPEC->named.  Nothing in the
   visible code runs when SPEC->named is NULL.  Whether the array itself is
   also released is not visible in this excerpt.  */
379 free_named_args (struct spec *spec)
381 if (spec->named != NULL)
384 for (i = 0; i < spec->named_arg_count; i++)
385 free (spec->named[i].name);
/* Release the resources held by DESCR, which is treated as a
   'struct spec *'.  The visible code releases its named arguments through
   free_named_args.  */
391 format_free (void *descr)
393 struct spec *spec = (struct spec *) descr;
395 free_named_args (spec);
/* Return the 'directives' counter of DESCR, which is treated as a
   'struct spec *'.  */
400 format_get_number_of_directives (void *descr)
402 struct spec *spec = (struct spec *) descr;
404 return spec->directives;
/* Compare the named arguments of a msgid (MSGID_DESCR) with those of its
   msgstr (MSGSTR_DESCR) and report mismatches through ERROR_LOGGER, using
   PRETTY_MSGID and PRETTY_MSGSTR to name the two strings in messages.
   The two name arrays are walked in parallel: I is the cursor into spec1
   (msgid) and J is the cursor into spec2 (msgstr).  */
408 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
409 formatstring_error_logger_t error_logger,
410 const char *pretty_msgid, const char *pretty_msgstr)
412 struct spec *spec1 = (struct spec *) msgid_descr;
413 struct spec *spec2 = (struct spec *) msgstr_descr;
/* Nothing to compare when neither string has named arguments.  */
416 if (spec1->named_arg_count + spec2->named_arg_count > 0)
419 unsigned int n1 = spec1->named_arg_count;
420 unsigned int n2 = spec2->named_arg_count;
422 /* Check that the argument names in spec1 are contained in those of spec2.
423 Both arrays are sorted. We search for the differences. */
424 for (i = 0, j = 0; i < n1 || j < n2; )
/* cmp is 1 once spec1 is exhausted: the remaining spec2 names are extra.  */
426 int cmp = (i >= n1 ? 1 :
428 strcmp (spec1->named[i].name, spec2->named[j].name));
/* The argument missing from the msgid is the current spec2 entry, so spec2
   must be indexed with J.  Indexing it with I (the spec1 cursor) would name
   the wrong argument and could read past the end of spec2->named.  */
435 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
436 spec2->named[j].name, pretty_msgid);
/* Here the current spec1 entry is the one reported as missing from the
   msgstr.  */
448 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
449 spec1->named[i].name, pretty_msgstr);
/* Parser descriptor for this format.  format_get_number_of_directives is
   one of its initializers; the others are not visible in this excerpt.  */
465 struct formatstring_parser formatstring_python_brace =
469 format_get_number_of_directives,
477 /* Test program: Print the argument list specification returned by
478 format_parse for strings read from standard input. */
/* Print the named arguments of DESCR (treated as a 'struct spec *') to
   standard output, each name enclosed in single quotes.  Separators and any
   surrounding text are not visible in this excerpt.  */
483 format_print (void *descr)
485 struct spec *spec = (struct spec *) descr;
495 for (i = 0; i < spec->named_arg_count; i++)
499 printf ("'%s'", spec->named[i].name);
/* Test driver body: read a line from stdin with getline, strip one trailing
   newline, parse the line with format_parse, then print the parsed
   description and/or the reason it was rejected.  The enclosing function
   header, the loop and the branch conditions are not visible in this
   excerpt.  */
510 size_t line_size = 0;
512 char *invalid_reason;
515 line_len = getline (&line, &line_size, stdin);
/* Drop the trailing newline, if there is one.  */
518 if (line_len > 0 && line[line_len - 1] == '\n')
519 line[--line_len] = '\0';
521 invalid_reason = NULL;
/* TRANSLATED is false and NULL is passed for FDI.  */
522 descr = format_parse (line, false, NULL, &invalid_reason);
524 format_print (descr);
527 printf ("%s\n", invalid_reason);
/* The message was heap-allocated for us; release it.  */
529 free (invalid_reason);
537 * For Emacs M-x compile
539 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la"