1 /* Python format strings.
2 Copyright (C) 2001-2004, 2006-2009, 2015 Free Software Foundation,
4 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
30 #include "xvasprintf.h"
31 #include "format-invalid.h"
34 #define _(str) gettext (str)
36 /* Python format strings are described in
37 Python Library reference
38 2. Built-in Types, Exceptions and Functions
41 2.1.5.2. String Formatting Operations
42 Any string or Unicode string can act as format string via the '%' operator,
43 implemented in stringobject.c and unicodeobject.c.
44 A directive
45 - starts with '%'
46 - is optionally followed by '(ident)' where ident is any sequence of
47 characters with balanced left and right parentheses,
48 - is optionally followed by any of the characters '-' (left justification),
49 '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
50 flag,
51 - is optionally followed by a width specification: '*' (reads an argument)
52 or a nonempty digit sequence,
53 - is optionally followed by '.' and a precision specification: '*' (reads
54 an argument) or a nonempty digit sequence,
55 - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
56 - is finished by a specifier
57 - '%', that needs no argument,
58 - 'c', that needs a character argument,
59 - 's', 'r', that need a string argument (or, when a precision of 0 is
60 given, an argument of any type),
61 - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
62 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
63 Use of '(ident)' and use of unnamed argument specifications are exclusive,
64 because the first requires a mapping as argument, while the second requires
65 a tuple as argument. When unnamed arguments are used, the number of
66 arguments in the format string and the number of elements in the argument
67 tuple (to the right of the '%' operator) must be the same.
83 enum format_arg_type type;
88 enum format_arg_type type;
93 unsigned int directives;
94 unsigned int named_arg_count;
95 unsigned int unnamed_arg_count;
96 unsigned int allocated;
97 struct named_arg *named;
98 struct unnamed_arg *unnamed;
101 /* Locale independent test for a decimal digit.
102 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
103 <ctype.h> isdigit must be an 'unsigned char'.)
The difference (c) - '0' is converted to 'unsigned int', so a character
below '0' wraps around to a large value; the single comparison '< 10'
therefore rejects it together with every character above '9'. */
105 #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
/* Comparison function over two 'struct named_arg' objects.
   P1 and P2 are untyped pointers to the two elements; the result is the
   strcmp ordering of their 'name' members (negative, zero or positive).
   The (const void *, const void *) signature is the one qsort expects;
   presumably this is the comparator given to the qsort call in
   format_parse -- TODO confirm, that argument line is not visible here. */
109 named_arg_compare (const void *p1, const void *p2)
111 return strcmp (((const struct named_arg *) p1)->name,
112 ((const struct named_arg *) p2)->name);
/* Error text for a format string that uses both '(name)' directives and
   unnamed directives. Expands to an xstrdup copy of the gettext-translated
   message; format_parse stores the result in *invalid_reason. */
115 #define INVALID_MIXES_NAMED_UNNAMED() \
116 xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
/* Parse FORMAT as a Python '%' format string and collect its arguments.
   FORMAT is the NUL-terminated string to scan.
   TRANSLATED is not referenced in the lines visible in this excerpt.
   FDI is not named explicitly below; presumably the FDI_SET macro uses it
   to record the start, end and error position of each directive --
   TODO confirm against the FDI_SET definition.
   INVALID_REASON: on failure, *INVALID_REASON is set to a message built
   with xstrdup/xasprintf (or one of the INVALID_* helpers).
   Every directive other than '%%' adds either a named argument (when it
   has a '(name)' part) or an unnamed one. Two further places, most likely
   the '*' width and '*' precision, each add an unnamed FAT_INTEGER
   argument. Using both named and unnamed arguments in one string is
   reported as an error. After the scan the named arguments are sorted
   and entries with the same name are merged; incompatible types for one
   name are an error. The descriptor is built in the local 'spec' and a
   'struct spec' is allocated for the result; on the failure path the
   collected names are freed. The return statements themselves are not
   visible in this excerpt. */
119 format_parse (const char *format, bool translated, char *fdi,
120 char **invalid_reason)
122 const char *const format_start = format;
127 spec.named_arg_count = 0;
128 spec.unnamed_arg_count = 0;
/* Walk the string; every '%' opens a directive. */
133 for (; *format != '\0';)
134 if (*format++ == '%')
138 bool zero_precision = false;
139 enum format_arg_type type;
/* format was already advanced past the '%', hence the - 1. */
141 FDI_SET (format - 1, FMTDIR_START);
147 const char *name_start;
148 const char *name_end;
/* The name starts right after the current character. */
151 name_start = ++format;
153 for (; *format != '\0'; format++)
157 else if (*format == ')')
167 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
168 FDI_SET (format - 1, FMTDIR_ERROR);
/* Duplicate the name: n bytes are copied into a buffer of n + 1 bytes. */
173 n = name_end - name_start;
174 name = XNMALLOC (n + 1, char);
175 memcpy (name, name_start, n);
/* Flag characters. */
179 while (*format == '-' || *format == '+' || *format == ' '
180 || *format == '#' || *format == '0')
187 /* Named and unnamed specifications are exclusive. */
188 if (spec.named_arg_count > 0)
190 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
191 FDI_SET (format - 1, FMTDIR_ERROR);
/* Array is full: enlarge it to 2 * allocated + 1 entries. */
195 if (spec.allocated == spec.unnamed_arg_count)
197 spec.allocated = 2 * spec.allocated + 1;
198 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
/* This part of the directive consumes one integer argument. */
200 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
201 spec.unnamed_arg_count++;
203 else if (isdigit (*format))
205 do format++; while (isdigit (*format));
216 /* Named and unnamed specifications are exclusive. */
217 if (spec.named_arg_count > 0)
219 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
220 FDI_SET (format - 1, FMTDIR_ERROR);
/* Array is full: enlarge it to 2 * allocated + 1 entries. */
224 if (spec.allocated == spec.unnamed_arg_count)
226 spec.allocated = 2 * spec.allocated + 1;
227 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
/* This part of the directive consumes one integer argument. */
229 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
230 spec.unnamed_arg_count++;
232 else if (isdigit (*format))
234 zero_precision = true;
238 zero_precision = false;
241 while (isdigit (*format));
/* Size specifier. */
245 if (*format == 'h' || *format == 'l' || *format == 'L')
254 type = FAT_CHARACTER;
/* A zero precision lifts the string requirement: any type is accepted. */
257 type = (zero_precision ? FAT_ANY : FAT_STRING);
259 case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
262 case 'e': case 'E': case 'f': case 'g': case 'G':
268 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
269 FDI_SET (format - 1, FMTDIR_ERROR);
274 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
275 FDI_SET (format, FMTDIR_ERROR);
282 /* Named argument. */
284 /* Named and unnamed specifications are exclusive. */
285 if (spec.unnamed_arg_count > 0)
287 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
288 FDI_SET (format, FMTDIR_ERROR);
/* Array is full: enlarge it to 2 * allocated + 1 entries. */
292 if (spec.allocated == spec.named_arg_count)
294 spec.allocated = 2 * spec.allocated + 1;
295 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
/* Append (name, type); the array entry now refers to the 'name' buffer. */
297 spec.named[spec.named_arg_count].name = name;
298 spec.named[spec.named_arg_count].type = type;
299 spec.named_arg_count++;
/* A '%' specifier needs no argument, so nothing is recorded for it. */
301 else if (*format != '%')
303 /* Unnamed argument. */
305 /* Named and unnamed specifications are exclusive. */
306 if (spec.named_arg_count > 0)
308 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
309 FDI_SET (format, FMTDIR_ERROR);
/* Array is full: enlarge it to 2 * allocated + 1 entries. */
313 if (spec.allocated == spec.unnamed_arg_count)
315 spec.allocated = 2 * spec.allocated + 1;
316 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
318 spec.unnamed[spec.unnamed_arg_count].type = type;
319 spec.unnamed_arg_count++;
322 FDI_SET (format, FMTDIR_END);
327 /* Sort the named argument array, and eliminate duplicates. */
328 if (spec.named_arg_count > 1)
333 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
336 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
338 for (i = j = 0; i < spec.named_arg_count; i++)
/* Same name as the entry kept last: merge instead of keeping both. */
339 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
341 enum format_arg_type type1 = spec.named[i].type;
342 enum format_arg_type type2 = spec.named[j-1].type;
343 enum format_arg_type type_both;
/* Identical types, or FAT_ANY on either side, are compatible. */
345 if (type1 == type2 || type2 == FAT_ANY)
347 else if (type1 == FAT_ANY)
351 /* Incompatible types. */
352 type_both = FAT_NONE;
355 xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
/* Keep one entry per name: store the merged type, drop the duplicate's name. */
359 spec.named[j-1].type = type_both;
360 free (spec.named[i].name);
/* Distinct name: copy the entry down to slot j. */
366 spec.named[j].name = spec.named[i].name;
367 spec.named[j].type = spec.named[i].type;
371 spec.named_arg_count = j;
373 /* *invalid_reason has already been set above. */
377 result = XMALLOC (struct spec);
/* Failure path: release the argument names collected so far. */
382 if (spec.named != NULL)
385 for (i = 0; i < spec.named_arg_count; i++)
386 free (spec.named[i].name);
389 if (spec.unnamed != NULL)
/* Release the storage held by a format descriptor.
   DESCR is treated as a 'struct spec *'. When the 'named' array exists,
   the name of each of its named_arg_count entries is freed; when the
   'unnamed' array exists, it is freed as well. */
395 format_free (void *descr)
397 struct spec *spec = (struct spec *) descr;
399 if (spec->named != NULL)
402 for (i = 0; i < spec->named_arg_count; i++)
403 free (spec->named[i].name);
406 if (spec->unnamed != NULL)
407 free (spec->unnamed);
/* Return the 'directives' member of the format descriptor DESCR, which is
   treated as a 'struct spec *'. */
412 format_get_number_of_directives (void *descr)
414 struct spec *spec = (struct spec *) descr;
416 return spec->directives;
/* Compare the descriptor of a msgid with that of its msgstr.
   MSGID_DESCR and MSGSTR_DESCR are 'struct spec *' values.
   EQUALITY is not referenced in the lines visible in this excerpt.
   ERROR_LOGGER receives a printf-style message for each reported problem.
   PRETTY_MSGID and PRETTY_MSGSTR are the labels inserted into those
   messages for the two strings.
   Problems reported:
   - one string uses named arguments (mapping) and the other unnamed
     ones (tuple);
   - a named argument occurs in only one of the two strings;
   - a named argument has different types in the two strings;
   - the numbers of unnamed arguments differ;
   - an unnamed argument has different types in the two strings.
   FAT_ANY on either side is part of the type test, but the condition
   that enables this leniency is on a line not visible here.
   The return statement is not visible in this excerpt. */
420 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
421 formatstring_error_logger_t error_logger,
422 const char *pretty_msgid, const char *pretty_msgstr)
424 struct spec *spec1 = (struct spec *) msgid_descr;
425 struct spec *spec2 = (struct spec *) msgstr_descr;
/* Named arguments on one side and unnamed ones on the other. */
428 if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
431 error_logger (_("format specifications in '%s' expect a mapping, those in '%s' expect a tuple"),
432 pretty_msgid, pretty_msgstr);
435 else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
438 error_logger (_("format specifications in '%s' expect a tuple, those in '%s' expect a mapping"),
439 pretty_msgid, pretty_msgstr);
/* At least one side has named arguments. */
444 if (spec1->named_arg_count + spec2->named_arg_count > 0)
447 unsigned int n1 = spec1->named_arg_count;
448 unsigned int n2 = spec2->named_arg_count;
450 /* Check the argument names are the same.
451 Both arrays are sorted. We search for the first difference. */
452 for (i = 0, j = 0; i < n1 || j < n2; )
454 int cmp = (i >= n1 ? 1 :
456 strcmp (spec1->named[i].name, spec2->named[j].name));
461 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
462 spec2->named[j].name, pretty_msgstr,
472 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
473 spec1->named[i].name, pretty_msgstr);
483 /* Check the argument types are the same. */
/* The loop is bounded by the msgstr's arguments only (j < n2). */
485 for (i = 0, j = 0; j < n2; )
487 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
489 if (!(spec1->named[i].type == spec2->named[j].type
491 && (spec1->named[i].type == FAT_ANY
492 || spec2->named[j].type == FAT_ANY))))
495 error_logger (_("format specifications in '%s' and '%s' for argument '%s' are not the same"),
496 pretty_msgid, pretty_msgstr,
497 spec2->named[j].name);
/* At least one side has unnamed arguments. */
508 if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
512 /* Check the argument types are the same. */
513 if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
516 error_logger (_("number of format specifications in '%s' and '%s' does not match"),
517 pretty_msgid, pretty_msgstr);
521 for (i = 0; i < spec2->unnamed_arg_count; i++)
522 if (!(spec1->unnamed[i].type == spec2->unnamed[i].type
524 && (spec1->unnamed[i].type == FAT_ANY
525 || spec2->unnamed[i].type == FAT_ANY))))
/* Arguments are numbered from 1 in the message, hence i + 1. */
528 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
529 pretty_msgid, pretty_msgstr, i + 1);
/* The 'struct formatstring_parser' object for Python format strings.
   Its initializer includes format_get_number_of_directives; the other
   initializer entries are not visible in this excerpt. */
539 struct formatstring_parser formatstring_python =
543 format_get_number_of_directives,
/* Parse STRING with format_parse and read the number of unnamed arguments
   from the resulting descriptor.
   format_parse is called with translated = false and a NULL fdi argument.
   The error message stored in invalid_reason is freed here; this is
   presumably the path where parsing failed, and the value returned in
   that case is not visible in this excerpt. */
550 get_python_format_unnamed_arg_count (const char *string)
552 /* Parse the format string. */
553 char *invalid_reason = NULL;
555 (struct spec *) format_parse (string, false, NULL, &invalid_reason);
559 unsigned int result = descr->unnamed_arg_count;
566 free (invalid_reason);
574 /* Test program: Print the argument list specification returned by
575 format_parse for strings read from standard input. */
/* Print the argument list held in the format descriptor DESCR, which is
   treated as a 'struct spec *'.
   Each named argument is written as 'name': and is then handled by a
   switch on its type; each unnamed argument is handled by a switch on its
   type. The output produced for each type is on lines not visible in
   this excerpt. */
580 format_print (void *descr)
582 struct spec *spec = (struct spec *) descr;
591 if (spec->named_arg_count > 0)
593 if (spec->unnamed_arg_count > 0)
597 for (i = 0; i < spec->named_arg_count; i++)
601 printf ("'%s':", spec->named[i].name);
602 switch (spec->named[i].type)
628 for (i = 0; i < spec->unnamed_arg_count; i++)
632 switch (spec->unnamed[i].type)
663 size_t line_size = 0;
665 char *invalid_reason;
668 line_len = getline (&line, &line_size, stdin);
671 if (line_len > 0 && line[line_len - 1] == '\n')
672 line[--line_len] = '\0';
674 invalid_reason = NULL;
675 descr = format_parse (line, false, NULL, &invalid_reason);
677 format_print (descr);
680 printf ("%s\n", invalid_reason);
682 free (invalid_reason);
690 * For Emacs M-x compile
692 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../gnulib-lib/libgettextlib.la"