1 /* Python format strings.
2 Copyright (C) 2001-2004, 2006-2009 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
/* Shorthand used around user-visible messages: _(str) expands to
   gettext (str).  */
33 #define _(str) gettext (str)
35 /* Python format strings are described in
36 Python Library reference
37 2. Built-in Types, Exceptions and Functions
40 2.1.5.2. String Formatting Operations
41 Any string or Unicode string can act as format string via the '%' operator,
42 implemented in stringobject.c and unicodeobject.c.
A directive
- starts with '%',
45 - is optionally followed by '(ident)' where ident is any sequence of
46 characters with balanced left and right parentheses,
47 - is optionally followed by any of the characters '-' (left justification),
48 '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a flag,
50 - is optionally followed by a width specification: '*' (reads an argument)
51 or a nonempty digit sequence,
52 - is optionally followed by '.' and a precision specification: '*' (reads
53 an argument) or a nonempty digit sequence,
54 - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
55 - is finished by a specifier
56 - '%', that needs no argument,
57 - 'c', that needs a character argument,
58 - 's', 'r', that need a string argument (or, when a precision of 0 is
59 given, an argument of any type),
60 - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
61 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
62 Use of '(ident)' and use of unnamed argument specifications are exclusive,
63 because the first requires a mapping as argument, while the second requires
64 a tuple as argument. When unnamed arguments are used, the number of
65 arguments in the format string and the number of elements in the argument
66 tuple (to the right of the '%' operator) must be the same.
/* Members of the argument-description structures.  The enclosing 'struct'
   lines are not visible in this listing; judging from the uses below, the
   two 'type' members appear to belong to struct named_arg and
   struct unnamed_arg, and the remaining members to struct spec --
   TODO confirm.  */
82 enum format_arg_type type;
87 enum format_arg_type type;
/* Value returned by format_get_number_of_directives.  */
92 unsigned int directives;
/* Number of entries in use in 'named' and 'unnamed' respectively.  */
93 unsigned int named_arg_count;
94 unsigned int unnamed_arg_count;
/* Capacity compared against the in-use count in format_parse, which grows
   it to 2 * allocated + 1 when the two are equal.  */
95 unsigned int allocated;
/* Arrays resized with xrealloc in format_parse and released in
   format_free.  */
96 struct named_arg *named;
97 struct unnamed_arg *unnamed;
100 /* Locale independent test for a decimal digit.
101 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
102 <ctype.h> isdigit must be an 'unsigned char'.) */
/* The difference is converted to unsigned so that characters below '0'
   wrap around to a large value and fail the '< 10' test; one comparison
   therefore checks both bounds.  */
104 #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
/* Comparison callback: P1 and P2 point to struct named_arg objects, which
   are ordered by their 'name' member using strcmp.
   NOTE(review): presumably the comparator handed to the qsort call in
   format_parse; the argument line of that call is not visible here.  */
108 named_arg_compare (const void *p1, const void *p2)
110 return strcmp (((const struct named_arg *) p1)->name,
111 ((const struct named_arg *) p2)->name);
/* Expands to a freshly xstrdup'ed, translated error message for a format
   string that uses both '(ident)' and unnamed argument specifications.
   format_parse stores the result in *invalid_reason.  */
114 #define INVALID_MIXES_NAMED_UNNAMED() \
115 xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
/* Parse FORMAT as a Python format string, collecting the named or unnamed
   argument specifications of its directives in a local 'spec'.  When a
   directive is invalid, *INVALID_REASON receives an explanatory message and
   the offending position is marked FMTDIR_ERROR through FDI_SET.
   NOTE(review): the return statements, the role of TRANSLATED and the
   definition of FDI_SET are not visible in this listing -- confirm against
   the full file.  */
118 format_parse (const char *format, bool translated, char *fdi,
119 char **invalid_reason)
121 const char *const format_start = format;
126 spec.named_arg_count = 0;
127 spec.unnamed_arg_count = 0;
/* Scan the whole string; every '%' starts a directive.  */
132 for (; *format != '\0';)
133 if (*format++ == '%')
137 bool zero_precision = false;
138 enum format_arg_type type;
/* format was already advanced past the '%', so format - 1 is the first
   character of the directive.  */
140 FDI_SET (format - 1, FMTDIR_START);
/* Optional '(ident)' part; per the header comment the name may contain
   balanced parentheses.  */
146 const char *name_start;
147 const char *name_end;
150 name_start = ++format;
152 for (; *format != '\0'; format++)
156 else if (*format == ')')
/* NOTE(review): presumably reached when the string ends before the
   closing ')' -- the guarding condition is not visible here.  */
166 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
167 FDI_SET (format - 1, FMTDIR_ERROR);
/* Copy the n characters of the name into storage for n + 1 chars.  */
172 n = name_end - name_start;
173 name = XNMALLOC (n + 1, char);
174 memcpy (name, name_start, n);
/* Flag characters.  */
178 while (*format == '-' || *format == '+' || *format == ' '
179 || *format == '#' || *format == '0')
/* NOTE(review): presumably the '*' width case, which per the header
   comment reads an argument; it is recorded as an unnamed FAT_INTEGER.  */
186 /* Named and unnamed specifications are exclusive. */
187 if (spec.named_arg_count > 0)
189 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
190 FDI_SET (format - 1, FMTDIR_ERROR);
/* The array is full: grow the capacity to 2 * allocated + 1.  */
194 if (spec.allocated == spec.unnamed_arg_count)
196 spec.allocated = 2 * spec.allocated + 1;
197 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
199 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
200 spec.unnamed_arg_count++;
/* Literal width: skip the run of digits.  */
202 else if (isdigit (*format))
204 do format++; while (isdigit (*format));
/* NOTE(review): presumably the '*' precision case, handled like the '*'
   width above.  */
215 /* Named and unnamed specifications are exclusive. */
216 if (spec.named_arg_count > 0)
218 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
219 FDI_SET (format - 1, FMTDIR_ERROR);
223 if (spec.allocated == spec.unnamed_arg_count)
225 spec.allocated = 2 * spec.allocated + 1;
226 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
228 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
229 spec.unnamed_arg_count++;
/* Literal precision: zero_precision starts out true and is cleared inside
   the digit loop.  NOTE(review): the clearing condition is not visible
   here -- presumably a digit other than '0'.  */
231 else if (isdigit (*format))
233 zero_precision = true;
237 zero_precision = false;
240 while (isdigit (*format));
/* Optional size specifier.  */
244 if (*format == 'h' || *format == 'l' || *format == 'L')
/* Map the conversion character to an argument type.  */
253 type = FAT_CHARACTER;
/* With a precision of zero any argument type is accepted, otherwise a
   string is required (see the header comment on 's' and 'r').  */
256 type = (zero_precision ? FAT_ANY : FAT_STRING);
258 case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
261 case 'e': case 'E': case 'f': case 'g': case 'G':
267 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
268 FDI_SET (format - 1, FMTDIR_ERROR);
273 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
274 FDI_SET (format, FMTDIR_ERROR);
281 /* Named argument. */
283 /* Named and unnamed specifications are exclusive. */
284 if (spec.unnamed_arg_count > 0)
286 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
287 FDI_SET (format, FMTDIR_ERROR);
/* The array is full: grow the capacity to 2 * allocated + 1.  */
291 if (spec.allocated == spec.named_arg_count)
293 spec.allocated = 2 * spec.allocated + 1;
294 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
/* The array entry stores the 'name' pointer itself, not a copy.  */
296 spec.named[spec.named_arg_count].name = name;
297 spec.named[spec.named_arg_count].type = type;
298 spec.named_arg_count++;
/* A '%' conversion needs no argument, so nothing is recorded for it.  */
300 else if (*format != '%')
302 /* Unnamed argument. */
304 /* Named and unnamed specifications are exclusive. */
305 if (spec.named_arg_count > 0)
307 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
308 FDI_SET (format, FMTDIR_ERROR);
312 if (spec.allocated == spec.unnamed_arg_count)
314 spec.allocated = 2 * spec.allocated + 1;
315 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
317 spec.unnamed[spec.unnamed_arg_count].type = type;
318 spec.unnamed_arg_count++;
/* Mark the current position as the end of the directive.  */
321 FDI_SET (format, FMTDIR_END);
326 /* Sort the named argument array, and eliminate duplicates. */
327 if (spec.named_arg_count > 1)
332 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
335 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
337 for (i = j = 0; i < spec.named_arg_count; i++)
/* Entry i has the same name as the last kept entry j-1: merge the two
   types instead of keeping both entries.  */
338 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
340 enum format_arg_type type1 = spec.named[i].type;
341 enum format_arg_type type2 = spec.named[j-1].type;
342 enum format_arg_type type_both;
/* Equal types are compatible, and FAT_ANY is compatible with any type.  */
344 if (type1 == type2 || type2 == FAT_ANY)
346 else if (type1 == FAT_ANY)
350 /* Incompatible types. */
351 type_both = FAT_NONE;
354 xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
358 spec.named[j-1].type = type_both;
/* The duplicate entry is dropped, so its name is released here.  */
359 free (spec.named[i].name);
/* Otherwise the entry is kept by copying it from index i to index j.  */
365 spec.named[j].name = spec.named[i].name;
366 spec.named[j].type = spec.named[i].type;
370 spec.named_arg_count = j;
372 /* *invalid_reason has already been set above. */
376 result = XMALLOC (struct spec);
/* NOTE(review): presumably the error exit -- the names collected so far
   are freed; the statement guarded by the last test is not visible.  */
381 if (spec.named != NULL)
384 for (i = 0; i < spec.named_arg_count; i++)
385 free (spec.named[i].name);
388 if (spec.unnamed != NULL)
/* Release the storage referenced by DESCR, which is treated as a
   struct spec: the name of every named argument and the unnamed array.
   NOTE(review): the frees of spec->named and of spec itself are not
   visible in this listing.  */
394 format_free (void *descr)
396 struct spec *spec = (struct spec *) descr;
/* Each named entry holds an allocated name string; free them one by one.  */
398 if (spec->named != NULL)
401 for (i = 0; i < spec->named_arg_count; i++)
402 free (spec->named[i].name);
405 if (spec->unnamed != NULL)
406 free (spec->unnamed);
/* Return the 'directives' member of DESCR, which is treated as a
   struct spec.  */
411 format_get_number_of_directives (void *descr)
413 struct spec *spec = (struct spec *) descr;
415 return spec->directives;
/* Compare the argument specifications of a msgid (MSGID_DESCR, spec1) with
   those of its msgstr (MSGSTR_DESCR, spec2) and report each mismatch
   through ERROR_LOGGER, using PRETTY_MSGID and PRETTY_MSGSTR to name the
   two strings in the messages.
   NOTE(review): the return value and the use of EQUALITY are not visible
   in this listing -- confirm against the full file.  */
419 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
420 formatstring_error_logger_t error_logger,
421 const char *pretty_msgid, const char *pretty_msgstr)
423 struct spec *spec1 = (struct spec *) msgid_descr;
424 struct spec *spec2 = (struct spec *) msgstr_descr;
/* One string uses named arguments (needs a mapping) while the other uses
   unnamed ones (needs a tuple).  */
427 if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
430 error_logger (_("format specifications in '%s' expect a mapping, those in '%s' expect a tuple"),
431 pretty_msgid, pretty_msgstr);
434 else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
437 error_logger (_("format specifications in '%s' expect a tuple, those in '%s' expect a mapping"),
438 pretty_msgid, pretty_msgstr);
/* At least one side has named arguments.  */
443 if (spec1->named_arg_count + spec2->named_arg_count > 0)
446 unsigned int n1 = spec1->named_arg_count;
447 unsigned int n2 = spec2->named_arg_count;
449 /* Check the argument names are the same.
450 Both arrays are sorted. We search for the first difference. */
/* Walk both sorted arrays in parallel; cmp is 1 once spec1 is exhausted,
   otherwise it comes from comparing the two current names.  */
451 for (i = 0, j = 0; i < n1 || j < n2; )
453 int cmp = (i >= n1 ? 1 :
455 strcmp (spec1->named[i].name, spec2->named[j].name));
/* A name occurs in the msgstr but not in the msgid.  */
460 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
461 spec2->named[j].name, pretty_msgstr,
/* A name occurs in the msgid but not in the msgstr.  */
471 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
472 spec1->named[i].name, pretty_msgstr);
482 /* Check the argument types are the same. */
484 for (i = 0, j = 0; j < n2; )
486 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
/* Types must be equal; the alternative that is accepted involves FAT_ANY
   on either side.  NOTE(review): the first half of that alternative is
   not visible in this listing.  */
488 if (!(spec1->named[i].type == spec2->named[j].type
490 && (spec1->named[i].type == FAT_ANY
491 || spec2->named[j].type == FAT_ANY))))
494 error_logger (_("format specifications in '%s' and '%s' for argument '%s' are not the same"),
495 pretty_msgid, pretty_msgstr,
496 spec2->named[j].name);
/* At least one side has unnamed arguments.  */
507 if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
511 /* Check the argument types are the same. */
/* The argument counts are compared first.  */
512 if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
515 error_logger (_("number of format specifications in '%s' and '%s' does not match"),
516 pretty_msgid, pretty_msgstr);
/* Then the types are compared position by position, with the same
   FAT_ANY allowance as for named arguments.  */
520 for (i = 0; i < spec2->unnamed_arg_count; i++)
521 if (!(spec1->unnamed[i].type == spec2->unnamed[i].type
523 && (spec1->unnamed[i].type == FAT_ANY
524 || spec2->unnamed[i].type == FAT_ANY))))
/* Argument positions are reported 1-based.  */
527 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
528 pretty_msgid, pretty_msgstr, i + 1);
/* Parser descriptor for Python format strings.  Only the
   format_get_number_of_directives entry of its initializer is visible in
   this listing.  */
538 struct formatstring_parser formatstring_python =
542 format_get_number_of_directives,
/* Parse STRING with format_parse (translated = false, no fdi array) and
   read the unnamed_arg_count member of the resulting descriptor.
   NOTE(review): the return statements and the handling of a failed parse
   are not visible in this listing.  */
549 get_python_format_unnamed_arg_count (const char *string)
551 /* Parse the format string. */
552 char *invalid_reason = NULL;
554 (struct spec *) format_parse (string, false, NULL, &invalid_reason);
558 unsigned int result = descr->unnamed_arg_count;
/* Release any message that format_parse stored through &invalid_reason.  */
565 free (invalid_reason);
573 /* Test program: Print the argument list specification returned by
574 format_parse for strings read from standard input. */
/* Print the argument list held in DESCR, which is treated as a
   struct spec: named arguments are printed as 'name': followed by
   output selected by a switch on their type, unnamed ones by a switch on
   their type alone.  NOTE(review): the per-type output is not visible in
   this listing.  */
579 format_print (void *descr)
581 struct spec *spec = (struct spec *) descr;
590 if (spec->named_arg_count > 0)
592 if (spec->unnamed_arg_count > 0)
596 for (i = 0; i < spec->named_arg_count; i++)
600 printf ("'%s':", spec->named[i].name);
601 switch (spec->named[i].type)
627 for (i = 0; i < spec->unnamed_arg_count; i++)
631 switch (spec->unnamed[i].type)
/* Body of the test driver: read a line from stdin, parse it with
   format_parse and print either the parsed description or the reason the
   string is invalid.  NOTE(review): the enclosing function header and
   loop are not visible in this listing.  */
662 size_t line_size = 0;
664 char *invalid_reason;
667 line_len = getline (&line, &line_size, stdin);
/* Strip the trailing newline kept by getline.  */
670 if (line_len > 0 && line[line_len - 1] == '\n')
671 line[--line_len] = '\0';
673 invalid_reason = NULL;
674 descr = format_parse (line, false, NULL, &invalid_reason);
676 format_print (descr);
679 printf ("%s\n", invalid_reason);
681 free (invalid_reason);
689 * For Emacs M-x compile
691 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../gnulib-lib/libgettextlib.la"