1 /* Perl format strings.
2 Copyright (C) 2004, 2006-2007, 2009 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
32 #define _(str) gettext (str)
34 /* Perl format strings are implemented in function Perl_sv_vcatpvfn in perl-5.8.0/sv.c. A directive
37 - starts with '%' or '%m$' where m is a positive integer starting with a nonzero digit,
39 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
40 each of which acts as a flag,
41 - is optionally followed by a vector specification: 'v' or '*v' (reads an
42 argument) or '*m$v' where m is a positive integer starting with a nonzero digit,
44 - is optionally followed by a width specification: '*' (reads an argument)
45 or '*m$' where m is a positive integer starting with a nonzero digit or
46 a nonempty digit sequence starting with a nonzero digit,
47 - is optionally followed by '.' and a precision specification: '*' (reads
48 an argument) or '*m$' where m is a positive integer starting with a
49 nonzero digit or a digit sequence,
50 - is optionally followed by a size specifier, one of 'h' 'l' 'll' 'L' 'q' 'V' 'I32' 'I64' 'I',
52 - is finished by a specifier
53 - '%', that needs no argument,
54 - 'c', that needs a small integer argument,
55 - 's', that needs a string argument,
56 - '_', that needs a scalar vector argument,
57 - 'p', that needs a pointer argument,
58 - 'i', 'd', 'D', that need an integer argument,
59 - 'u', 'U', 'b', 'o', 'O', 'x', 'X', that need an unsigned integer argument,
61 - 'e', 'E', 'f', 'F', 'g', 'G', that need a floating-point argument,
62 - 'n', that needs a pointer to integer.
63 So there can be numbered argument specifications:
64 - '%m$' for the format string,
65 - '*m$v' for the vector,
66 - '*m$' for the width,
67 - '.*m$' for the precision.
68 Numbered and unnumbered argument specifications can be used in the same
69 string. The effect of '%m$' is to take argument number m, without affecting
70 the current argument number. The current argument number is incremented
71 after processing a directive with an unnumbered argument specification. */
/* Argument type constants and modifier fields.  The basic type codes
   (such as FAT_SCALAR_VECTOR and FAT_COUNT_POINTER) are small values below
   1 << 3; FAT_UNSIGNED and the FAT_SIZE_* values occupy higher bits and
   are OR-ed onto a basic type code, e.g. FAT_INTEGER | FAT_UNSIGNED | size
   in format_parse. */
82 FAT_SCALAR_VECTOR = 5,
84 FAT_COUNT_POINTER = 7,
/* Flag bit marking an unsigned integer argument. */
86 FAT_UNSIGNED = 1 << 3,
/* Size field: one value per size, stored in the bits from 1 << 4 upwards. */
87 FAT_SIZE_SHORT = 1 << 4,
89 FAT_SIZE_PTR = 3 << 4,
90 FAT_SIZE_LONG = 4 << 4,
91 FAT_SIZE_LONGLONG = 5 << 4,
/* Union of all size values; used to isolate the size field of a type.
   FAT_SIZE_V is defined on a line not shown in this excerpt. */
93 FAT_SIZE_MASK = (FAT_SIZE_SHORT | FAT_SIZE_V | FAT_SIZE_PTR
94 | FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
/* Type used for variables that hold an argument type value (a basic type
   code possibly combined with FAT_UNSIGNED and a FAT_SIZE_* value).
   Two alternative definitions appear here: plain 'int' and the enum type
   itself.  NOTE(review): presumably a preprocessor conditional, not visible
   in this excerpt, selects exactly one of them -- confirm. */
97 typedef int format_arg_type_t;
99 typedef enum format_arg_type format_arg_type_t;
/* Type of one recorded argument.  NOTE(review): presumably the 'type'
   member of struct numbered_arg, which is accessed as numbered[i].type
   elsewhere in this file; the struct header is not visible here. */
105 format_arg_type_t type;
/* Members filled in by format_parse for one parsed format string
   (struct spec):
   directives          - value returned by format_get_number_of_directives,
   numbered_arg_count  - number of entries in use in 'numbered',
   allocated           - number of entries 'numbered' has room for,
   numbered            - heap array of argument entries, released by
                         format_free. */
110 unsigned int directives;
111 unsigned int numbered_arg_count;
112 unsigned int allocated;
113 struct numbered_arg *numbered;
116 /* Locale independent test for a decimal digit.
117 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
118 <ctype.h> isdigit must be an 'unsigned char'.) */
/* The difference from '0' is converted to unsigned int, so a character
   below '0' becomes a large value and one comparison checks both bounds. */
120 #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
122 /* Locale independent test for a nonzero decimal digit. */
/* Same technique, measured from '1': accepts exactly '1' through '9'. */
123 #define isnonzerodigit(c) ((unsigned int) ((c) - '1') < 9)
/* Comparison function handed to qsort in format_parse.
   P1 and P2 point to struct numbered_arg elements; they are ordered by
   ascending 'number' member.  Yields 1, -1 or 0. */
127 numbered_arg_compare (const void *p1, const void *p2)
129 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
130 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
/* Explicit three-way comparison of the two unsigned values. */
132 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
/* Parse the Perl format string FORMAT.
   While scanning, one (number, type) entry is appended to the growable
   array 'numbered' for every argument a directive consumes: the vector
   argument, an argument-supplied width, an argument-supplied precision and
   the converted value itself.  Arguments without an explicit number take
   the next value of unnumbered_arg_count.
   After the scan the array is sorted by argument number and entries with
   equal numbers are merged into one; incompatible types for the same number
   are reported via INVALID_INCOMPATIBLE_ARG_TYPES.
   On success a struct spec is allocated with XMALLOC and filled in with the
   directive count and the argument array.  On a parse error a message is
   stored in *INVALID_REASON.
   FDI_SET is applied to the start, end and error positions of directives;
   its definition, and its use of FDI and format_start, are not visible in
   this excerpt.
   NOTE(review): TRANSLATED is not referenced in the visible lines, and the
   return statements are not visible either -- confirm against the full
   file. */
136 format_parse (const char *format, bool translated, char *fdi,
137 char **invalid_reason)
139 const char *const format_start = format;
140 unsigned int directives;
141 unsigned int numbered_arg_count;
142 unsigned int allocated;
143 struct numbered_arg *numbered;
144 unsigned int unnumbered_arg_count;
/* No arguments have been recorded yet. */
148 numbered_arg_count = 0;
149 unnumbered_arg_count = 0;
/* Scan the string; each '%' begins a directive. */
153 for (; *format != '\0';)
154 if (*format++ == '%')
157 unsigned int number = 0;
158 bool vectorize = false;
159 format_arg_type_t type;
160 format_arg_type_t size;
162 FDI_SET (format - 1, FMTDIR_START);
/* Candidate explicit argument number: accumulate its decimal digits. */
165 if (isnonzerodigit (*format))
167 const char *f = format;
172 m = 10 * m + (*f - '0');
175 while (isdigit (*f));
/* Flag characters. */
185 while (*format == ' ' || *format == '+' || *format == '-'
186 || *format == '#' || *format == '0')
/* Vector specification introduced by '*': the vector argument is recorded
   with type FAT_SCALAR_VECTOR, either as the next unnumbered argument or
   under an explicit number. */
195 else if (*format == '*')
197 const char *f = format;
205 /* Unnumbered argument. */
/* When the array is full, grow it to 2 * allocated + 1 entries. */
206 if (allocated == numbered_arg_count)
208 allocated = 2 * allocated + 1;
209 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
211 numbered[numbered_arg_count].number = ++unnumbered_arg_count;
212 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
213 numbered_arg_count++;
215 else if (isnonzerodigit (*f))
221 m = 10 * m + (*f - '0');
224 while (isdigit (*f));
231 unsigned int vector_number = m;
236 /* Numbered argument. */
237 /* Note: As of perl-5.8.0, this is not correctly
238 implemented in perl's sv.c. */
239 if (allocated == numbered_arg_count)
241 allocated = 2 * allocated + 1;
242 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
244 numbered[numbered_arg_count].number = vector_number;
245 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
246 numbered_arg_count++;
254 /* Numbered or unnumbered argument. */
255 if (allocated == numbered_arg_count)
257 allocated = 2 * allocated + 1;
258 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
260 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
261 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR;
262 numbered_arg_count++;
/* Width: a width taken from an argument is recorded as FAT_INTEGER, under
   width_number if that is nonzero, otherwise as the next unnumbered
   argument. */
268 unsigned int width_number = 0;
272 if (isnonzerodigit (*format))
274 const char *f = format;
279 m = 10 * m + (*f - '0');
282 while (isdigit (*f));
291 /* Numbered or unnumbered argument. */
292 /* Note: As of perl-5.8.0, this is not correctly
293 implemented in perl's sv.c. */
294 if (allocated == numbered_arg_count)
296 allocated = 2 * allocated + 1;
297 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
299 numbered[numbered_arg_count].number = (width_number ? width_number : ++unnumbered_arg_count);
300 numbered[numbered_arg_count].type = FAT_INTEGER;
301 numbered_arg_count++;
/* A literal width is simply skipped. */
303 else if (isnonzerodigit (*format))
305 do format++; while (isdigit (*format));
308 /* Parse precision. */
315 unsigned int precision_number = 0;
319 if (isnonzerodigit (*format))
321 const char *f = format;
326 m = 10 * m + (*f - '0');
329 while (isdigit (*f));
333 precision_number = m;
338 /* Numbered or unnumbered argument. */
339 if (allocated == numbered_arg_count)
341 allocated = 2 * allocated + 1;
342 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
344 numbered[numbered_arg_count].number = (precision_number ? precision_number : ++unnumbered_arg_count);
345 numbered[numbered_arg_count].type = FAT_INTEGER;
346 numbered_arg_count++;
/* A literal precision is simply skipped. */
350 while (isdigit (*format)) format++;
/* Size specifier: translate it to a FAT_SIZE_* value kept in 'size'. */
358 size = FAT_SIZE_SHORT;
361 else if (*format == 'l')
363 if (format[1] == 'l')
365 size = FAT_SIZE_LONGLONG;
370 size = FAT_SIZE_LONG;
374 else if (*format == 'L' || *format == 'q')
376 size = FAT_SIZE_LONGLONG;
379 else if (*format == 'V')
384 else if (*format == 'I')
386 if (format[1] == '6' && format[2] == '4')
388 size = FAT_SIZE_LONGLONG;
391 else if (format[1] == '3' && format[2] == '2')
393 size = 0; /* FAT_SIZE_INT */
/* Conversion specifier: map it to an argument type, combining the basic
   type with the size where the specifier honours it. */
415 type = FAT_SCALAR_VECTOR;
418 type = FAT_INTEGER | FAT_SIZE_V;
421 type = FAT_INTEGER | size;
424 type = FAT_INTEGER | FAT_UNSIGNED | FAT_SIZE_V;
426 case 'u': case 'b': case 'o': case 'x': case 'X':
427 type = FAT_INTEGER | FAT_UNSIGNED | size;
429 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
/* Floating-point conversions reject the short and long size specifiers. */
430 if (size == FAT_SIZE_SHORT || size == FAT_SIZE_LONG)
433 xasprintf (_("In the directive number %u, the size specifier is incompatible with the conversion specifier '%c'."), directives, *format);
434 FDI_SET (format, FMTDIR_ERROR);
437 type = FAT_DOUBLE | size;
443 type = FAT_COUNT_POINTER | size;
448 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
449 FDI_SET (format - 1, FMTDIR_ERROR);
454 INVALID_CONVERSION_SPECIFIER (directives, *format);
455 FDI_SET (format, FMTDIR_ERROR);
/* Record the converted value itself, unless the directive takes no argument
   or is vectorized. */
460 if (type != FAT_NONE && !vectorize)
462 /* Numbered or unnumbered argument. */
463 if (allocated == numbered_arg_count)
465 allocated = 2 * allocated + 1;
466 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
468 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
469 numbered[numbered_arg_count].type = type;
470 numbered_arg_count++;
473 FDI_SET (format, FMTDIR_END);
478 /* Sort the numbered argument array, and eliminate duplicates. */
479 if (numbered_arg_count > 1)
484 qsort (numbered, numbered_arg_count,
485 sizeof (struct numbered_arg), numbered_arg_compare);
487 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
489 for (i = j = 0; i < numbered_arg_count; i++)
/* Entry i has the same number as the last kept entry: merge the types. */
490 if (j > 0 && numbered[i].number == numbered[j-1].number)
492 format_arg_type_t type1 = numbered[i].type;
493 format_arg_type_t type2 = numbered[j-1].type;
494 format_arg_type_t type_both;
500 /* Incompatible types. */
501 type_both = FAT_NONE;
504 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
508 numbered[j-1].type = type_both;
/* First entry with this number: keep it at position j. */
514 numbered[j].number = numbered[i].number;
515 numbered[j].type = numbered[i].type;
519 numbered_arg_count = j;
521 /* *invalid_reason has already been set above. */
/* Package the parse results into a newly allocated struct spec. */
525 result = XMALLOC (struct spec);
526 result->directives = directives;
527 result->numbered_arg_count = numbered_arg_count;
528 result->allocated = allocated;
529 result->numbered = numbered;
533 if (numbered != NULL)
/* Release the storage owned by DESCR, which is treated as a struct spec *.
   The visible lines free the 'numbered' array; NOTE(review): freeing of
   the struct spec itself is not visible in this excerpt -- confirm.
   The NULL test is not required for correctness, since free (NULL) is a
   no-op. */
539 format_free (void *descr)
541 struct spec *spec = (struct spec *) descr;
543 if (spec->numbered != NULL)
544 free (spec->numbered);
/* Return the 'directives' member of DESCR, which is treated as a
   struct spec * (as stored by format_parse). */
549 format_get_number_of_directives (void *descr)
551 struct spec *spec = (struct spec *) descr;
553 return spec->directives;
/* Compare the argument lists of two parsed format strings.
   MSGID_DESCR and MSGSTR_DESCR are treated as struct spec * values.
   The first pass walks both sorted argument arrays in parallel looking for
   an argument number present in only one of them; the second pass compares
   the types of arguments with matching numbers.  Each mismatch is reported
   through ERROR_LOGGER, using PRETTY_MSGID and PRETTY_MSGSTR to name the
   two strings in the message.
   NOTE(review): the use of EQUALITY and the return value are on lines not
   visible in this excerpt -- confirm against the full file. */
557 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
558 formatstring_error_logger_t error_logger,
559 const char *pretty_msgid, const char *pretty_msgstr)
561 struct spec *spec1 = (struct spec *) msgid_descr;
562 struct spec *spec2 = (struct spec *) msgstr_descr;
/* Nothing to compare when neither string takes an argument. */
565 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
568 unsigned int n1 = spec1->numbered_arg_count;
569 unsigned int n2 = spec2->numbered_arg_count;
571 /* Check the argument names are the same.
572 Both arrays are sorted. We search for the first difference. */
573 for (i = 0, j = 0; i < n1 || j < n2; )
/* cmp > 0: the argument at j exists only in spec2;
   cmp < 0: the argument at i exists only in spec1. */
575 int cmp = (i >= n1 ? 1 :
577 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
578 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
584 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
585 spec2->numbered[j].number, pretty_msgstr,
595 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
596 spec1->numbered[i].number, pretty_msgstr);
606 /* Check the argument types are the same. */
/* This pass is driven by the arguments of spec2 (index j). */
608 for (i = 0, j = 0; j < n2; )
610 if (spec1->numbered[i].number == spec2->numbered[j].number)
612 if (spec1->numbered[i].type != spec2->numbered[j].type)
615 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
616 pretty_msgid, pretty_msgstr,
617 spec2->numbered[j].number);
/* Descriptor object of type struct formatstring_parser for Perl format
   strings.  format_get_number_of_directives is one of its initializers;
   the remaining initializers are on lines not visible in this excerpt. */
632 struct formatstring_parser formatstring_perl =
636 format_get_number_of_directives,
644 /* Test program: Print the argument list specification returned by
645 format_parse for strings read from standard input. */
/* Print the argument list held in DESCR, which is treated as a
   struct spec *.  Each entry of spec->numbered is decoded in three steps:
   the FAT_UNSIGNED flag, the size field selected by FAT_SIZE_MASK, and the
   basic type that remains once both are masked out. */
650 format_print (void *descr)
652 struct spec *spec = (struct spec *) descr;
664 for (i = 0; i < spec->numbered_arg_count; i++)
666 unsigned int number = spec->numbered[i].number;
/* Advance 'last' up to this argument's number; the loop body, on lines
   not shown, handles the argument numbers that were skipped. */
672 for (; last < number; last++)
674 if (spec->numbered[i].type & FAT_UNSIGNED)
675 printf ("[unsigned]");
676 switch (spec->numbered[i].type & FAT_SIZE_MASK)
692 case FAT_SIZE_LONGLONG:
693 printf ("[long long]");
698 switch (spec->numbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK))
712 case FAT_SCALAR_VECTOR:
718 case FAT_COUNT_POINTER:
/* Body of the test driver (NOTE(review): presumably main, whose header is
   not visible in this excerpt).  Reads a line from standard input with
   getline, strips the trailing newline, parses the line with format_parse,
   then prints the parsed argument list with format_print and/or the text
   of invalid_reason; the condition choosing between them is not visible. */
735 size_t line_size = 0;
737 char *invalid_reason;
740 line_len = getline (&line, &line_size, stdin);
/* Drop the trailing newline so that it is not parsed as format text. */
743 if (line_len > 0 && line[line_len - 1] == '\n')
744 line[--line_len] = '\0';
746 invalid_reason = NULL;
/* No position-marking buffer is supplied (fdi is NULL) in the test driver. */
747 descr = format_parse (line, false, NULL, &invalid_reason);
749 format_print (descr);
752 printf ("%s\n", invalid_reason);
754 free (invalid_reason);
762 * For Emacs M-x compile
764 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-perl.c ../gnulib-lib/libgettextlib.la"