1 /* Parsing C format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009-2010, 2015 Free Software
4 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* C format strings are described in POSIX (IEEE P1003.1 2001), section
21 XSH 3 fprintf(). See also Linux fprintf(3) manual page.
22 A directive
23 - starts with '%' or '%m$' where m is a positive integer,
24 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
25 "'", or - only in msgstr strings - the string "I", each of which acts as
26 a flag,
27 - is optionally followed by a width specification: '*' (reads an argument)
28 or '*m$' or a nonempty digit sequence,
29 - is optionally followed by '.' and a precision specification: '*' (reads
30 an argument) or '*m$' or a nonempty digit sequence,
31 - is either continued like this:
32 - is optionally followed by a size specifier, one of 'hh' 'h' 'l' 'll'
33 'L' 'q' 'j' 'z' 'Z' 't',
34 - is finished by a specifier
35 - '%', that needs no argument,
36 - 'c', 'C', that need a character argument,
37 - 's', 'S', that need a string argument,
38 - 'i', 'd', that need a signed integer argument,
39 - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
40 - 'e', 'E', 'f', 'F', 'g', 'G', 'a', 'A', that need a floating-point
41 argument,
42 - 'p', that needs a 'void *' argument,
43 - 'n', that needs a pointer to integer.
44 or is finished by a specifier '<' inttypes-macro '>' where inttypes-macro
45 is an ISO C 99 section 7.8.1 format directive.
46 Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
47 be used in the same string. When numbered argument specifications are
48 used, specifying the Nth argument requires that all the leading arguments,
49 from the first to the (N-1)th, are specified in the format string. */
/* Enumerators of the argument type encoding (enum format_arg_type).  A
   value is a basic type code in the low three bits, optionally OR-ed with
   the flags below.  NOTE(review): the enum header and the other basic
   type codes are on lines not visible in this excerpt.  */
/* Basic type: pointer to an integer, as consumed by the 'n' directive.  */
62 FAT_COUNT_POINTER = 7,
/* Flag: the integer is unsigned.  */
64 FAT_UNSIGNED = 1 << 3,
/* Size flags.  SHORT and CHAR share the two-bit field at bits 4-5.  */
65 FAT_SIZE_SHORT = 1 << 4,
66 FAT_SIZE_CHAR = 2 << 4,
/* LONG and LONGLONG share the two-bit field at bits 6-7.  */
67 FAT_SIZE_LONG = 1 << 6,
68 FAT_SIZE_LONGLONG = 2 << 6,
/* One bit for each size selected by a '<PRI...>' macro name or by the
   'j', 'z'/'Z' and 't' size specifiers.  */
69 FAT_SIZE_8_T = 1 << 8,
70 FAT_SIZE_16_T = 1 << 9,
71 FAT_SIZE_32_T = 1 << 10,
72 FAT_SIZE_64_T = 1 << 11,
73 FAT_SIZE_LEAST8_T = 1 << 12,
74 FAT_SIZE_LEAST16_T = 1 << 13,
75 FAT_SIZE_LEAST32_T = 1 << 14,
76 FAT_SIZE_LEAST64_T = 1 << 15,
77 FAT_SIZE_FAST8_T = 1 << 16,
78 FAT_SIZE_FAST16_T = 1 << 17,
79 FAT_SIZE_FAST32_T = 1 << 18,
80 FAT_SIZE_FAST64_T = 1 << 19,
81 FAT_SIZE_INTMAX_T = 1 << 20,
82 FAT_SIZE_INTPTR_T = 1 << 21,
83 FAT_SIZE_SIZE_T = 1 << 22,
84 FAT_SIZE_PTRDIFF_T = 1 << 23,
/* Wide characters and wide strings reuse the 'long' size flag.  */
85 FAT_WIDE = FAT_SIZE_LONG,
86 /* Meaningful combinations of basic types and flags:
87 'signed char' = FAT_INTEGER | FAT_SIZE_CHAR,
88 'unsigned char' = FAT_INTEGER | FAT_SIZE_CHAR | FAT_UNSIGNED,
89 'short' = FAT_INTEGER | FAT_SIZE_SHORT,
90 'unsigned short' = FAT_INTEGER | FAT_SIZE_SHORT | FAT_UNSIGNED,
91 'int' = FAT_INTEGER,
92 'unsigned int' = FAT_INTEGER | FAT_UNSIGNED,
93 'long int' = FAT_INTEGER | FAT_SIZE_LONG,
94 'unsigned long int' = FAT_INTEGER | FAT_SIZE_LONG | FAT_UNSIGNED,
95 'long long int' = FAT_INTEGER | FAT_SIZE_LONGLONG,
96 'unsigned long long int' = FAT_INTEGER | FAT_SIZE_LONGLONG | FAT_UNSIGNED,
97 'double' = FAT_DOUBLE,
98 'long double' = FAT_DOUBLE | FAT_SIZE_LONGLONG,
99 'char'/'int' = FAT_CHAR,
100 'wchar_t'/'wint_t' = FAT_CHAR | FAT_SIZE_LONG,
101 'const char *' = FAT_STRING,
102 'const wchar_t *' = FAT_STRING | FAT_SIZE_LONG,
103 'void *' = FAT_POINTER,
104 FAT_COUNT_SCHAR_POINTER = FAT_COUNT_POINTER | FAT_SIZE_CHAR,
105 FAT_COUNT_SHORT_POINTER = FAT_COUNT_POINTER | FAT_SIZE_SHORT,
106 FAT_COUNT_INT_POINTER = FAT_COUNT_POINTER,
107 FAT_COUNT_LONGINT_POINTER = FAT_COUNT_POINTER | FAT_SIZE_LONG,
108 FAT_COUNT_LONGLONGINT_POINTER = FAT_COUNT_POINTER | FAT_SIZE_LONGLONG, */
/* Bitmasks.  FAT_BASIC_MASK is the OR of all basic type codes and selects
   the basic-type part of a value.  */
111 FAT_BASIC_MASK = (FAT_INTEGER | FAT_DOUBLE | FAT_CHAR | FAT_STRING
112 | FAT_OBJC_OBJECT | FAT_POINTER | FAT_COUNT_POINTER),
/* FAT_SIZE_MASK is the OR of every FAT_SIZE_* flag; the parser uses it to
   copy a parsed size onto integer and count-pointer types.  */
113 FAT_SIZE_MASK = (FAT_SIZE_SHORT | FAT_SIZE_CHAR
114 | FAT_SIZE_LONG | FAT_SIZE_LONGLONG
115 | FAT_SIZE_8_T | FAT_SIZE_16_T
116 | FAT_SIZE_32_T | FAT_SIZE_64_T
117 | FAT_SIZE_LEAST8_T | FAT_SIZE_LEAST16_T
118 | FAT_SIZE_LEAST32_T | FAT_SIZE_LEAST64_T
119 | FAT_SIZE_FAST8_T | FAT_SIZE_FAST16_T
120 | FAT_SIZE_FAST32_T | FAT_SIZE_FAST64_T
121 | FAT_SIZE_INTMAX_T | FAT_SIZE_INTPTR_T
122 | FAT_SIZE_SIZE_T | FAT_SIZE_PTRDIFF_T)
/* format_arg_type_t holds a basic type code OR-ed with FAT_* flags.
   NOTE(review): two alternative definitions follow; the preprocessor
   conditional that selects between them is not visible in this excerpt.
   Presumably plain int is used where an enum object cannot hold OR-ed
   flag values (e.g. when compiled as C++) - confirm.  */
125 typedef int format_arg_type_t;
127 typedef enum format_arg_type format_arg_type_t;
/* Type of one numbered argument.  NOTE(review): presumably a member of
   struct numbered_arg, whose header and `number' member (read by
   numbered_arg_compare) are on lines not visible in this excerpt.  */
133 format_arg_type_t type;
/* One argument of a format string without explicit argument numbers;
   its position in the array gives its position in the argument list.  */
136 struct unnumbered_arg
/* Basic type code OR-ed with FAT_* flags.  */
138 format_arg_type_t type;
/* Members of the parse result, accessed as `spec.' by
   format_parse_entrails.  NOTE(review): the struct header is on a line
   not visible in this excerpt.  */
/* Directive counter; its value is passed to the INVALID_* message
   macros.  */
143 unsigned int directives;
/* Number of entries in use in `unnumbered'.  */
144 unsigned int unnumbered_arg_count;
/* Allocated capacity; the parser compares it against whichever of the
   numbered or unnumbered array is being grown.  */
145 unsigned int allocated;
/* Argument types, in argument order.  */
146 struct unnumbered_arg *unnumbered;
/* Set when a '%' conversion is written with something between the two
   '%' characters, as in "%2%".  */
147 bool unlikely_intentional;
/* Number of system-dependent segments recorded in `sysdep_directives'.  */
148 unsigned int sysdep_directives_count;
/* Two pointers per segment: element 2*i is the start pointer and element
   2*i+1 the end pointer of segment i.  */
149 const char **sysdep_directives;
152 /* Locale independent test for a decimal digit.
153 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
154 <ctype.h> isdigit must be an 'unsigned char'.) */
/* The difference from '0' is converted to unsigned int, so characters
   below '0' wrap around to large values and a single comparison checks
   both bounds.  The argument is evaluated exactly once.  */
156 #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
158 /* Whether to recognize the 'I' flag. */
159 #if SYSDEP_SEGMENTS_PROCESSED
160 /* The 'I' flag can only occur in glibc >= 2.2. On other platforms, gettext()
161 filters it away even if it is present in the msgstr in the .mo file. */
162 # define HANDLE_I_FLAG \
163 ((__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) \
164 && !defined __UCLIBC__)
/* NOTE(review): the #else that separates the two definitions and the
   closing #endif are on lines not visible in this excerpt; presumably the
   definition below applies when SYSDEP_SEGMENTS_PROCESSED is false.  */
166 # define HANDLE_I_FLAG 1
/* Comparison function passed to qsort: orders two struct numbered_arg
   elements by ascending argument number.  Returns 1, -1 or 0.  The
   numbers are compared rather than subtracted, so the unsigned values
   cannot wrap around.  */
171 numbered_arg_compare (const void *p1, const void *p2)
173 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
174 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
176 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
/* Parses the C format string FORMAT and collects the types of the
   arguments that its directives consume.
   For each '%' the code reads, in order: an optional argument number,
   flags, width, precision, and then either a '<...>' inttypes macro
   directive or a size followed by a conversion specifier.
   TRANSLATED: when true, the 'I' flag is accepted and recorded as a
   system-dependent segment.
   FDI: presumably the buffer written by the FDI_SET calls, which mark
   directive starts, ends and error positions; the macro definition is
   not visible here.
   INVALID_REASON: receives an explanation when the string is rejected.
   After the scan, numbered arguments are sorted, merged where a number
   occurs more than once, checked for gaps, and converted into the
   unnumbered array.
   NOTE(review): the return type, the last parameter, the use of
   OBJC_EXTENSIONS and the return statements are on lines not visible in
   this excerpt.  */
180 format_parse_entrails (const char *format, bool translated,
181 bool objc_extensions, char *fdi, char **invalid_reason,
184 const char *const format_start = format;
186 unsigned int numbered_arg_count;
187 struct numbered_arg *numbered;
/* Start with empty argument lists and no system-dependent segments.  */
190 numbered_arg_count = 0;
191 spec.unnumbered_arg_count = 0;
194 spec.unnumbered = NULL;
195 spec.unlikely_intentional = false;
196 spec.sysdep_directives_count = 0;
197 spec.sysdep_directives = NULL;
/* Scan the string; characters other than '%' are skipped.  */
199 for (; *format != '\0';)
200 if (*format++ == '%')
203 unsigned int number = 0;
204 format_arg_type_t type;
205 format_arg_type_t size;
/* FORMAT already points past the '%', hence the - 1.  */
207 FDI_SET (format - 1, FMTDIR_START);
/* A leading digit sequence may be an argument number ('%m$').  */
210 if (isdigit (*format))
212 const char *f = format;
217 m = 10 * m + (*f - '0');
220 while (isdigit (*f));
226 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
227 FDI_SET (f, FMTDIR_ERROR);
/* Parse flags.  */
238 if (*format == ' ' || *format == '+' || *format == '-'
239 || *format == '#' || *format == '0' || *format == '\'')
242 else if (translated && *format == 'I')
/* Record the 'I' flag as a system-dependent segment: a pointer to the
   flag character followed by a pointer just past it.  */
244 spec.sysdep_directives =
246 xrealloc (spec.sysdep_directives,
247 2 * (spec.sysdep_directives_count + 1)
248 * sizeof (const char *));
249 IF_OOM (spec.sysdep_directives, goto bad_format;)
250 spec.sysdep_directives[2 * spec.sysdep_directives_count] = format;
251 spec.sysdep_directives[2 * spec.sysdep_directives_count + 1] = format + 1;
252 spec.sysdep_directives_count++;
/* Parse width.  */
263 unsigned int width_number = 0;
267 if (isdigit (*format))
269 const char *f = format;
274 m = 10 * m + (*f - '0');
277 while (isdigit (*f));
284 INVALID_WIDTH_ARGNO_0 (spec.directives);
285 FDI_SET (f, FMTDIR_ERROR);
295 /* Numbered argument. */
297 /* Numbered and unnumbered specifications are exclusive. */
298 if (spec.unnumbered_arg_count > 0)
300 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
301 FDI_SET (format - 1, FMTDIR_ERROR);
/* Grow the array to 2n+1 entries when it is full.  */
305 if (spec.allocated == numbered_arg_count)
307 spec.allocated = 2 * spec.allocated + 1;
308 numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
309 IF_OOM (numbered, goto bad_format;)
/* A width taken from an argument is recorded as an integer argument.  */
311 numbered[numbered_arg_count].number = width_number;
312 numbered[numbered_arg_count].type = FAT_INTEGER;
313 numbered_arg_count++;
317 /* Unnumbered argument. */
319 /* Numbered and unnumbered specifications are exclusive. */
320 if (numbered_arg_count > 0)
322 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
323 FDI_SET (format - 1, FMTDIR_ERROR);
327 if (spec.allocated == spec.unnumbered_arg_count)
329 spec.allocated = 2 * spec.allocated + 1;
330 spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
331 IF_OOM (spec.unnumbered, goto bad_format;)
333 spec.unnumbered[spec.unnumbered_arg_count].type = FAT_INTEGER;
334 spec.unnumbered_arg_count++;
/* A literal width: skip the digits; no argument is recorded.  */
337 else if (isdigit (*format))
339 do format++; while (isdigit (*format));
342 /* Parse precision. */
349 unsigned int precision_number = 0;
353 if (isdigit (*format))
355 const char *f = format;
360 m = 10 * m + (*f - '0');
363 while (isdigit (*f));
370 INVALID_PRECISION_ARGNO_0 (spec.directives);
371 FDI_SET (f, FMTDIR_ERROR);
374 precision_number = m;
379 if (precision_number)
381 /* Numbered argument. */
383 /* Numbered and unnumbered specifications are exclusive. */
384 if (spec.unnumbered_arg_count > 0)
386 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
387 FDI_SET (format - 1, FMTDIR_ERROR);
391 if (spec.allocated == numbered_arg_count)
393 spec.allocated = 2 * spec.allocated + 1;
394 numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
395 IF_OOM (numbered, goto bad_format;)
/* A precision taken from an argument is recorded as an integer
   argument.  */
397 numbered[numbered_arg_count].number = precision_number;
398 numbered[numbered_arg_count].type = FAT_INTEGER;
399 numbered_arg_count++;
403 /* Unnumbered argument. */
405 /* Numbered and unnumbered specifications are exclusive. */
406 if (numbered_arg_count > 0)
408 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
409 FDI_SET (format - 1, FMTDIR_ERROR);
413 if (spec.allocated == spec.unnumbered_arg_count)
415 spec.allocated = 2 * spec.allocated + 1;
416 spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
417 IF_OOM (spec.unnumbered, goto bad_format;)
419 spec.unnumbered[spec.unnumbered_arg_count].type = FAT_INTEGER;
420 spec.unnumbered_arg_count++;
/* A literal precision: skip the digits; no argument is recorded.  */
423 else if (isdigit (*format))
425 do format++; while (isdigit (*format));
/* A '<' here introduces an inttypes macro directive.  It is recognized
   only while system-dependent segments have not been processed, and the
   directive is itself recorded as a system-dependent segment.  */
429 if (!SYSDEP_SEGMENTS_PROCESSED && *format == '<')
431 spec.sysdep_directives =
433 xrealloc (spec.sysdep_directives,
434 2 * (spec.sysdep_directives_count + 1)
435 * sizeof (const char *));
436 IF_OOM (spec.sysdep_directives, goto bad_format;)
437 spec.sysdep_directives[2 * spec.sysdep_directives_count] = format;
440 /* Parse ISO C 99 section 7.8.1 format string directive.
442 P R I { d | i | o | u | x | X }
443 { { | LEAST | FAST } { 8 | 16 | 32 | 64 } | MAX | PTR } */
446 *invalid_reason = INVALID_C99_MACRO (spec.directives);
447 FDI_SET (*format == '\0' ? format - 1 : format, FMTDIR_ERROR);
453 *invalid_reason = INVALID_C99_MACRO (spec.directives);
454 FDI_SET (*format == '\0' ? format - 1 : format, FMTDIR_ERROR);
460 *invalid_reason = INVALID_C99_MACRO (spec.directives);
461 FDI_SET (*format == '\0' ? format - 1 : format, FMTDIR_ERROR);
471 case 'u': case 'o': case 'x': case 'X':
472 type = FAT_INTEGER | FAT_UNSIGNED;
475 *invalid_reason = INVALID_C99_MACRO (spec.directives);
476 FDI_SET (*format == '\0' ? format - 1 : format, FMTDIR_ERROR);
/* The rest of the macro name selects the size flag added to TYPE.  */
481 if (format[0] == 'M' && format[1] == 'A' && format[2] == 'X')
483 type |= FAT_SIZE_INTMAX_T;
486 else if (format[0] == 'P' && format[1] == 'T' && format[2] == 'R')
488 type |= FAT_SIZE_INTPTR_T;
493 if (format[0] == 'L' && format[1] == 'E' && format[2] == 'A'
494 && format[3] == 'S' && format[4] == 'T')
497 if (format[0] == '8')
499 type |= FAT_SIZE_LEAST8_T;
502 else if (format[0] == '1' && format[1] == '6')
504 type |= FAT_SIZE_LEAST16_T;
507 else if (format[0] == '3' && format[1] == '2')
509 type |= FAT_SIZE_LEAST32_T;
512 else if (format[0] == '6' && format[1] == '4')
514 type |= FAT_SIZE_LEAST64_T;
519 *invalid_reason = INVALID_C99_MACRO (spec.directives);
520 FDI_SET (*format == '\0' ? format - 1 : format,
525 else if (format[0] == 'F' && format[1] == 'A'
526 && format[2] == 'S' && format[3] == 'T')
529 if (format[0] == '8')
531 type |= FAT_SIZE_FAST8_T;
534 else if (format[0] == '1' && format[1] == '6')
536 type |= FAT_SIZE_FAST16_T;
539 else if (format[0] == '3' && format[1] == '2')
541 type |= FAT_SIZE_FAST32_T;
544 else if (format[0] == '6' && format[1] == '4')
546 type |= FAT_SIZE_FAST64_T;
551 *invalid_reason = INVALID_C99_MACRO (spec.directives);
552 FDI_SET (*format == '\0' ? format - 1 : format,
559 if (format[0] == '8')
561 type |= FAT_SIZE_8_T;
564 else if (format[0] == '1' && format[1] == '6')
566 type |= FAT_SIZE_16_T;
569 else if (format[0] == '3' && format[1] == '2')
571 type |= FAT_SIZE_32_T;
574 else if (format[0] == '6' && format[1] == '4')
576 type |= FAT_SIZE_64_T;
581 *invalid_reason = INVALID_C99_MACRO (spec.directives);
582 FDI_SET (*format == '\0' ? format - 1 : format,
591 *invalid_reason = INVALID_ANGLE_BRACKET (spec.directives);
592 FDI_SET (*format == '\0' ? format - 1 : format, FMTDIR_ERROR);
/* Complete the segment with its end pointer.  */
596 spec.sysdep_directives[2 * spec.sysdep_directives_count + 1] = format + 1;
597 spec.sysdep_directives_count++;
/* Size parsing.  If a short or char size was already seen, the size
   becomes char; otherwise it becomes short.  */
607 if (size & (FAT_SIZE_SHORT | FAT_SIZE_CHAR))
608 size = FAT_SIZE_CHAR;
610 size = FAT_SIZE_SHORT;
/* 'l' gives long; if a long size was already seen it gives long long.  */
612 else if (*format == 'l')
614 if (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG))
615 size = FAT_SIZE_LONGLONG;
617 size = FAT_SIZE_LONG;
619 else if (*format == 'L')
620 size = FAT_SIZE_LONGLONG;
621 else if (*format == 'q')
622 /* Old BSD 4.4 convention. */
623 size = FAT_SIZE_LONGLONG;
624 else if (*format == 'j')
625 size = FAT_SIZE_INTMAX_T;
626 else if (*format == 'z' || *format == 'Z')
627 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
628 because the warning facility in gcc-2.95.2 understands
629 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
630 size = FAT_SIZE_SIZE_T;
631 else if (*format == 't')
632 size = FAT_SIZE_PTRDIFF_T;
633 #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
634 else if (SYSDEP_SEGMENTS_PROCESSED
639 size = FAT_SIZE_64_T;
650 /* Programmers writing _("%2%") most often will not want to
651 use this string as a c-format string, but rather as a
652 literal or as a different kind of format string. */
653 if (format[-1] != '%')
654 spec.unlikely_intentional = true;
657 case 'm': /* glibc extension */
662 type |= (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
665 case 'C': /* obsolete */
666 type = FAT_CHAR | FAT_WIDE;
670 type |= (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
673 case 'S': /* obsolete */
674 type = FAT_STRING | FAT_WIDE;
678 type |= (size & FAT_SIZE_MASK);
680 case 'u': case 'o': case 'x': case 'X':
681 type = FAT_INTEGER | FAT_UNSIGNED;
682 type |= (size & FAT_SIZE_MASK);
684 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
687 type |= (size & FAT_SIZE_LONGLONG);
692 type = FAT_OBJC_OBJECT;
700 type = FAT_COUNT_POINTER;
701 type |= (size & FAT_SIZE_MASK);
707 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
708 FDI_SET (format - 1, FMTDIR_ERROR);
713 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
714 FDI_SET (format, FMTDIR_ERROR);
/* Record the argument, unless the directive consumes none.  */
720 if (type != FAT_NONE)
724 /* Numbered argument. */
726 /* Numbered and unnumbered specifications are exclusive. */
727 if (spec.unnumbered_arg_count > 0)
729 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
730 FDI_SET (format, FMTDIR_ERROR);
734 if (spec.allocated == numbered_arg_count)
736 spec.allocated = 2 * spec.allocated + 1;
737 numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
738 IF_OOM (numbered, goto bad_format;)
740 numbered[numbered_arg_count].number = number;
741 numbered[numbered_arg_count].type = type;
742 numbered_arg_count++;
746 /* Unnumbered argument. */
748 /* Numbered and unnumbered specifications are exclusive. */
749 if (numbered_arg_count > 0)
751 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
752 FDI_SET (format, FMTDIR_ERROR);
756 if (spec.allocated == spec.unnumbered_arg_count)
758 spec.allocated = 2 * spec.allocated + 1;
759 spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
760 IF_OOM (spec.unnumbered, goto bad_format;)
762 spec.unnumbered[spec.unnumbered_arg_count].type = type;
763 spec.unnumbered_arg_count++;
/* Mark the conversion specifier as the end of the directive.  */
767 FDI_SET (format, FMTDIR_END);
772 /* Sort the numbered argument array, and eliminate duplicates. */
773 if (numbered_arg_count > 1)
778 qsort (numbered, numbered_arg_count,
779 sizeof (struct numbered_arg), numbered_arg_compare);
781 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
783 for (i = j = 0; i < numbered_arg_count; i++)
784 if (j > 0 && numbered[i].number == numbered[j-1].number)
/* The same argument number occurs again: merge the two types into the
   entry that was kept.  */
786 format_arg_type_t type1 = numbered[i].type;
787 format_arg_type_t type2 = numbered[j-1].type;
788 format_arg_type_t type_both;
794 /* Incompatible types. */
795 type_both = FAT_NONE;
798 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
802 numbered[j-1].type = type_both;
/* Copy entry i down to slot j.  */
808 numbered[j].number = numbered[i].number;
809 numbered[j].type = numbered[i].type;
813 numbered_arg_count = j;
815 /* *invalid_reason has already been set above. */
819 /* Verify that the format string uses all arguments up to the highest
821 if (numbered_arg_count > 0)
825 for (i = 0; i < numbered_arg_count; i++)
826 if (numbered[i].number != i + 1)
828 *invalid_reason = INVALID_IGNORED_ARGUMENT (numbered[i].number, i + 1);
832 /* So now the numbered arguments array is equivalent to a sequence
833 of unnumbered arguments. */
834 spec.unnumbered_arg_count = numbered_arg_count;
835 spec.allocated = spec.unnumbered_arg_count;
836 spec.unnumbered = XNMALLOC (spec.allocated, struct unnumbered_arg);
837 IF_OOM (spec.unnumbered, goto bad_format;)
838 for (i = 0; i < spec.unnumbered_arg_count; i++)
839 spec.unnumbered[i].type = numbered[i].type;
841 numbered_arg_count = 0;
/* Cleanup that releases the arrays; the IF_OOM checks above jump to
   bad_format.  NOTE(review): the label line itself is not visible in
   this excerpt.  */
848 if (numbered != NULL)
850 if (spec.unnumbered != NULL)
851 free (spec.unnumbered);
852 if (spec.sysdep_directives != NULL)
853 free (spec.sysdep_directives);