1 /* Java format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
31 #include "xvasprintf.h"
32 #include "format-invalid.h"
35 #define _(str) gettext (str)
37 /* Java format strings are described in java/text/MessageFormat.html.
38 See also the ICU documentation class_MessageFormat.html.
40 messageFormatPattern := string ( "{" messageFormatElement "}" string )*
42 messageFormatElement := argument { "," elementFormat }
44 elementFormat := "time" { "," datetimeStyle }
45 | "date" { "," datetimeStyle }
46 | "number" { "," numberStyle }
47 | "choice" { "," choiceStyle }
49 datetimeStyle := "short"
55 numberStyle := "currency"
60 choiceStyle := choiceFormatPattern
62 dateFormatPattern see SimpleDateFormat.applyPattern
64 numberFormatPattern see DecimalFormat.applyPattern
66 choiceFormatPattern see ChoiceFormat constructor
68 In strings, literal curly braces can be used if quoted between single
69 quotes. A real single quote is represented by ''.
71 If a pattern is used, then unquoted braces in the pattern, if any, must
72 match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab {0'}' de" and
75 The argument is a number from 0 to 9, which corresponds to the arguments
76 presented in an array to be formatted.
78 It is ok to have unused arguments in the array.
80 Adding a dateFormatPattern / numberFormatPattern / choiceFormatPattern
81 to an elementFormat is equivalent to creating a SimpleDateFormat /
82 DecimalFormat / ChoiceFormat and installing it with setFormat. For example,
85 new MessageFormat("The disk \"{1}\" contains {0,choice,0#no files|1#one file|2#{0,number} files}.");
89 MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
90 form.setFormat(1, // Number of {} occurrence in the string!
91 new ChoiceFormat(new double[] { 0, 1, 2 },
92 new String[] { "no files", "one file",
93 "{0,number} files" }));
95 Note: The behaviour of quotes inside a choiceFormatPattern is not clear.
97 "abc{1,choice,0#{1,number,00';'000}}def"
99 JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;000}}def"
101 "abc{1,choice,0#{1,number,00';'}}def"
102 JDK 1.1.x: interprets the semicolon as number suffix
103 JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;}}def"
109 FAT_OBJECT, /* java.lang.Object */
110 FAT_NUMBER, /* java.lang.Number */
111 FAT_DATE /* java.util.Date */
/* Type required for this argument.  Each record of spec->numbered carries
   this field next to the argument's number.  */
117 enum format_arg_type type;
/* Directive counter.  It is the value returned by
   format_get_number_of_directives and is quoted in error messages as
   "directive number %u".  */
122 unsigned int directives;
/* Number of records of 'numbered' that are in use.  */
123 unsigned int numbered_arg_count;
/* Number of records 'numbered' has room for.  When it equals
   numbered_arg_count, the array is grown to 2 * allocated + 1 records.  */
124 unsigned int allocated;
/* Array of (argument number, argument type) records, resized with
   xrealloc.  */
125 struct numbered_arg *numbered;
129 /* Forward declaration of local functions. */
/* Each of these returns true when FORMAT is a valid pattern of the kind
   named by the function.  choice_format_parse additionally takes the spec
   and the invalid_reason slot, which it hands on to message_format_parse.  */
130 static bool date_format_parse (const char *format);
131 static bool number_format_parse (const char *format);
132 static bool choice_format_parse (const char *format, struct spec *spec,
133 char **invalid_reason);
137 - When we see a single-quote, ignore it, but toggle the quoting flag.
138 - When we see a double single-quote, ignore the first of the two.
139 Assumes local variables format, quoting. */
/* The condition of this macro holds only for a lone single-quote: it first
   checks that *format is a quote, then advances format by one character and
   checks that the character now pointed to is not a quote as well.  Note
   that format is advanced as a side effect of evaluating the condition, so
   for a doubled quote format ends up on the second quote.  */
140 #define HANDLE_QUOTE \
141 if (*format == '\'' && *++format != '\'') \
144 /* Note that message_format_parse and choice_format_parse are mutually
145 recursive. This is because MessageFormat can use some ChoiceFormats,
146 and a ChoiceFormat is made up from several MessageFormats. */
148 /* Return true if a format is a valid messageFormatPattern.
149 Extracts argument type information into spec. */
/* Scans FORMAT for "{...}" directives.  For every directive it parses the
   leading decimal argument number and the optional ",time" / ",date" /
   ",number" / ",choice" element format, then appends a (number, type)
   record to SPEC->numbered.  Error messages are built with xstrdup or
   xasprintf and identify the directive by SPEC->directives.
   NOTE(review): FDI appears to be consumed only through the FDI_SET macro,
   which is defined elsewhere -- confirm.  choice_format_parse calls this
   function with FDI == NULL.  */
151 message_format_parse (const char *format, char *fdi, struct spec *spec,
152 char **invalid_reason)
154 const char *const format_start = format;
155 bool quoting = false;
/* An unquoted '{' opens a directive.  */
160 if (!quoting && *format == '{')
163 const char *element_start;
164 const char *element_end;
166 char *element_alloced;
169 enum format_arg_type type;
171 FDI_SET (format, FMTDIR_START);
/* The element text starts right after the '{'.  */
174 element_start = ++format;
/* Walk forward looking for the '}' that ends this element.  */
176 for (; *format != '\0'; format++)
180 else if (*format == '}')
191 xstrdup (_("The string ends in the middle of a directive: found '{' without matching '}'."));
192 FDI_SET (format - 1, FMTDIR_ERROR);
/* element_end points at the closing '}'; format moves past it.  */
195 element_end = format++;
/* Copy the text between the braces into a scratch buffer obtained with
   xmalloca; it is released with freea before each error exit below and
   once more after the element has been parsed successfully.  */
197 n = element_end - element_start;
198 element = element_alloced = (char *) xmalloca (n + 1);
199 memcpy (element, element_start, n);
/* The element must begin with a digit, i.e. with the argument number.  */
202 if (!c_isdigit (*element))
205 xasprintf (_("In the directive number %u, '{' is not followed by an argument number."), spec->directives);
206 FDI_SET (format - 1, FMTDIR_ERROR);
207 freea (element_alloced);
/* Accumulate the decimal argument number, one digit per iteration.  */
213 number = 10 * number + (*element - '0');
216 while (c_isdigit (*element));
/* Nothing after the number: the directive is just "{N}".  */
219 if (*element == '\0')
/* Date/time element.  An optional style may follow after a comma; it must
   be one of the four keywords below or be accepted by date_format_parse.  */
221 else if (strncmp (element, ",time", 5) == 0
222 || strncmp (element, ",date", 5) == 0)
226 if (*element == '\0')
228 else if (*element == ',')
231 if (strcmp (element, "short") == 0
232 || strcmp (element, "medium") == 0
233 || strcmp (element, "long") == 0
234 || strcmp (element, "full") == 0
235 || date_format_parse (element))
240 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid date/time style."), spec->directives, element);
241 FDI_SET (format - 1, FMTDIR_ERROR);
242 freea (element_alloced);
251 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
252 FDI_SET (format - 1, FMTDIR_ERROR);
253 freea (element_alloced);
/* Number element.  An optional style may follow after a comma; it must be
   one of the three keywords below or be accepted by number_format_parse.  */
257 else if (strncmp (element, ",number", 7) == 0)
261 if (*element == '\0')
263 else if (*element == ',')
266 if (strcmp (element, "currency") == 0
267 || strcmp (element, "percent") == 0
268 || strcmp (element, "integer") == 0
269 || number_format_parse (element))
274 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid number style."), spec->directives, element);
275 FDI_SET (format - 1, FMTDIR_ERROR);
276 freea (element_alloced);
285 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
286 FDI_SET (format - 1, FMTDIR_ERROR);
287 freea (element_alloced);
/* Choice element.  The text after the comma is checked by
   choice_format_parse, which receives the same spec and invalid_reason.  */
291 else if (strncmp (element, ",choice", 7) == 0)
293 type = FAT_NUMBER; /* because ChoiceFormat extends NumberFormat */
295 if (*element == '\0')
297 else if (*element == ',')
300 if (choice_format_parse (element, spec, invalid_reason))
304 FDI_SET (format - 1, FMTDIR_ERROR);
305 freea (element_alloced);
314 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
315 FDI_SET (format - 1, FMTDIR_ERROR);
316 freea (element_alloced);
323 xasprintf (_("In the directive number %u, the argument number is not followed by a comma and one of \"%s\", \"%s\", \"%s\", \"%s\"."), spec->directives, "time", "date", "number", "choice");
324 FDI_SET (format - 1, FMTDIR_ERROR);
325 freea (element_alloced);
/* The element was accepted; the scratch copy is no longer needed.  */
328 freea (element_alloced);
/* Grow the record array when it is full, then append (number, type).  */
330 if (spec->allocated == spec->numbered_arg_count)
332 spec->allocated = 2 * spec->allocated + 1;
333 spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, spec->allocated * sizeof (struct numbered_arg));
335 spec->numbered[spec->numbered_arg_count].number = number;
336 spec->numbered[spec->numbered_arg_count].type = type;
337 spec->numbered_arg_count++;
339 FDI_SET (format - 1, FMTDIR_END);
341 /* The doc says "ab}de" is invalid, even though the JDK accepts it. */
342 else if (!quoting && *format == '}')
344 FDI_SET (format, FMTDIR_START);
346 xstrdup (_("The string starts in the middle of a directive: found '}' without matching '{'."));
347 FDI_SET (format, FMTDIR_ERROR);
350 else if (*format != '\0')
359 /* Return true if a format is a valid dateFormatPattern. */
/* message_format_parse calls this with the style text of a ",time" or
   ",date" element, after the keywords "short", "medium", "long" and "full"
   have failed to match.  */
361 date_format_parse (const char *format)
363 /* Any string is valid. Single-quote starts a quoted section, to be
364 terminated at the next single-quote or string end. Double single-quote
365 gives a single single-quote. Non-quoted ASCII letters are first grouped
366 into blocks of equal letters. Then each block (e.g. 'yyyy') is
367 interpreted according to some rules. */
371 /* Return true if a format is a valid numberFormatPattern. */
/* message_format_parse calls this with the style text of a ",number"
   element, after the keywords "currency", "percent" and "integer" have
   failed to match.  The function walks FORMAT through prefix, integer part,
   fraction, exponent and suffix; its final result is true only when the
   walk stopped at the terminating NUL.  */
373 number_format_parse (const char *format)
376 pattern := pos_pattern{';' neg_pattern}
377 pos_pattern := {prefix}number{suffix}
378 neg_pattern := {prefix}number{suffix}
379 number := integer{'.' fraction}{exponent}
380 prefix := '\u0000'..'\uFFFD' - special_characters
381 suffix := '\u0000'..'\uFFFD' - special_characters
382 integer := min_int | '#' | '#' integer | '#' ',' integer
383 min_int := '0' | '0' min_int | '0' ',' min_int
384 fraction := '0'* '#'*
385 exponent := 'E' '0' '0'*
387 X* 0 or more instances of X
388 { X } 0 or 1 instances of X
390 X..Y any character from X up to Y, inclusive
391 S - T characters in S, except those in T
392 Single-quote starts a quoted section, to be terminated at the next
393 single-quote or string end. Double single-quote gives a single
396 bool quoting = false;
397 bool seen_semicolon = false;
/* Prefix: consume characters until the first unquoted '0' or '#'.  A
   backslash followed by hex digits at offsets 2..5 is tested for
   separately (presumably a \uXXXX escape, as used in the grammar above).  */
403 while (*format != '\0'
404 && !(!quoting && (*format == '0' || *format == '#')))
406 if (format[0] == '\\')
409 && c_isxdigit (format[2])
410 && c_isxdigit (format[3])
411 && c_isxdigit (format[4])
412 && c_isxdigit (format[5]))
/* After the prefix, an unquoted '0' or '#' is expected.  */
423 if (!(!quoting && (*format == '0' || *format == '#')))
/* Integer part: a run of '#' followed by a run of '0'; an unquoted ','
   is looked for after a character of either run.  */
425 while (!quoting && *format == '#')
429 if (!quoting && *format == ',')
435 while (!quoting && *format == '0')
439 if (!quoting && *format == ',')
446 /* Parse fraction. */
447 if (!quoting && *format == '.')
451 while (!quoting && *format == '0')
456 while (!quoting && *format == '#')
463 /* Parse exponent. */
464 if (!quoting && *format == 'E')
/* Remember where the 'E' was, so that format can be reset to this
   position (see the assignment from format_save below).  */
466 const char *format_save = format;
469 if (!quoting && *format == '0')
476 while (!quoting && *format == '0');
481 format = format_save;
/* Suffix: consume characters up to an unquoted ';'.  Once seen_semicolon
   is set, a ';' no longer stops the scan.  */
487 while (*format != '\0'
488 && (seen_semicolon || !(!quoting && *format == ';')))
490 if (format[0] == '\\')
493 && c_isxdigit (format[2])
494 && c_isxdigit (format[3])
495 && c_isxdigit (format[4])
496 && c_isxdigit (format[5]))
506 if (seen_semicolon || !(!quoting && *format == ';'))
/* Valid only if the whole string has been consumed.  */
510 return (*format == '\0');
513 /* Return true if a format is a valid choiceFormatPattern.
514 Extracts argument type information into spec. */
/* message_format_parse calls this with the text that follows ",choice,".
   For a choice, the function scans the number, checks the separator, copies
   the message text into a scratch buffer and validates that buffer with
   message_format_parse, passing NULL as its fdi argument and handing on
   SPEC and INVALID_REASON.  Error messages identify the directive by
   SPEC->directives.  */
516 choice_format_parse (const char *format, struct spec *spec,
517 char **invalid_reason)
520 pattern := | choice | choice '|' pattern
521 choice := number separator messageformat
522 separator := '<' | '#' | '\u2264'
523 Single-quote starts a quoted section, to be terminated at the next
524 single-quote or string end. Double single-quote gives a single
527 bool quoting = false;
534 /* Don't bother looking too precisely into the syntax of the number.
535 It can contain various Unicode characters. */
536 bool number_nonempty;
539 bool msgformat_valid;
/* Scan the number: everything up to an unquoted separator, which is '<',
   '#' or the six-character escape text \u2264.  */
542 number_nonempty = false;
543 while (*format != '\0'
544 && !(!quoting && (*format == '<' || *format == '#'
545 || strncmp (format, "\\u2264", 6) == 0
548 if (format[0] == '\\')
551 && c_isxdigit (format[2])
552 && c_isxdigit (format[3])
553 && c_isxdigit (format[4])
554 && c_isxdigit (format[5]))
561 number_nonempty = true;
565 /* Short clause at end of pattern is valid and is ignored! */
/* A choice whose number part is empty is reported as an error.  */
569 if (!number_nonempty)
572 xasprintf (_("In the directive number %u, a choice contains no number."), spec->directives);
/* The number must be followed by one of the three separators.  */
576 if (*format == '<' || *format == '#')
578 else if (strncmp (format, "\\u2264", 6) == 0)
583 xasprintf (_("In the directive number %u, a choice contains a number that is not followed by '<', '#' or '%s'."), spec->directives, "\\u2264");
/* Scratch buffer for the message text, sized for the whole remainder of
   FORMAT.  The loop below runs up to the next unquoted '|' or the end of
   the string.  */
588 msgformat = (char *) xmalloca (strlen (format) + 1);
591 while (*format != '\0' && !(!quoting && *format == '|'))
599 message_format_parse (msgformat, NULL, spec, invalid_reason);
603 if (!msgformat_valid)
/* Comparison callback handed to qsort in format_parse.  P1 and P2 point to
   struct numbered_arg records, which are ordered by ascending 'number'.
   Returns 1, -1 or 0.  */
617 numbered_arg_compare (const void *p1, const void *p2)
619 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
620 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
/* Explicit three-way comparison of the two unsigned values.  */
622 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
/* Entry point for parsing a Java format string.  It initializes a local
   spec, lets message_format_parse fill it from FORMAT (passing on FDI and
   INVALID_REASON), then sorts the argument records by number and merges
   records that refer to the same argument.  */
626 format_parse (const char *format, bool translated, char *fdi,
627 char **invalid_reason)
633 spec.numbered_arg_count = 0;
635 spec.numbered = NULL;
637 if (!message_format_parse (format, fdi, &spec, invalid_reason))
640 /* Sort the numbered argument array, and eliminate duplicates. */
641 if (spec.numbered_arg_count > 1)
646 qsort (spec.numbered, spec.numbered_arg_count,
647 sizeof (struct numbered_arg), numbered_arg_compare);
649 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
651 for (i = j = 0; i < spec.numbered_arg_count; i++)
652 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
654 enum format_arg_type type1 = spec.numbered[i].type;
655 enum format_arg_type type2 = spec.numbered[j-1].type;
656 enum format_arg_type type_both;
/* Two records for the same argument can be merged when their types are
   equal or when one of them is FAT_OBJECT.  Any other combination yields
   FAT_NONE and is reported through INVALID_INCOMPATIBLE_ARG_TYPES.  */
658 if (type1 == type2 || type2 == FAT_OBJECT)
660 else if (type1 == FAT_OBJECT)
664 /* Incompatible types. */
665 type_both = FAT_NONE;
668 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
/* The kept record (at j-1) receives the merged type.  */
672 spec.numbered[j-1].type = type_both;
/* Not a duplicate: move record i down to slot j.  */
678 spec.numbered[j].number = spec.numbered[i].number;
679 spec.numbered[j].type = spec.numbered[i].type;
/* j is the number of records that remain after merging.  */
683 spec.numbered_arg_count = j;
685 /* *invalid_reason has already been set above. */
/* The returned descriptor is a separately allocated struct spec.  */
689 result = XMALLOC (struct spec);
/* Release the record array if one was allocated.  */
694 if (spec.numbered != NULL)
695 free (spec.numbered);
/* Releases the resources of a descriptor.  DESCR is treated as a
   struct spec *; its 'numbered' array is freed when non-NULL.  */
700 format_free (void *descr)
702 struct spec *spec = (struct spec *) descr;
704 if (spec->numbered != NULL)
705 free (spec->numbered);
/* Returns the 'directives' field of the descriptor.  DESCR is treated as a
   struct spec *.  */
710 format_get_number_of_directives (void *descr)
712 struct spec *spec = (struct spec *) descr;
714 return spec->directives;
/* Compares the argument records of two descriptors, MSGID_DESCR and
   MSGSTR_DESCR (both treated as struct spec *), and reports differences
   through ERROR_LOGGER.  PRETTY_MSGID and PRETTY_MSGSTR are inserted into
   the messages as the names of the two strings.  The first pass compares
   the sets of argument numbers, the second the types of arguments that
   occur in both.  */
718 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
719 formatstring_error_logger_t error_logger,
720 const char *pretty_msgid, const char *pretty_msgstr)
722 struct spec *spec1 = (struct spec *) msgid_descr;
723 struct spec *spec2 = (struct spec *) msgstr_descr;
/* Nothing to compare when neither descriptor has argument records.  */
726 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
729 unsigned int n1 = spec1->numbered_arg_count;
730 unsigned int n2 = spec2->numbered_arg_count;
732 /* Check the argument names are the same.
733 Both arrays are sorted. We search for the first difference. */
/* i walks spec1's records and j walks spec2's; cmp tells which side holds
   the smaller argument number at the current positions.  */
734 for (i = 0, j = 0; i < n1 || j < n2; )
736 int cmp = (i >= n1 ? 1 :
738 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
739 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
/* This message names an argument number taken from spec2.  */
745 error_logger (_("a format specification for argument {%u}, as in '%s', doesn't exist in '%s'"),
746 spec2->numbered[j].number, pretty_msgstr,
/* This message names an argument number taken from spec1.  */
756 error_logger (_("a format specification for argument {%u} doesn't exist in '%s'"),
757 spec1->numbered[i].number, pretty_msgstr);
767 /* Check the argument types are the same. */
/* This pass is driven by spec2's records (the loop runs while j < n2).  */
769 for (i = 0, j = 0; j < n2; )
771 if (spec1->numbered[i].number == spec2->numbered[j].number)
773 if (spec1->numbered[i].type != spec2->numbered[j].type)
776 error_logger (_("format specifications in '%s' and '%s' for argument {%u} are not the same"),
777 pretty_msgid, pretty_msgstr,
778 spec2->numbered[j].number);
/* Parser descriptor for Java format strings.  Its initializer lists this
   file's functions; format_get_number_of_directives is one of them.  */
793 struct formatstring_parser formatstring_java =
797 format_get_number_of_directives,
805 /* Test program: Print the argument list specification returned by
806 format_parse for strings read from standard input. */
/* Part of the test program: walks the argument records of DESCR (treated
   as a struct spec *) in array order.  */
811 format_print (void *descr)
813 struct spec *spec = (struct spec *) descr;
825 for (i = 0; i < spec->numbered_arg_count; i++)
827 unsigned int number = spec->numbered[i].number;
/* Advance 'last' until it reaches this record's argument number, i.e.
   step over the argument positions that have no record.  */
833 for (; last < number; last++)
/* What is done for the record depends on its type.  */
835 switch (spec->numbered[i].type)
/* Test driver: reads a line from standard input with getline, strips one
   trailing newline, parses the line with format_parse (no fdi buffer) and
   passes the resulting descriptor to format_print.  */
860 size_t line_size = 0;
862 char *invalid_reason;
865 line_len = getline (&line, &line_size, stdin);
/* Drop the trailing newline, if there is one.  */
868 if (line_len > 0 && line[line_len - 1] == '\n')
869 line[--line_len] = '\0';
/* Start from NULL so that the pointer is defined even when format_parse
   stores no message.  */
871 invalid_reason = NULL;
872 descr = format_parse (line, false, NULL, &invalid_reason);
874 format_print (descr);
/* The message is printed and then released with free.  */
877 printf ("%s\n", invalid_reason);
879 free (invalid_reason);
887 * For Emacs M-x compile
889 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-java.c ../gnulib-lib/libgettextlib.la"