1 /* Java format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009, 2015 Free Software
4 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
32 #include "xvasprintf.h"
33 #include "format-invalid.h"
36 #define _(str) gettext (str)
38 /* Java format strings are described in java/text/MessageFormat.html.
39 See also the ICU documentation class_MessageFormat.html.
41 messageFormatPattern := string ( "{" messageFormatElement "}" string )*
43 messageFormatElement := argument { "," elementFormat }
45 elementFormat := "time" { "," datetimeStyle }
46 | "date" { "," datetimeStyle }
47 | "number" { "," numberStyle }
48 | "choice" { "," choiceStyle }
50 datetimeStyle := "short"
56 numberStyle := "currency"
61 choiceStyle := choiceFormatPattern
63 dateFormatPattern see SimpleDateFormat.applyPattern
65 numberFormatPattern see DecimalFormat.applyPattern
67 choiceFormatPattern see ChoiceFormat constructor
69 In strings, literal curly braces can be used if quoted between single
70 quotes. A real single quote is represented by ''.
72 If a pattern is used, then unquoted braces in the pattern, if any, must
73 match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab {0'}' de" and
76 The argument is a number from 0 to 9, which corresponds to the arguments
77 presented in an array to be formatted.
79 It is ok to have unused arguments in the array.
81 Adding a dateFormatPattern / numberFormatPattern / choiceFormatPattern
82 to an elementFormat is equivalent to creating a SimpleDateFormat /
83 DecimalFormat / ChoiceFormat and installing it with setFormat. For example,
86 new MessageFormat("The disk \"{1}\" contains {0,choice,0#no files|1#one file|2#{0,number} files}.");
90 MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
91 form.setFormat(1, // Number of {} occurrence in the string!
92 new ChoiceFormat(new double[] { 0, 1, 2 },
93 new String[] { "no files", "one file",
94 "{0,number} files" }));
96 Note: The behaviour of quotes inside a choiceFormatPattern is not clear.
98 "abc{1,choice,0#{1,number,00';'000}}def"
100 JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;000}}def"
102 "abc{1,choice,0#{1,number,00';'}}def"
103 JDK 1.1.x: interprets the semicolon as number suffix
104 JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;}}def"
110 FAT_OBJECT, /* java.lang.Object */
111 FAT_NUMBER, /* java.lang.Number */
112 FAT_DATE /* java.util.Date */
/* Type recorded for one numbered argument (stored next to its number
   by message_format_parse). */
118 enum format_arg_type type;
/* Directive counter: quoted in error messages as "directive number %u"
   and returned by format_get_number_of_directives. */
123 unsigned int directives;
/* Number of entries of `numbered` currently in use. */
124 unsigned int numbered_arg_count;
/* Capacity of `numbered`; grown to 2 * allocated + 1 when it fills up. */
125 unsigned int allocated;
/* Heap array of (number, type) entries; NULL until the first entry is
   stored. */
126 struct numbered_arg *numbered;
130 /* Forward declaration of local functions. */
/* The date and number pattern checkers only look at the pattern text. */
131 static bool date_format_parse (const char *format);
132 static bool number_format_parse (const char *format);
/* The choice pattern checker also receives the spec under construction and
   the invalid_reason out-parameter, both of which it forwards to
   message_format_parse for the embedded message formats. */
133 static bool choice_format_parse (const char *format, struct spec *spec,
134 char **invalid_reason);
138 - When we see a single-quote, ignore it, but toggle the quoting flag.
139 - When we see a double single-quote, ignore the first of the two.
140 Assumes local variables format, quoting. */
/* Note that the condition itself has a side effect: when *format is a single
   quote, `format` is advanced by one before the following character is
   compared.  The condition is true only if that following character is not
   another single quote. */
141 #define HANDLE_QUOTE \
142 if (*format == '\'' && *++format != '\'') \
145 /* Note that message_format_parse and choice_format_parse are mutually
146 recursive. This is because MessageFormat can use some ChoiceFormats,
147 and a ChoiceFormat is made up from several MessageFormats. */
149 /* Return true if a format is a valid messageFormatPattern.
150 Extracts argument type information into spec. */
/* Parameters, as far as the visible code shows:
     format          pattern text to scan, NUL-terminated.
     fdi             not referenced directly below; presumably consumed by
                     the FDI_SET macro (TODO confirm against its definition).
                     choice_format_parse passes NULL here.
     spec            receives one (number, type) entry per directive.
     invalid_reason  handed on to choice_format_parse for nested patterns;
                     the error strings built below are presumably stored
                     through it (the assignments are not visible here). */
152 message_format_parse (const char *format, char *fdi, struct spec *spec,
153 char **invalid_reason)
155 const char *const format_start = format;
156 bool quoting = false;
/* An unquoted '{' opens a directive. */
161 if (!quoting && *format == '{')
164 const char *element_start;
165 const char *element_end;
167 char *element_alloced;
170 enum format_arg_type type;
172 FDI_SET (format, FMTDIR_START);
/* The element text starts right after the opening brace. */
175 element_start = ++format;
177 for (; *format != '\0'; format++)
181 else if (*format == '}')
192 xstrdup (_("The string ends in the middle of a directive: found '{' without matching '}'."));
193 FDI_SET (format - 1, FMTDIR_ERROR);
/* element_end marks where the scan stopped; format moves one past it. */
196 element_end = format++;
/* Copy the n characters of the element into a scratch buffer of n + 1
   bytes.  The buffer comes from xmalloca and is released with freea on
   every exit path below. */
198 n = element_end - element_start;
199 element = element_alloced = (char *) xmalloca (n + 1);
200 memcpy (element, element_start, n);
/* The element must begin with a decimal digit (the argument number). */
203 if (!c_isdigit (*element))
206 xasprintf (_("In the directive number %u, '{' is not followed by an argument number."), spec->directives);
207 FDI_SET (format - 1, FMTDIR_ERROR);
208 freea (element_alloced);
/* Accumulate the decimal argument number digit by digit. */
214 number = 10 * number + (*element - '0');
217 while (c_isdigit (*element));
/* Dispatch on what follows the argument number: nothing, ",time" or
   ",date", ",number", or ",choice".  Anything else is an error. */
220 if (*element == '\0')
222 else if (strncmp (element, ",time", 5) == 0
223 || strncmp (element, ",date", 5) == 0)
227 if (*element == '\0')
229 else if (*element == ',')
/* A date/time style is one of the four predefined names or anything
   date_format_parse accepts. */
232 if (strcmp (element, "short") == 0
233 || strcmp (element, "medium") == 0
234 || strcmp (element, "long") == 0
235 || strcmp (element, "full") == 0
236 || date_format_parse (element))
241 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid date/time style."), spec->directives, element);
242 FDI_SET (format - 1, FMTDIR_ERROR);
243 freea (element_alloced);
252 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
253 FDI_SET (format - 1, FMTDIR_ERROR);
254 freea (element_alloced);
258 else if (strncmp (element, ",number", 7) == 0)
262 if (*element == '\0')
264 else if (*element == ',')
/* A number style is one of the three predefined names or anything
   number_format_parse accepts. */
267 if (strcmp (element, "currency") == 0
268 || strcmp (element, "percent") == 0
269 || strcmp (element, "integer") == 0
270 || number_format_parse (element))
275 xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid number style."), spec->directives, element);
276 FDI_SET (format - 1, FMTDIR_ERROR);
277 freea (element_alloced);
286 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
287 FDI_SET (format - 1, FMTDIR_ERROR);
288 freea (element_alloced);
292 else if (strncmp (element, ",choice", 7) == 0)
294 type = FAT_NUMBER; /* because ChoiceFormat extends NumberFormat */
296 if (*element == '\0')
298 else if (*element == ',')
/* Recursive step: the choice style is checked by choice_format_parse,
   which receives the same spec and invalid_reason. */
301 if (choice_format_parse (element, spec, invalid_reason))
305 FDI_SET (format - 1, FMTDIR_ERROR);
306 freea (element_alloced);
315 xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element);
316 FDI_SET (format - 1, FMTDIR_ERROR);
317 freea (element_alloced);
324 xasprintf (_("In the directive number %u, the argument number is not followed by a comma and one of \"%s\", \"%s\", \"%s\", \"%s\"."), spec->directives, "time", "date", "number", "choice");
325 FDI_SET (format - 1, FMTDIR_ERROR);
326 freea (element_alloced);
329 freea (element_alloced);
/* Append (number, type) to spec->numbered, growing the array to
   2 * allocated + 1 entries first when it is full. */
331 if (spec->allocated == spec->numbered_arg_count)
333 spec->allocated = 2 * spec->allocated + 1;
334 spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, spec->allocated * sizeof (struct numbered_arg));
336 spec->numbered[spec->numbered_arg_count].number = number;
337 spec->numbered[spec->numbered_arg_count].type = type;
338 spec->numbered_arg_count++;
340 FDI_SET (format - 1, FMTDIR_END);
342 /* The doc says "ab}de" is invalid. Even though JDK accepts it. */
343 else if (!quoting && *format == '}')
345 FDI_SET (format, FMTDIR_START);
347 xstrdup (_("The string starts in the middle of a directive: found '}' without matching '{'."));
348 FDI_SET (format, FMTDIR_ERROR);
351 else if (*format != '\0')
360 /* Return true if a format is a valid dateFormatPattern. */
/* Called by message_format_parse with the style text of a ",time" or ",date"
   directive, after the predefined names "short", "medium", "long" and "full"
   have been tried. */
362 date_format_parse (const char *format)
364 /* Any string is valid. Single-quote starts a quoted section, to be
365 terminated at the next single-quote or string end. Double single-quote
366 gives a single single-quote. Non-quoted ASCII letters are first grouped
367 into blocks of equal letters. Then each block (e.g. 'yyyy') is
368 interpreted according to some rules. */
372 /* Return true if a format is a valid numberFormatPattern. */
/* Called by message_format_parse with the style text of a ",number"
   directive, after the predefined names "currency", "percent" and "integer"
   have been tried.  The result is true only if the scan below consumes the
   whole string. */
374 number_format_parse (const char *format)
377 pattern := pos_pattern{';' neg_pattern}
378 pos_pattern := {prefix}number{suffix}
379 neg_pattern := {prefix}number{suffix}
380 number := integer{'.' fraction}{exponent}
381 prefix := '\u0000'..'\uFFFD' - special_characters
382 suffix := '\u0000'..'\uFFFD' - special_characters
383 integer := min_int | '#' | '#' integer | '#' ',' integer
384 min_int := '0' | '0' min_int | '0' ',' min_int
385 fraction := '0'* '#'*
386 exponent := 'E' '0' '0'*
388 X* 0 or more instances of X
389 { X } 0 or 1 instances of X
391 X..Y any character from X up to Y, inclusive
392 S - T characters in S, except those in T
393 Single-quote starts a quoted section, to be terminated at the next
394 single-quote or string end. Double single-quote gives a single
397 bool quoting = false;
/* While seen_semicolon is false, an unquoted ';' ends the suffix scan;
   once it is true, ';' is treated like any other suffix character. */
398 bool seen_semicolon = false;
/* Prefix scan: runs until end of string or the first unquoted '0' or '#'. */
404 while (*format != '\0'
405 && !(!quoting && (*format == '0' || *format == '#')))
/* Backslash sequence: format[2] through format[5] are required to be
   hexadecimal digits. */
407 if (format[0] == '\\')
410 && c_isxdigit (format[2])
411 && c_isxdigit (format[3])
412 && c_isxdigit (format[4])
413 && c_isxdigit (format[5]))
/* The integer part has to start with an unquoted '0' or '#'. */
424 if (!(!quoting && (*format == '0' || *format == '#')))
/* Runs of '#' and then runs of '0', each of which may be followed by an
   unquoted ','. */
426 while (!quoting && *format == '#')
430 if (!quoting && *format == ',')
436 while (!quoting && *format == '0')
440 if (!quoting && *format == ',')
447 /* Parse fraction. */
448 if (!quoting && *format == '.')
452 while (!quoting && *format == '0')
457 while (!quoting && *format == '#')
464 /* Parse exponent. */
465 if (!quoting && *format == 'E')
/* Remember the position of the 'E' so the scan can be rewound to it
   (see the assignment from format_save below). */
467 const char *format_save = format;
470 if (!quoting && *format == '0')
477 while (!quoting && *format == '0');
482 format = format_save;
/* Suffix scan: same backslash handling as in the prefix scan. */
488 while (*format != '\0'
489 && (seen_semicolon || !(!quoting && *format == ';')))
491 if (format[0] == '\\')
494 && c_isxdigit (format[2])
495 && c_isxdigit (format[3])
496 && c_isxdigit (format[4])
497 && c_isxdigit (format[5]))
507 if (seen_semicolon || !(!quoting && *format == ';'))
/* Valid only if the scan reached the end of the string. */
511 return (*format == '\0');
514 /* Return true if a format is a valid choiceFormatPattern.
515 Extracts argument type information into spec. */
/* Called by message_format_parse with the style text of a ",choice"
   directive.  Each choice's message part is copied into a scratch buffer and
   checked by message_format_parse, using the same spec and invalid_reason. */
517 choice_format_parse (const char *format, struct spec *spec,
518 char **invalid_reason)
521 pattern := | choice | choice '|' pattern
522 choice := number separator messageformat
523 separator := '<' | '#' | '\u2264'
524 Single-quote starts a quoted section, to be terminated at the next
525 single-quote or string end. Double single-quote gives a single
528 bool quoting = false;
535 /* Don't bother looking too precisely into the syntax of the number.
536 It can contain various Unicode characters. */
537 bool number_nonempty;
/* Result of checking the embedded message format of the current choice. */
540 bool msgformat_valid;
/* Scan the number: everything up to an unquoted separator, i.e. '<', '#'
   or the six characters backslash-u-2-2-6-4 spelled out literally. */
543 number_nonempty = false;
544 while (*format != '\0'
545 && !(!quoting && (*format == '<' || *format == '#'
546 || strncmp (format, "\\u2264", 6) == 0
/* Backslash sequence: format[2] through format[5] are required to be
   hexadecimal digits. */
549 if (format[0] == '\\')
552 && c_isxdigit (format[2])
553 && c_isxdigit (format[3])
554 && c_isxdigit (format[4])
555 && c_isxdigit (format[5]))
562 number_nonempty = true;
566 /* Short clause at end of pattern is valid and is ignored! */
570 if (!number_nonempty)
573 xasprintf (_("In the directive number %u, a choice contains no number."), spec->directives);
/* The number must be followed by one of the three separators. */
577 if (*format == '<' || *format == '#')
579 else if (strncmp (format, "\\u2264", 6) == 0)
584 xasprintf (_("In the directive number %u, a choice contains a number that is not followed by '<', '#' or '%s'."), spec->directives, "\\u2264");
/* Scratch buffer for the message part, sized to hold the whole remaining
   string. */
589 msgformat = (char *) xmalloca (strlen (format) + 1);
/* The message part extends to the next unquoted '|' or the end of the
   string. */
592 while (*format != '\0' && !(!quoting && *format == '|'))
/* Recursive step; NULL is passed for the fdi argument. */
600 message_format_parse (msgformat, NULL, spec, invalid_reason);
604 if (!msgformat_valid)
/* qsort comparison callback: orders two struct numbered_arg elements by
   their `number` field, ascending.  Returns 1, -1 or 0. */
618 numbered_arg_compare (const void *p1, const void *p2)
620 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
621 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
/* Explicit comparisons rather than a subtraction of the unsigned values. */
623 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
/* Parse `format` into a struct spec.
   The pattern is checked by message_format_parse, which fills a local spec.
   The numbered arguments are then sorted by number and duplicates are merged
   into one entry each.  Conflicting types for the same argument number are
   reported through INVALID_INCOMPATIBLE_ARG_TYPES.  A result object is
   allocated with XMALLOC; on the failure path shown at the end, the local
   argument array is freed.  `translated` is not referenced in the lines
   visible here. */
627 format_parse (const char *format, bool translated, char *fdi,
628 char **invalid_reason)
634 spec.numbered_arg_count = 0;
636 spec.numbered = NULL;
638 if (!message_format_parse (format, fdi, &spec, invalid_reason))
641 /* Sort the numbered argument array, and eliminate duplicates. */
642 if (spec.numbered_arg_count > 1)
647 qsort (spec.numbered, spec.numbered_arg_count,
648 sizeof (struct numbered_arg), numbered_arg_compare);
650 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
652 for (i = j = 0; i < spec.numbered_arg_count; i++)
/* Entry i repeats the argument number of the last kept entry (j-1). */
653 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
655 enum format_arg_type type1 = spec.numbered[i].type;
656 enum format_arg_type type2 = spec.numbered[j-1].type;
657 enum format_arg_type type_both;
/* The two types are compatible when they are equal or when either one is
   FAT_OBJECT; otherwise type_both becomes FAT_NONE and the conflict is
   reported. */
659 if (type1 == type2 || type2 == FAT_OBJECT)
661 else if (type1 == FAT_OBJECT)
665 /* Incompatible types. */
666 type_both = FAT_NONE;
669 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
673 spec.numbered[j-1].type = type_both;
/* First occurrence of this argument number: keep it at position j. */
679 spec.numbered[j].number = spec.numbered[i].number;
680 spec.numbered[j].type = spec.numbered[i].type;
/* j is now the number of distinct argument numbers. */
684 spec.numbered_arg_count = j;
686 /* *invalid_reason has already been set above. */
690 result = XMALLOC (struct spec);
695 if (spec.numbered != NULL)
696 free (spec.numbered);
/* Release the storage of a parsed format description.  `descr` is
   treated as a struct spec; its `numbered` array is freed when non-NULL. */
701 format_free (void *descr)
703 struct spec *spec = (struct spec *) descr;
705 if (spec->numbered != NULL)
706 free (spec->numbered);
/* Return the `directives` counter of the struct spec that `descr`
   points to. */
711 format_get_number_of_directives (void *descr)
713 struct spec *spec = (struct spec *) descr;
715 return spec->directives;
/* Compare the numbered arguments of the msgid description with those of the
   msgstr description.  First the sets of argument numbers are compared, then
   the types of matching arguments.  Every difference found is reported
   through error_logger, using pretty_msgid / pretty_msgstr to name the two
   strings.  How `equality` affects the check is not visible in the lines
   shown here. */
719 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
720 formatstring_error_logger_t error_logger,
721 const char *pretty_msgid, const char *pretty_msgstr)
723 struct spec *spec1 = (struct spec *) msgid_descr;
724 struct spec *spec2 = (struct spec *) msgstr_descr;
/* Nothing to compare when neither side has any numbered argument. */
727 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
730 unsigned int n1 = spec1->numbered_arg_count;
731 unsigned int n2 = spec2->numbered_arg_count;
733 /* Check the argument names are the same.
734 Both arrays are sorted. We search for the first difference. */
735 for (i = 0, j = 0; i < n1 || j < n2; )
/* Merge-style walk over the two sorted arrays: cmp is 1 when spec1 is
   exhausted or its current number is the larger one, and -1 when spec1's
   current number is the smaller one. */
737 int cmp = (i >= n1 ? 1 :
739 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
740 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
/* Report for an argument number taken from spec2 (the msgstr side). */
746 error_logger (_("a format specification for argument {%u}, as in '%s', doesn't exist in '%s'"),
747 spec2->numbered[j].number, pretty_msgstr,
/* Report for an argument number taken from spec1 (the msgid side). */
757 error_logger (_("a format specification for argument {%u} doesn't exist in '%s'"),
758 spec1->numbered[i].number, pretty_msgstr);
768 /* Check the argument types are the same. */
770 for (i = 0, j = 0; j < n2; )
772 if (spec1->numbered[i].number == spec2->numbered[j].number)
774 if (spec1->numbered[i].type != spec2->numbered[j].type)
777 error_logger (_("format specifications in '%s' and '%s' for argument {%u} are not the same"),
778 pretty_msgid, pretty_msgstr,
779 spec2->numbered[j].number);
/* Parser descriptor object for Java format strings, of type struct
   formatstring_parser.  Only one of its initializer entries is visible here. */
794 struct formatstring_parser formatstring_java =
798 format_get_number_of_directives,
806 /* Test program: Print the argument list specification returned by
807 format_parse for strings read from standard input. */
/* format_print walks the numbered arguments of a parsed description in array
   order. */
812 format_print (void *descr)
814 struct spec *spec = (struct spec *) descr;
826 for (i = 0; i < spec->numbered_arg_count; i++)
828 unsigned int number = spec->numbered[i].number;
/* Step `last` up to this argument's number; the loop body is not shown
   here. */
834 for (; last < number; last++)
/* Per-type handling of the argument. */
836 switch (spec->numbered[i].type)
/* Test driver body: read a line from stdin with getline, strip one trailing
   newline, parse the line with format_parse, and print the result using
   format_print and/or the invalid_reason text.  The branch that selects
   between the two outputs is not visible here. */
861 size_t line_size = 0;
863 char *invalid_reason;
866 line_len = getline (&line, &line_size, stdin);
/* Drop the newline that getline leaves at the end of the line. */
869 if (line_len > 0 && line[line_len - 1] == '\n')
870 line[--line_len] = '\0';
/* Reset before each parse; freed after printing below. */
872 invalid_reason = NULL;
873 descr = format_parse (line, false, NULL, &invalid_reason);
875 format_print (descr);
878 printf ("%s\n", invalid_reason);
880 free (invalid_reason);
888 * For Emacs M-x compile
890 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-java.c ../gnulib-lib/libgettextlib.la"