1 /* Copyright (C) 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
35 #include "linereader.h"
36 #include "localeinfo.h"
40 /* Uncomment the following line in the production version. */
41 /* #define NDEBUG 1 */
44 /* Define the lookup function. */
45 #include "locfile-kw.h"
48 /* Some useful macros. */
/* MIN (A, B) yields the smaller of its two arguments.  Each argument is
   copied into a `typeof' temporary inside a GNU statement expression, so
   it is evaluated exactly once even if it has side effects.  */
49 #define MIN(a, b) (__extension__ ({ typeof (a) _a = (a); \
50 typeof (b) _b = (b); \
51 _a < _b ? _a : _b; }))
/* Prototypes for the allocation helpers used in this file; their
   definitions are not part of this file.
   NOTE(review): presumably these terminate the program on allocation
   failure instead of returning NULL -- confirm against their definition.  */
54 void *xmalloc (size_t __n);
55 char *xstrdup (const char *__str);
58 locfile_read (const char *filename, struct charset_t *charset)
60 struct linereader *ldfile;
61 struct localedef_t *result;
63 enum token_t expected_tok = tok_none;
64 const char *expected_str = NULL;
65 enum token_t ctype_tok_sym = tok_none;
66 const char *ctype_tok_str = NULL;
67 int copy_category = 0;
70 /* Allocate space for result. */
71 result = (struct localedef_t *) xmalloc (sizeof (struct localedef_t));
72 memset (result, '\0', sizeof (struct localedef_t));
74 ldfile = lr_open (filename, locfile_hash);
77 if (filename[0] != '/')
79 char path[strlen (filename) + 1 + sizeof (LOCSRCDIR)];
81 stpcpy (stpcpy (stpcpy (path, LOCSRCDIR), "/"), filename);
82 ldfile = lr_open (path, locfile_hash);
92 #define HANDLE_COPY(category, token, string) \
93 if (nowtok == tok_copy) \
95 copy_category = category; \
96 expected_tok = token; \
97 expected_str = string; \
103 #define LOCALE_PROLOG(token, string) \
104 if (nowtok == tok_eol) \
105 /* Ignore empty lines. */ \
107 if (nowtok == tok_end) \
109 expected_tok = token; \
110 expected_str = string; \
114 if (nowtok == tok_copy) \
118 #define READ_STRING(fn, errlabel) \
121 arg = lr_token (ldfile, charset); \
122 if (arg->tok != tok_string) \
124 fn (ldfile, result, nowtok, arg, charset); \
125 lr_ignore_rest (ldfile, 1); \
129 #define READ_STRING_LIST(fn, errlabel) \
132 arg = lr_token (ldfile, charset); \
133 while (arg->tok == tok_string) \
135 fn (ldfile, result, nowtok, arg, charset); \
136 arg = lr_token (ldfile, charset); \
137 if (arg->tok != tok_semicolon) \
139 arg = lr_token (ldfile, charset); \
141 if (arg->tok != tok_eol) \
146 #define READ_NUMBER(fn, errlabel) \
149 arg = lr_token (ldfile, charset); \
150 if (arg->tok != tok_minus1 && arg->tok != tok_number) \
152 fn (ldfile, result, nowtok, arg, charset); \
153 lr_ignore_rest (ldfile, 1); \
157 #define READ_NUMBER_LIST(fn, errlabel) \
160 arg = lr_token (ldfile, charset); \
161 while (arg->tok == tok_minus1 || arg->tok == tok_number) \
163 fn (ldfile, result, nowtok, arg, charset); \
164 arg = lr_token (ldfile, charset); \
165 if (arg->tok != tok_semicolon) \
167 arg = lr_token (ldfile, charset); \
169 if (arg->tok != tok_eol) \
174 #define SYNTAX_ERROR(string) \
175 lr_error (ldfile, string); \
176 lr_ignore_rest (ldfile, 0);
179 /* Parse locale definition file and store result in RESULT. */
184 struct token *now = lr_token (ldfile, charset);
185 enum token_t nowtok = now->tok;
188 if (nowtok == tok_eof)
194 /* The beginning. We expect the special declarations, EOL or
195 the start of any locale. */
196 if (nowtok == tok_eol)
197 /* Ignore empty lines. */
202 case tok_escape_char:
203 case tok_comment_char:
204 /* We need an argument. */
205 arg = lr_token (ldfile, charset);
207 if (arg->tok != tok_ident)
209 SYNTAX_ERROR (_("bad argument"));
213 if (arg->val.str.len != 1)
215 lr_error (ldfile, _("\
216 argument to `%s' must be a single character"),
217 nowtok == tok_escape_char ? "escape_char"
220 lr_ignore_rest (ldfile, 0);
224 if (nowtok == tok_escape_char)
225 ldfile->escape_char = *arg->val.str.start;
227 ldfile->comment_char = *arg->val.str.start;
238 case tok_lc_monetary:
250 case tok_lc_messages:
256 syntax error: not inside a locale definition section"));
259 lr_ignore_rest (ldfile, 1);
263 HANDLE_COPY (LC_CTYPE, tok_lc_ctype, "LC_CYTPE");
265 ctype_startup (ldfile, result, charset);
269 /* Here we accept all the character classes, tolower/toupper,
270 and following ANSI C:1995 self-defined classes. */
271 LOCALE_PROLOG (tok_lc_ctype, "LC_CTYPE");
273 if (nowtok == tok_charclass)
275 READ_STRING_LIST (ctype_class_new, bad_new_charclass);
279 syntax error in definition of new character class"));
283 if (nowtok == tok_charconv)
285 READ_STRING_LIST (ctype_map_new, bad_new_charconv);
289 syntax error in definition of new character map"));
293 if (nowtok == tok_upper || nowtok == tok_lower
294 || nowtok == tok_alpha || nowtok == tok_digit
295 || nowtok == tok_alnum || nowtok == tok_space
296 || nowtok == tok_cntrl || nowtok == tok_punct
297 || nowtok == tok_graph || nowtok == tok_print
298 || nowtok == tok_xdigit || nowtok == tok_blank)
300 ctype_tok_sym = nowtok;
301 ctype_tok_str = NULL;
306 if (nowtok == tok_toupper|| nowtok == tok_tolower)
308 ctype_tok_sym = nowtok;
309 ctype_tok_str = NULL;
314 if (nowtok != tok_ident)
317 /* We possibly have a self-defined character class. */
318 if (ctype_is_charclass (ldfile, result, now->val.str.start))
320 ctype_tok_sym = nowtok;
321 ctype_tok_str = now->val.str.start;
326 /* ...or a self-defined character map. */
327 if (ctype_is_charconv (ldfile, result, now->val.str.start))
329 ctype_tok_sym = nowtok;
330 ctype_tok_str = now->val.str.start;
335 SYNTAX_ERROR (_("syntax error in definition of LC_CTYPE category"));
339 /* Handle `END xxx'. */
340 if (nowtok != expected_tok)
341 lr_error (ldfile, _("\
342 `%1$s' definition does not end with `END %1$s'"), expected_str);
344 lr_ignore_rest (ldfile, nowtok == expected_tok);
349 /* Here we expect a semicolon separated list of bsymbols. The
350 bit to be set in the word is given in CHARCLASS_BIT. */
353 ctype_class_start (ldfile, result, ctype_tok_sym, ctype_tok_str,
356 while (arg->tok != tok_eol)
358 /* Any token other than a bsymbol is an error. */
359 if (arg->tok != tok_bsymbol)
363 syntax error in character class definition"));
367 /* Lookup value for token and write into array. */
368 ctype_class_from (ldfile, result, arg, charset);
370 arg = lr_token (ldfile, charset);
371 if (arg->tok == tok_semicolon)
372 arg = lr_token (ldfile, charset);
373 else if (arg->tok != tok_eol)
376 /* Look for ellipsis. */
377 if (arg->tok == tok_ellipsis)
379 arg = lr_token (ldfile, charset);
380 if (arg->tok != tok_semicolon)
383 arg = lr_token (ldfile, charset);
384 if (arg->tok != tok_bsymbol)
387 /* Write range starting at LAST to ARG->VAL. */
388 ctype_class_to (ldfile, result, arg, charset);
390 arg = lr_token (ldfile, charset);
391 if (arg->tok == tok_semicolon)
392 arg = lr_token (ldfile, charset);
393 else if (arg->tok != tok_eol)
398 /* Mark class as already seen. */
399 ctype_class_end (ldfile, result);
405 /* Here we expect a list of character mappings. Note: the
406 first opening brace is already matched. */
407 ctype_map_start (ldfile, result, ctype_tok_sym, ctype_tok_str,
412 /* Match ( bsymbol , bsymbol ) */
413 if (now->tok != tok_open_brace)
416 now = lr_token (ldfile, charset);
417 if (now->tok != tok_bsymbol)
421 syntax error in character conversion definition"));
426 /* Lookup arg and assign to FROM. */
427 ctype_map_from (ldfile, result, now, charset);
429 now = lr_token (ldfile, charset);
430 if (now->tok != tok_comma)
433 now = lr_token (ldfile, charset);
434 if (now->tok != tok_bsymbol)
437 /* Lookup arg and assign to TO. */
438 ctype_map_to (ldfile, result, now, charset);
440 now = lr_token (ldfile, charset);
441 if (now->tok != tok_close_brace)
444 now = lr_token (ldfile, charset);
445 if (now->tok == tok_eol)
450 if (now->tok != tok_semicolon)
453 now = lr_token (ldfile, charset);
456 ctype_map_end (ldfile, result);
461 /* We have seen `copy'. First match the argument. */
464 if (nowtok != tok_string)
465 lr_error (ldfile, _("expect string argument for `copy'"));
467 def_to_process (now->val.str.start, 1 << copy_category);
469 lr_ignore_rest (ldfile, nowtok == tok_string);
471 /* The rest of the line must be empty
472 and the next keyword must be `END xxx'. */
474 while (lr_token (ldfile, charset)->tok != tok_end)
479 lr_error (ldfile, _("\
480 no other keyword shall be specified when `copy' is used"));
484 lr_ignore_rest (ldfile, 0);
492 HANDLE_COPY (LC_COLLATE, tok_lc_collate, "LC_COLLATE");
494 collate_startup (ldfile, result, charset);
498 /* Process the LC_COLLATE section. We expect `END LC_COLLATE'
499 any of the collation specifications, or any bsymbol. */
500 LOCALE_PROLOG (tok_lc_collate, "LC_COLLATE");
502 if (nowtok == tok_order_start)
508 if (nowtok != tok_collating_element
509 && nowtok != tok_collating_symbol)
512 lr_error (ldfile, _("\
513 syntax error in collation definition"));
514 lr_ignore_rest (ldfile, 0);
519 arg = lr_token (ldfile, charset);
520 if (arg->tok != tok_bsymbol)
522 lr_error (ldfile, _("\
523 collation symbol expected after `%s'"),
524 nowtok == tok_collating_element
525 ? "collating-element" : "collating-symbol");
526 lr_ignore_rest (ldfile, 0);
530 if (nowtok == tok_collating_element)
532 /* Save to-value as new name. */
533 collate_element_to (ldfile, result, arg, charset);
535 arg = lr_token (ldfile, charset);
536 if (arg->tok != tok_from)
538 lr_error (ldfile, _("\
539 `from' expected after first argument to `collating-element'"));
540 lr_ignore_rest (ldfile, 0);
544 arg = lr_token (ldfile, charset);
545 if (arg->tok != tok_string)
547 lr_error (ldfile, _("\
548 from-value of `collating-element' must be a string"));
549 lr_ignore_rest (ldfile, 0);
553 /* Enter new collating element. */
554 collate_element_from (ldfile, result, arg, charset);
557 /* Enter new collating symbol into table. */
558 collate_symbol (ldfile, result, arg, charset);
560 lr_ignore_rest (ldfile, 1);
564 /* We parse the rest of the line containing `order_start'.
565 In any case we continue with parsing the symbols. */
569 while (now->tok != tok_eol)
571 int collation_method = 0;
577 if (now->tok == tok_forward)
578 collation_method |= sort_forward;
579 else if (now->tok == tok_backward)
580 collation_method |= sort_backward;
581 else if (now->tok == tok_position)
582 collation_method |= sort_position;
585 lr_error (ldfile, _("unknown collation directive"));
586 lr_ignore_rest (ldfile, 0);
590 now = lr_token (ldfile, charset);
592 while (now->tok == tok_comma
593 && (now == lr_token (ldfile, charset) != tok_none));
595 /* Check for consistency: forward and backwards are
596 mutually exclusive. */
597 if ((collation_method & sort_forward) != 0
598 && (collation_method & sort_backward) != 0)
600 lr_error (ldfile, _("\
601 sorting order `forward' and `backward' are mutually exclusive"));
602 /* The recover clear the backward flag. */
603 collation_method &= ~sort_backward;
606 /* ??? I don't know whether this is correct but while
607 thinking about the `strcoll' functions I found that I
608 need a direction when performing position depended
609 collation. So I assume here that implicitly the
610 direction `forward' is given when `position' alone is
611 written. --drepper */
612 if (collation_method == sort_position)
613 collation_method |= sort_forward;
615 /* Enter info about next collation order. */
616 collate_new_order (ldfile, result, collation_method);
618 if (now->tok != tok_eol && now->tok != tok_semicolon)
620 lr_error (ldfile, _("\
621 syntax error in `order_start' directive"));
622 lr_ignore_rest (ldfile, 0);
626 if (now->tok == tok_semicolon)
627 now = lr_token (ldfile, charset);
630 /* If no argument to `order_start' is given, one `forward'
631 argument is implicitely assumed. */
633 collate_new_order (ldfile, result, sort_forward);
636 /* We now know about all sorting rules. */
637 collate_build_arrays (ldfile, result);
642 /* We read one symbol a line until `order_end' is found. */
644 static int last_correct = 1;
646 if (nowtok == tok_order_end)
649 lr_ignore_rest (ldfile, 1);
653 /* Ignore empty lines. */
654 if (nowtok == tok_eol)
657 if (nowtok != tok_bsymbol && nowtok != tok_undefined
658 && nowtok != tok_ellipsis)
660 if (last_correct == 1)
662 lr_error (ldfile, _("\
663 syntax error in collating order definition"));
666 lr_ignore_rest (ldfile, 0);
673 /* Remember current token. */
674 if (collate_order_elem (ldfile, result, now, charset) < 0)
678 /* Read optional arguments. */
679 arg = lr_token (ldfile, charset);
680 while (arg->tok != tok_eol)
682 if (arg->tok != tok_ignore && arg->tok != tok_ellipsis
683 && arg->tok != tok_bsymbol && arg->tok != tok_string)
686 if (arg->tok == tok_ignore || arg->tok == tok_ellipsis
687 || arg->tok == tok_string)
689 /* Call handler for simple weights. */
690 if (collate_simple_weight (ldfile, result, arg, charset)
694 arg = lr_token (ldfile, charset);
700 int ok = collate_weight_bsymbol (ldfile, result, arg,
705 arg = lr_token (ldfile, charset);
707 while (arg->tok == tok_bsymbol);
709 /* Are there more weights? */
710 if (arg->tok != tok_semicolon)
713 /* Yes, prepare next weight. */
714 if (collate_next_weight (ldfile, result) < 0)
717 arg = lr_token (ldfile, charset);
720 if (arg->tok != tok_eol)
722 SYNTAX_ERROR (_("syntax error in order specification"));
725 collate_end_weight (ldfile, result);
731 /* Following to the `order_end' keyword we don't expect
732 anything but the `END'. */
733 if (nowtok == tok_eol)
736 if (nowtok != tok_end)
739 expected_tok = tok_lc_collate;
740 expected_str = "LC_COLLATE";
743 ldfile->translate_strings = 1;
747 HANDLE_COPY (LC_MONETARY, tok_lc_monetary, "LC_MONETARY");
749 monetary_startup (ldfile, result, charset);
753 LOCALE_PROLOG (tok_lc_monetary, "LC_MONETARY");
757 case tok_int_curr_symbol:
758 case tok_currency_symbol:
759 case tok_mon_decimal_point:
760 case tok_mon_thousands_sep:
761 case tok_positive_sign:
762 case tok_negative_sign:
763 READ_STRING (monetary_add, bad_monetary);
766 case tok_int_frac_digits:
767 case tok_frac_digits:
768 case tok_p_cs_precedes:
769 case tok_p_sep_by_space:
770 case tok_n_cs_precedes:
771 case tok_n_sep_by_space:
772 case tok_p_sign_posn:
773 case tok_n_sign_posn:
774 READ_NUMBER (monetary_add, bad_monetary);
777 case tok_mon_grouping:
778 /* We have a semicolon separated list of integers. */
779 READ_NUMBER_LIST (monetary_add, bad_monetary);
784 SYNTAX_ERROR (_("syntax error in monetary locale definition"));
789 HANDLE_COPY (LC_NUMERIC, tok_lc_numeric, "LC_NUMERIC");
791 numeric_startup (ldfile, result, charset);
795 LOCALE_PROLOG (tok_lc_numeric, "LC_NUMERIC");
799 case tok_decimal_point:
800 case tok_thousands_sep:
801 READ_STRING (numeric_add, bad_numeric);
805 /* We have a semicolon separated list of integers. */
806 READ_NUMBER_LIST (numeric_add, bad_numeric);
811 SYNTAX_ERROR (_("syntax error in numeric locale definition"));
816 HANDLE_COPY (LC_TIME, tok_lc_time, "LC_TIME");
818 time_startup (ldfile, result, charset);
822 LOCALE_PROLOG (tok_lc_time, "LC_TIME");
832 READ_STRING_LIST (time_add, bad_time);
841 case tok_era_d_t_fmt:
844 READ_STRING (time_add, bad_time);
849 SYNTAX_ERROR (_("syntax error in time locale definition"));
854 HANDLE_COPY (LC_MESSAGES, tok_lc_messages, "LC_MESSAGES");
856 messages_startup (ldfile, result, charset);
860 LOCALE_PROLOG (tok_lc_messages, "LC_MESSAGES");
868 READ_STRING (messages_add, bad_message);
873 SYNTAX_ERROR (_("syntax error in message locale definition"));
878 error (5, 0, _("%s: error in state machine"), __FILE__);
885 /* We read all of the file. */
888 /* Let's see what information is available. */
889 for (cnt = LC_CTYPE; cnt <= LC_MESSAGES; ++cnt)
890 if (result->categories[cnt].generic != NULL)
891 result->avail |= 1 << cnt;
/* Run the finishing step of every locale category of LOCALE.  A
   category's finisher is called only when its bit (1 << LC_xxx) is clear
   in LOCALE->binary.  Only the LC_CTYPE and LC_COLLATE finishers receive
   CHARSET; the others take LOCALE alone.
   NOTE(review): presumably a set `binary' bit marks a category that was
   not built from source text and so needs no finishing -- confirm against
   the definition of struct localedef_t.  */
898 check_all_categories (struct localedef_t *locale, struct charset_t *charset)
900 /* Call the finishing functions for all locales. */
901 if ((locale->binary & (1 << LC_CTYPE)) == 0)
902 ctype_finish (locale, charset);
903 if ((locale->binary & (1 << LC_COLLATE)) == 0)
904 collate_finish (locale, charset);
905 if ((locale->binary & (1 << LC_MONETARY)) == 0)
906 monetary_finish (locale);
907 if ((locale->binary & (1 << LC_NUMERIC)) == 0)
908 numeric_finish (locale);
909 if ((locale->binary & (1 << LC_TIME)) == 0)
910 time_finish (locale);
911 if ((locale->binary & (1 << LC_MESSAGES)) == 0)
912 messages_finish (locale);
/* Write out the data of all six locale categories of LOCALE by calling
   each category's output function in turn, passing OUTPUT_PATH to every
   one of them.  Only ctype_output additionally receives CHARSET.  The
   calls are unconditional: no category is skipped here.  */
917 write_all_categories (struct localedef_t *locale, struct charset_t *charset,
918 const char *output_path)
920 /* Call all functions to write locale data. */
921 ctype_output (locale, charset, output_path);
922 collate_output (locale, output_path);
923 monetary_output (locale, output_path);
924 numeric_output (locale, output_path);
925 time_output (locale, output_path);
926 messages_output (locale, output_path);
931 write_locale_data (const char *output_path, const char *category,
932 size_t n_elem, struct iovec *vec)
934 size_t cnt, step, maxiov;
938 fname = malloc (strlen (output_path) + strlen (category) + 6);
940 error (5, errno, _("memory exhausted"));
942 /* Normally we write to the directory pointed to by OUTPUT_PATH.
943 But for LC_MESSAGES we have to take care of the translation
944 data. This means we need to have a directory LC_MESSAGES in
945 which we place the file under the name SYS_LC_MESSAGES. */
946 if (strcmp (category, "LC_MESSAGES") == 0)
950 sprintf (fname, "%s%s", output_path, category);
951 fd = creat (fname, 0666);
956 int save_err = errno;
960 sprintf (fname, "%1$s%2$s/SYS_%2$s", output_path, category);
961 fd = creat (fname, 0666);
968 error (0, save_err, _("cannot open output file for category `%s'"),
978 maxiov = sysconf (_SC_UIO_MAXIOV);
981 /* Write the data using writev. But we must respect the
982 implementation's limit on the number of vectors per call. */
983 for (cnt = 0; cnt < n_elem; cnt += step)
987 step = MIN (maxiov, step);
989 if (writev (fd, &vec[cnt], step) < 0)
991 error (0, errno, _("failure while writing data for category `%s'"),