1 /* xgettext C# backend.
2 Copyright (C) 2003, 2005-2009 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
35 #include "error-progname.h"
38 #include "xvasprintf.h"
40 #include "po-charset.h"
44 #define _(s) gettext(s)
/* Number of elements in the array A.
   A must be an actual array object, not a pointer: applied to a pointer,
   sizeof yields the pointer size and the result is meaningless.
   The argument is parenthesized so that any array-valued expression can be
   passed without operator-precedence surprises.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
49 /* The C# syntax is defined in ECMA-334, second edition. */
52 /* ====================== Keyword set customization. ====================== */
55 /* If true, extract all strings. */
55 static bool extract_all = false;
57 static hash_table keywords;
58 static bool default_keywords = true;
62 x_csharp_extract_all ()
68 /* Processes a --keyword option.
69 Non-ASCII function names can be used if given in UTF-8 encoding. */
71 x_csharp_keyword (const char *name)
74 default_keywords = false;
78 struct callshape shape;
81 if (keywords.table == NULL)
82 hash_init (&keywords, 100);
84 split_keywordspec (name, &end, &shape);
86 /* The characters between name and end should form a valid C#
87 identifier sequence with dots.
88 A colon means an invalid parse in split_keywordspec(). */
89 colon = strchr (name, ':');
90 if (colon == NULL || colon >= end)
91 insert_keyword_callshape (&keywords, name, end - name, &shape);
95 /* Finish initializing the keywords hash table.
96 Called after argument processing, before each file is processed. */
100 if (default_keywords)
102 /* When adding new keywords here, also update the documentation in
104 x_csharp_keyword ("GetString"); /* Resource{Manager,Set}.GetString */
105 x_csharp_keyword ("GetPluralString:1,2"); /* GettextResource{Manager,Set}.GetPluralString */
106 x_csharp_keyword ("GetParticularString:1c,2"); /* Resource{Manager,Set}.GetParticularString */
107 x_csharp_keyword ("GetParticularPluralString:1c,2,3"); /* Resource{Manager,Set}.GetParticularPluralString */
108 default_keywords = false;
113 init_flag_table_csharp ()
115 xgettext_record_flag ("GetString:1:pass-csharp-format");
116 xgettext_record_flag ("GetPluralString:1:pass-csharp-format");
117 xgettext_record_flag ("GetPluralString:2:pass-csharp-format");
118 xgettext_record_flag ("GetParticularString:2:pass-csharp-format");
119 xgettext_record_flag ("GetParticularPluralString:2:pass-csharp-format");
120 xgettext_record_flag ("GetParticularPluralString:3:pass-csharp-format");
121 xgettext_record_flag ("String.Format:1:csharp-format");
125 /* ======================== Reading of characters. ======================== */
127 /* Real filename, used in error messages about the input file. */
128 static const char *real_file_name;
130 /* Logical filename and line number, used to label the extracted messages. */
131 static char *logical_file_name;
132 static int line_number;
134 /* The input file stream. */
138 /* Phase 1: line_number handling. */
140 /* Maximum used, roughly a safer MB_LEN_MAX. */
141 #define MAX_PHASE1_PUSHBACK 16
142 static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
143 static int phase1_pushback_length;
145 /* Read the next single byte from the input file. */
151 if (phase1_pushback_length)
153 c = phase1_pushback[--phase1_pushback_length];
163 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
173 /* Supports MAX_PHASE1_PUSHBACK characters of pushback. */
175 phase1_ungetc (int c)
181 if (phase1_pushback_length == SIZEOF (phase1_pushback))
183 phase1_pushback[phase1_pushback_length++] = c;
188 /* Phase 2: Conversion to Unicode.
189 This is done early because ECMA-334 section 9.1 says that the source is
190 "an ordered sequence of Unicode characters", and because the recognition
191 of the line terminators (ECMA-334 section 9.3.1) is hardly possible without
192 prior conversion to Unicode. */
194 /* End-of-file indicator for functions returning a UCS-4 character. */
197 /* Newline Unicode character. */
200 static lexical_context_ty lexical_context;
202 static int phase2_pushback[1];
203 static int phase2_pushback_length;
205 /* Read the next Unicode UCS-4 character from the input file. */
209 if (phase2_pushback_length)
210 return phase2_pushback[--phase2_pushback_length];
212 if (xgettext_current_source_encoding == po_charset_ascii)
214 int c = phase1_getc ();
219 multiline_error (xstrdup (""),
220 xasprintf ("%s\n%s\n",
221 non_ascii_error_message (lexical_context,
225 Please specify the source encoding through --from-code.")));
230 else if (xgettext_current_source_encoding != po_charset_utf8)
233 /* Use iconv on an increasing number of bytes. Read only as many bytes
234 through phase1_getc as needed. This is needed to give reasonable
235 interactive behaviour when fp is connected to an interactive tty. */
236 unsigned char buf[MAX_PHASE1_PUSHBACK];
238 int c = phase1_getc ();
241 buf[0] = (unsigned char) c;
246 unsigned char scratchbuf[6];
247 const char *inptr = (const char *) &buf[0];
248 size_t insize = bufcount;
249 char *outptr = (char *) &scratchbuf[0];
250 size_t outsize = sizeof (scratchbuf);
252 size_t res = iconv (xgettext_current_source_iconv,
253 (ICONV_CONST char **) &inptr, &insize,
255 /* We expect that a character has been produced if and only if
256 some input bytes have been consumed. */
257 if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
259 if (outsize == sizeof (scratchbuf))
261 /* No character has been produced. Must be an error. */
262 if (res != (size_t)(-1))
267 /* An invalid multibyte sequence was encountered. */
268 multiline_error (xstrdup (""),
270 %s:%d: Invalid multibyte sequence.\n\
271 Please specify the correct source encoding through --from-code.\n"),
272 real_file_name, line_number));
275 else if (errno == EINVAL)
277 /* An incomplete multibyte character. */
280 if (bufcount == MAX_PHASE1_PUSHBACK)
282 /* An overlong incomplete multibyte sequence was
284 multiline_error (xstrdup (""),
286 %s:%d: Long incomplete multibyte sequence.\n\
287 Please specify the correct source encoding through --from-code.\n"),
288 real_file_name, line_number));
292 /* Read one more byte and retry iconv. */
296 multiline_error (xstrdup (""),
298 %s:%d: Incomplete multibyte sequence at end of file.\n\
299 Please specify the correct source encoding through --from-code.\n"),
300 real_file_name, line_number));
305 multiline_error (xstrdup (""),
307 %s:%d: Incomplete multibyte sequence at end of line.\n\
308 Please specify the correct source encoding through --from-code.\n"),
309 real_file_name, line_number - 1));
312 buf[bufcount++] = (unsigned char) c;
315 error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
316 real_file_name, line_number);
320 size_t outbytes = sizeof (scratchbuf) - outsize;
321 size_t bytes = bufcount - insize;
324 /* We expect that one character has been produced. */
329 /* Push back the unused bytes. */
331 phase1_ungetc (buf[--insize]);
332 /* Convert the character from UTF-8 to UCS-4. */
333 if (u8_mbtouc (&uc, scratchbuf, outbytes) < outbytes)
335 /* scratchbuf contains an out-of-range Unicode character
337 multiline_error (xstrdup (""),
339 %s:%d: Invalid multibyte sequence.\n\
340 Please specify the source encoding through --from-code.\n"),
341 real_file_name, line_number));
348 /* If we don't have iconv(), the only supported values for
349 xgettext_global_source_encoding and thus also for
350 xgettext_current_source_encoding are ASCII and UTF-8. */
356 /* Read a UTF-8 encoded character. */
357 unsigned char buf[6];
378 && ((buf[1] ^ 0x80) < 0x40))
388 && ((buf[1] ^ 0x80) < 0x40)
389 && ((buf[2] ^ 0x80) < 0x40))
399 && ((buf[1] ^ 0x80) < 0x40)
400 && ((buf[2] ^ 0x80) < 0x40)
401 && ((buf[3] ^ 0x80) < 0x40))
411 && ((buf[1] ^ 0x80) < 0x40)
412 && ((buf[2] ^ 0x80) < 0x40)
413 && ((buf[3] ^ 0x80) < 0x40)
414 && ((buf[4] ^ 0x80) < 0x40))
423 u8_mbtouc (&uc, buf, count);
428 /* Supports only one pushback character. */
430 phase2_ungetc (int c)
434 if (phase2_pushback_length == SIZEOF (phase2_pushback))
436 phase2_pushback[phase2_pushback_length++] = c;
441 /* Phase 3: Convert all line terminators to LF.
442 See ECMA-334 section 9.3.1. */
444 /* Line number defined in terms of phase3. */
445 static int logical_line_number;
447 static int phase3_pushback[9];
448 static int phase3_pushback_length;
450 /* Read the next Unicode UCS-4 character from the input file, mapping
451 all line terminators to U+000A, and dropping U+001A at the end of file. */
457 if (phase3_pushback_length)
459 c = phase3_pushback[--phase3_pushback_length];
461 ++logical_line_number;
469 int c1 = phase2_getc ();
471 if (c1 != UEOF && c1 != 0x000a)
474 /* Seen line terminator CR or CR/LF. */
475 ++logical_line_number;
479 if (c == 0x0085 || c == 0x2028 || c == 0x2029)
481 /* Seen Unicode word processor newline. */
482 ++logical_line_number;
488 int c1 = phase2_getc ();
491 /* Seen U+001A right before the end of file. */
498 ++logical_line_number;
502 /* Supports 9 characters of pushback. */
504 phase3_ungetc (int c)
509 --logical_line_number;
510 if (phase3_pushback_length == SIZEOF (phase3_pushback))
512 phase3_pushback[phase3_pushback_length++] = c;
517 /* ========================= Accumulating strings. ======================== */
519 /* A string buffer type that allows appending Unicode characters.
520 Returns the entire string in UTF-8 encoding. */
524 /* The part of the string that has already been converted to UTF-8. */
527 size_t utf8_allocated;
530 /* Initialize a 'struct string_buffer' to empty. */
532 init_string_buffer (struct string_buffer *bp)
534 bp->utf8_buffer = NULL;
536 bp->utf8_allocated = 0;
539 /* Auxiliary function: Ensure count more bytes are available in bp->utf8. */
541 string_buffer_append_unicode_grow (struct string_buffer *bp, size_t count)
543 if (bp->utf8_buflen + count > bp->utf8_allocated)
545 size_t new_allocated = 2 * bp->utf8_allocated + 10;
546 if (new_allocated < bp->utf8_buflen + count)
547 new_allocated = bp->utf8_buflen + count;
548 bp->utf8_allocated = new_allocated;
549 bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
553 /* Auxiliary function: Append a Unicode character to bp->utf8.
554 uc must be < 0x110000. */
556 string_buffer_append_unicode (struct string_buffer *bp, unsigned int uc)
558 unsigned char utf8buf[6];
559 int count = u8_uctomb (utf8buf, uc, 6);
562 /* The caller should have ensured that uc is not out-of-range. */
565 string_buffer_append_unicode_grow (bp, count);
566 memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
567 bp->utf8_buflen += count;
570 /* Return the string buffer's contents. */
572 string_buffer_result (struct string_buffer *bp)
574 /* NUL-terminate it. */
575 string_buffer_append_unicode_grow (bp, 1);
576 bp->utf8_buffer[bp->utf8_buflen] = '\0';
578 return bp->utf8_buffer;
581 /* Free the memory pointed to by a 'struct string_buffer'. */
583 free_string_buffer (struct string_buffer *bp)
585 free (bp->utf8_buffer);
589 /* ======================== Accumulating comments. ======================== */
592 /* Accumulating a single comment line. */
594 static struct string_buffer comment_buffer;
599 lexical_context = lc_comment;
600 comment_buffer.utf8_buflen = 0;
606 return (comment_buffer.utf8_buflen == 0);
612 string_buffer_append_unicode (&comment_buffer, c);
616 comment_line_end (size_t chars_to_remove)
618 char *buffer = string_buffer_result (&comment_buffer);
619 size_t buflen = strlen (buffer);
621 buflen -= chars_to_remove;
623 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
625 buffer[buflen] = '\0';
626 savable_comment_add (buffer);
627 lexical_context = lc_outside;
631 /* These are for tracking whether comments count as immediately before
633 static int last_comment_line;
634 static int last_non_comment_line;
637 /* Phase 4: Replace each comment that is not inside a character constant or
638 string literal with a space or newline character.
639 See ECMA-334 section 9.3.2. */
659 /* C style comment. */
661 last_was_star = false;
667 /* We skip all leading white space, but not EOLs. */
668 if (!(comment_at_start () && (c == ' ' || c == '\t')))
673 comment_line_end (1);
675 last_was_star = false;
679 last_was_star = true;
685 comment_line_end (2);
691 last_was_star = false;
696 last_comment_line = logical_line_number;
700 /* C++ style comment. */
701 last_comment_line = logical_line_number;
706 if (c == UNL || c == UEOF)
708 /* We skip all leading white space, but not EOLs. */
709 if (!(comment_at_start () && (c == ' ' || c == '\t')))
712 phase3_ungetc (c); /* push back the newline, to decrement logical_line_number */
713 comment_line_end (0);
714 phase3_getc (); /* read the newline again */
719 /* Supports only one pushback character. */
721 phase4_ungetc (int c)
727 /* ======================= Character classification. ====================== */
730 /* Return true if a given character is white space.
731 See ECMA-334 section 9.3.3. */
733 is_whitespace (int c)
735 /* Unicode character class Zs, as of Unicode 4.0. */
736 /* grep '^[^;]*;[^;]*;Zs;' UnicodeData-4.0.0.txt */
740 return (c == 0x0020 || c == 0x00a0);
742 return (c == 0x1680);
744 return (c == 0x180e);
746 return ((c >= 0x2000 && c <= 0x200b) || c == 0x202f || c == 0x205f);
748 return (c == 0x3000);
755 /* C# allows identifiers containing many Unicode characters. We recognize
756 them; to use an identifier with Unicode characters in a --keyword option,
757 it must be specified in UTF-8. */
760 bitmap_lookup (const void *table, unsigned int uc)
762 unsigned int index1 = uc >> 16;
763 if (index1 < ((const int *) table)[0])
765 int lookup1 = ((const int *) table)[1 + index1];
768 unsigned int index2 = (uc >> 9) & 0x7f;
769 int lookup2 = ((const int *) table)[lookup1 + index2];
772 unsigned int index3 = (uc >> 5) & 0xf;
773 unsigned int lookup3 = ((const int *) table)[lookup2 + index3];
775 return (lookup3 >> (uc & 0x1f)) & 1;
782 /* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, as of Unicode 4.0,
783 plus the underscore. */
790 /*unsigned*/ int level3[34 << 4];
792 table_identifier_start =
797 388, 404, 420, 436, 452, 468, 484, 500,
798 516, 532, 548, 564, 580, -1, 596, 612,
799 628, -1, -1, -1, -1, -1, -1, -1,
800 644, -1, 660, 660, 660, 660, 660, 660,
801 660, 660, 660, 660, 660, 660, 676, 660,
802 660, 660, 660, 660, 660, 660, 660, 660,
803 660, 660, 660, 660, 660, 660, 660, 660,
804 660, 660, 660, 660, 660, 660, 660, 660,
805 660, 660, 660, 660, 660, 660, 660, 660,
806 660, 660, 660, 660, 660, 660, 660, 692,
807 660, 660, 708, -1, -1, -1, 660, 660,
808 660, 660, 660, 660, 660, 660, 660, 660,
809 660, 660, 660, 660, 660, 660, 660, 660,
810 660, 660, 660, 724, -1, -1, -1, -1,
811 -1, -1, -1, -1, -1, -1, -1, -1,
812 -1, -1, -1, -1, 740, 756, 772, 788,
813 804, 820, 836, -1, 852, -1, -1, -1,
814 -1, -1, -1, -1, -1, -1, -1, -1,
815 -1, -1, -1, -1, -1, -1, -1, -1,
816 -1, -1, -1, -1, -1, -1, -1, -1,
817 -1, -1, -1, -1, -1, -1, -1, -1,
818 -1, -1, -1, -1, -1, -1, -1, -1,
819 -1, -1, -1, -1, -1, -1, -1, -1,
820 -1, -1, -1, -1, -1, -1, -1, -1,
821 -1, -1, -1, -1, -1, -1, -1, -1,
822 -1, -1, -1, -1, -1, -1, -1, -1,
823 -1, -1, -1, -1, -1, -1, -1, -1,
824 -1, -1, -1, -1, -1, -1, -1, -1,
825 -1, -1, -1, -1, -1, -1, -1, -1,
826 -1, -1, 868, 884, -1, -1, -1, -1,
827 -1, -1, -1, -1, -1, -1, -1, -1,
828 -1, -1, -1, -1, -1, -1, -1, -1,
829 660, 660, 660, 660, 660, 660, 660, 660,
830 660, 660, 660, 660, 660, 660, 660, 660,
831 660, 660, 660, 660, 660, 660, 660, 660,
832 660, 660, 660, 660, 660, 660, 660, 660,
833 660, 660, 660, 660, 660, 660, 660, 660,
834 660, 660, 660, 660, 660, 660, 660, 660,
835 660, 660, 660, 660, 660, 660, 660, 660,
836 660, 660, 660, 660, 660, 660, 660, 660,
837 660, 660, 660, 660, 660, 660, 660, 660,
838 660, 660, 660, 660, 660, 660, 660, 660,
839 660, 660, 660, 900, -1, -1, -1, -1,
840 -1, -1, -1, -1, -1, -1, -1, -1,
841 -1, -1, -1, -1, -1, -1, -1, -1,
842 -1, -1, -1, -1, -1, -1, -1, -1,
843 -1, -1, -1, -1, -1, -1, -1, -1,
844 -1, -1, -1, -1, 660, 916, -1, -1
847 0x00000000, 0x00000000, 0x87FFFFFE, 0x07FFFFFE,
848 0x00000000, 0x04200400, 0xFF7FFFFF, 0xFF7FFFFF,
849 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
850 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
851 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
852 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
853 0x00000000, 0x00000000, 0x00000000, 0x04000000,
854 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
855 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
856 0xFFFFFC03, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
857 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
858 0x000000FF, 0x00000000, 0xFFFF0000, 0x000707FF,
859 0x00000000, 0x07FFFFFE, 0x000007FF, 0xFFFEC000,
860 0xFFFFFFFF, 0xFFFFFFFF, 0x002FFFFF, 0x9C00C060,
861 0xFFFD0000, 0x0000FFFF, 0x0000E000, 0x00000000,
862 0xFFFFFFFF, 0x0002003F, 0x00000000, 0x00000000,
863 0x00000000, 0x00000000, 0x00000000, 0x00000000,
864 0x00000000, 0x00000000, 0x00000000, 0x00000000,
865 0xFFFFFFF0, 0x23FFFFFF, 0xFF010000, 0x00000003,
866 0xFFF99FE0, 0x23C5FDFF, 0xB0000000, 0x00030003,
867 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
868 0xFFFBBFE0, 0x23EDFDFF, 0x00010000, 0x00000003,
869 0xFFF99FE0, 0x23EDFDFF, 0xB0000000, 0x00020003,
870 0xD63DC7E8, 0x03BFC718, 0x00000000, 0x00000000,
871 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
872 0xFFFDDFE0, 0x23EFFDFF, 0x40000000, 0x00000003,
873 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
874 0xFC7FFFE0, 0x2FFBFFFF, 0x0000007F, 0x00000000,
875 0xFFFFFFFE, 0x000DFFFF, 0x0000007F, 0x00000000,
876 0xFEF02596, 0x200DECAE, 0x3000005F, 0x00000000,
877 0x00000001, 0x00000000, 0xFFFFFEFF, 0x000007FF,
878 0x00000F00, 0x00000000, 0x00000000, 0x00000000,
879 0xFFFFFFFF, 0x000006FB, 0x003F0000, 0x00000000,
880 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
881 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
882 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
883 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
884 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
885 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x00000000,
886 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
887 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
888 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
889 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
890 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
891 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
892 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
893 0x0003DFFF, 0x0003FFFF, 0x0003FFFF, 0x0001DFFF,
894 0xFFFFFFFF, 0x000FFFFF, 0x10800000, 0x00000000,
895 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
896 0xFFFFFFFF, 0x000001FF, 0x00000000, 0x00000000,
897 0x1FFFFFFF, 0x00000000, 0xFFFF0000, 0x001F3FFF,
898 0x00000000, 0x00000000, 0x00000000, 0x00000000,
899 0x00000000, 0x00000000, 0x00000000, 0x00000000,
900 0x00000000, 0x00000000, 0x00000000, 0x00000000,
901 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
902 0x00000000, 0x00000000, 0x00000000, 0x00000000,
903 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
904 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
905 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
906 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
907 0x00000000, 0x00000000, 0x00000000, 0x80020000,
908 0x00000000, 0x00000000, 0x00000000, 0x00000000,
909 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
910 0x0000000F, 0x00000000, 0x00000000, 0x00000000,
911 0x000000E0, 0x1F3E03FE, 0xFFFFFFFE, 0xFFFFFFFF,
912 0xE07FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xF7FFFFFF,
913 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
914 0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
915 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
916 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
917 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
918 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
919 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
920 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
921 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
922 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
923 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
924 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
925 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
926 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
927 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
928 0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
929 0x00000000, 0x00000000, 0x00000000, 0x00000000,
930 0x00000000, 0x00000000, 0x00000000, 0x00000000,
931 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
932 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
933 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
934 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
935 0x00000000, 0x00000000, 0x00000000, 0x00000000,
936 0x00000000, 0x00000000, 0x00000000, 0x00000000,
937 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
938 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
939 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
940 0x00000000, 0x00000000, 0x00000000, 0x00000000,
941 0xA0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
942 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
943 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
944 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
945 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
946 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
947 0x00000000, 0x00000000, 0x00000000, 0xFFDF0000,
948 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x1FFFFFFF,
949 0x00000000, 0x07FFFFFE, 0x07FFFFFE, 0xFFFFFFC0,
950 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x00000000,
951 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
952 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
953 0x00000000, 0x00000000, 0x00000000, 0x00000000,
954 0x00000000, 0x00000000, 0x00000000, 0x00000000,
955 0x00000000, 0x00000000, 0x00000000, 0x00000000,
956 0x00000000, 0x00000000, 0x00000000, 0x00000000,
957 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
958 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
959 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
960 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
961 0x00000000, 0x00000000, 0x00000000, 0x00000000,
962 0x00000000, 0x00000000, 0x00000000, 0x00000000,
963 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
964 0x00000000, 0x00000000, 0x00000000, 0x00000000,
965 0x00000000, 0x00000000, 0x00000000, 0x00000000,
966 0x00000000, 0x00000000, 0x00000000, 0x00000000,
967 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
968 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
969 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
970 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
971 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
972 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
973 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
974 0xFFFFFDFF, 0xFFFFFDFF, 0x000003F7, 0x00000000,
975 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
976 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
977 0x00000000, 0x00000000, 0x00000000, 0x00000000,
978 0x00000000, 0x00000000, 0x00000000, 0x00000000,
979 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
980 0x00000000, 0x00000000, 0x00000000, 0x00000000,
981 0x00000000, 0x00000000, 0x00000000, 0x00000000,
982 0x00000000, 0x00000000, 0x00000000, 0x00000000
986 /* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, Nd, Pc, Mn, Mc, Cf,
987 as of Unicode 4.0. */
994 /*unsigned*/ int level3[36 << 4];
996 table_identifier_part =
1000 16, 144, 272, -1, -1, -1, -1, -1,
1001 -1, -1, -1, -1, -1, -1, 400
1004 528, 544, 560, 576, 592, 608, 624, 640,
1005 656, 672, 688, 704, 720, -1, 736, 752,
1006 768, -1, -1, -1, -1, -1, -1, -1,
1007 784, -1, 800, 800, 800, 800, 800, 800,
1008 800, 800, 800, 800, 800, 800, 816, 800,
1009 800, 800, 800, 800, 800, 800, 800, 800,
1010 800, 800, 800, 800, 800, 800, 800, 800,
1011 800, 800, 800, 800, 800, 800, 800, 800,
1012 800, 800, 800, 800, 800, 800, 800, 800,
1013 800, 800, 800, 800, 800, 800, 800, 832,
1014 800, 800, 848, -1, -1, -1, 800, 800,
1015 800, 800, 800, 800, 800, 800, 800, 800,
1016 800, 800, 800, 800, 800, 800, 800, 800,
1017 800, 800, 800, 864, -1, -1, -1, -1,
1018 -1, -1, -1, -1, -1, -1, -1, -1,
1019 -1, -1, -1, -1, 880, 896, 912, 928,
1020 944, 960, 976, -1, 992, -1, -1, -1,
1021 -1, -1, -1, -1, -1, -1, -1, -1,
1022 -1, -1, -1, -1, -1, -1, -1, -1,
1023 -1, -1, -1, -1, -1, -1, -1, -1,
1024 -1, -1, -1, -1, -1, -1, -1, -1,
1025 -1, -1, -1, -1, -1, -1, -1, -1,
1026 -1, -1, -1, -1, -1, -1, -1, -1,
1027 -1, -1, -1, -1, -1, -1, -1, -1,
1028 -1, -1, -1, -1, -1, -1, -1, -1,
1029 -1, -1, -1, -1, -1, -1, -1, -1,
1030 -1, -1, -1, -1, -1, -1, -1, -1,
1031 -1, -1, -1, -1, -1, -1, -1, -1,
1032 -1, -1, -1, -1, -1, -1, -1, -1,
1033 1008, -1, 1024, 1040, -1, -1, -1, -1,
1034 -1, -1, -1, -1, -1, -1, -1, -1,
1035 -1, -1, -1, -1, -1, -1, -1, -1,
1036 800, 800, 800, 800, 800, 800, 800, 800,
1037 800, 800, 800, 800, 800, 800, 800, 800,
1038 800, 800, 800, 800, 800, 800, 800, 800,
1039 800, 800, 800, 800, 800, 800, 800, 800,
1040 800, 800, 800, 800, 800, 800, 800, 800,
1041 800, 800, 800, 800, 800, 800, 800, 800,
1042 800, 800, 800, 800, 800, 800, 800, 800,
1043 800, 800, 800, 800, 800, 800, 800, 800,
1044 800, 800, 800, 800, 800, 800, 800, 800,
1045 800, 800, 800, 800, 800, 800, 800, 800,
1046 800, 800, 800, 1056, -1, -1, -1, -1,
1047 -1, -1, -1, -1, -1, -1, -1, -1,
1048 -1, -1, -1, -1, -1, -1, -1, -1,
1049 -1, -1, -1, -1, -1, -1, -1, -1,
1050 -1, -1, -1, -1, -1, -1, -1, -1,
1051 -1, -1, -1, -1, 800, 1072, -1, -1,
1052 1088, -1, -1, -1, -1, -1, -1, -1,
1053 -1, -1, -1, -1, -1, -1, -1, -1,
1054 -1, -1, -1, -1, -1, -1, -1, -1,
1055 -1, -1, -1, -1, -1, -1, -1, -1,
1056 -1, -1, -1, -1, -1, -1, -1, -1,
1057 -1, -1, -1, -1, -1, -1, -1, -1,
1058 -1, -1, -1, -1, -1, -1, -1, -1,
1059 -1, -1, -1, -1, -1, -1, -1, -1,
1060 -1, -1, -1, -1, -1, -1, -1, -1,
1061 -1, -1, -1, -1, -1, -1, -1, -1,
1062 -1, -1, -1, -1, -1, -1, -1, -1,
1063 -1, -1, -1, -1, -1, -1, -1, -1,
1064 -1, -1, -1, -1, -1, -1, -1, -1,
1065 -1, -1, -1, -1, -1, -1, -1, -1,
1066 -1, -1, -1, -1, -1, -1, -1, -1,
1067 -1, -1, -1, -1, -1, -1, -1, -1
1070 0x00000000, 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE,
1071 0x00000000, 0x04202400, 0xFF7FFFFF, 0xFF7FFFFF,
1072 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1073 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1074 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
1075 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
1076 0xFFFFFFFF, 0xFFFFFFFF, 0xE0FFFFFF, 0x0400FFFF,
1077 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
1078 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1079 0xFFFFFC7B, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
1080 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
1081 0xFFFE00FF, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
1082 0x003F000F, 0x07FFFFFE, 0x01FFFFFF, 0xFFFFC3FF,
1083 0xFFFFFFFF, 0xFFFFFFFF, 0xBFEFFFFF, 0x9FFFFDFF,
1084 0xFFFF8000, 0xFFFFFFFF, 0x0000E7FF, 0x00000000,
1085 0xFFFFFFFF, 0x0003FFFF, 0x00000000, 0x00000000,
1086 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1087 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1088 0xFFFFFFFE, 0xF3FFFFFF, 0xFF1F3FFF, 0x0000FFCF,
1089 0xFFF99FEE, 0xF3C5FDFF, 0xB080399F, 0x0003FFCF,
1090 0xFFF987EE, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
1091 0xFFFBBFEE, 0xF3EDFDFF, 0x00013BBF, 0x0000FFCF,
1092 0xFFF99FEE, 0xF3EDFDFF, 0xB0C0398F, 0x0002FFC3,
1093 0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
1094 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
1095 0xFFFDDFEC, 0xF3EFFDFF, 0x40603DDF, 0x0000FFC3,
1096 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
1097 0xFC7FFFEC, 0x2FFBFFFF, 0xFF5F847F, 0x000C0000,
1098 0xFFFFFFFE, 0x07FFFFFF, 0x03FF7FFF, 0x00000000,
1099 0xFEF02596, 0x3BFFECAE, 0x33FF3F5F, 0x00000000,
1100 0x03000001, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE07FF,
1101 0xFEFF0FDF, 0x1FFFFFFF, 0x00000040, 0x00000000,
1102 0xFFFFFFFF, 0x03C7F6FB, 0x03FF03FF, 0x00000000,
1103 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
1104 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
1105 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
1106 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
1107 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
1108 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x0003FE00,
1109 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
1110 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1111 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1112 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1113 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1114 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
1115 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
1116 0x001FDFFF, 0x001FFFFF, 0x000FFFFF, 0x000DDFFF,
1117 0xFFFFFFFF, 0xFFFFFFFF, 0x308FFFFF, 0x000003FF,
1118 0x03FF3800, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
1119 0xFFFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
1120 0x1FFFFFFF, 0x0FFF0FFF, 0xFFFFFFC0, 0x001F3FFF,
1121 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1122 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1123 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1124 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
1125 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1126 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1127 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
1128 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
1129 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
1130 0x0000F000, 0x80007C00, 0x00100001, 0x8002FC0F,
1131 0x00000000, 0x00000000, 0x1FFF0000, 0x000007E2,
1132 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
1133 0x0000000F, 0x00000000, 0x00000000, 0x00000000,
1134 0x000000E0, 0x1F3EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
1135 0xE67FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF,
1136 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
1137 0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
1138 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1139 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1140 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1141 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1142 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1143 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1144 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1145 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
1146 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1147 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1148 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1149 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
1150 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1151 0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
1152 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1153 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1154 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1155 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1156 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1157 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
1158 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1159 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1160 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1161 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1162 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
1163 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1164 0xE0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
1165 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
1166 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1167 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1168 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
1169 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
1170 0x0000FFFF, 0x0018000F, 0x0000E000, 0xFFDF0000,
1171 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x9FFFFFFF,
1172 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE, 0xFFFFFFE0,
1173 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x0E000000,
1174 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
1175 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
1176 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1177 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1178 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1179 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1180 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
1181 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
1182 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1183 0x3FFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
1184 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1185 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1186 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
1187 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1188 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1189 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1190 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1191 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1192 0x00000000, 0x00000000, 0x00000000, 0xFFFFE3E0,
1193 0x00000FE7, 0x00003C00, 0x00000000, 0x00000000,
1194 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
1195 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
1196 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
1197 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1198 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1199 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
1200 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
1201 0xFFFFFDFF, 0xFFFFFDFF, 0xFFFFC3F7, 0xFFFFFFFF,
1202 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1203 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
1204 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1205 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1206 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
1207 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1208 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1209 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1210 0x00000002, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1211 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1212 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1213 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF
1217 /* Return true if a given character can occur as first character of an
1218 identifier. See ECMA-334 section 9.4.2. */
1220 is_identifier_start (int c)
1222 return bitmap_lookup (&table_identifier_start, c);
1223 /* In ASCII only this would be:
1224 return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_');
1228 /* Return true if a given character can occur as character of an identifier.
1229 See ECMA-334 section 9.4.2. */
1231 is_identifier_part (int c)
1233 return bitmap_lookup (&table_identifier_part, c);
1234 /* In ASCII only this would be:
1235 return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
1236 || (c >= '0' && c <= '9') || c == '_');
1241 is_any_character (int c)
1247 /* ======================= Preprocessor directives. ======================= */
1250 /* Phase 5: Remove preprocessor lines. See ECMA-334 section 9.5.
1251 As a side effect, this also removes initial whitespace on every line;
1252 this whitespace doesn't matter. */
1254 static int phase5_pushback[10];
1255 static int phase5_pushback_length;
1262 if (phase5_pushback_length)
1263 return phase5_pushback[--phase5_pushback_length];
1271 while (c != UEOF && is_whitespace (c));
1275 /* Ignore the entire line containing the preprocessor directive
1276 (including the // comment if it contains one). */
1279 while (c != UEOF && c != UNL);
1291 phase5_ungetc (int c)
1295 if (phase5_pushback_length == SIZEOF (phase5_pushback))
1297 phase5_pushback[phase5_pushback_length++] = c;
1303 /* ========================== Reading of tokens. ========================== */
1308 token_type_lparen, /* ( */
1309 token_type_rparen, /* ) */
1310 token_type_lbrace, /* { */
1311 token_type_rbrace, /* } */
1312 token_type_comma, /* , */
1313 token_type_dot, /* . */
1314 token_type_string_literal, /* "abc", @"abc" */
1315 token_type_number, /* 1.23 */
1316 token_type_symbol, /* identifier, keyword, null */
1317 token_type_plus, /* + */
1318 token_type_other /* character literal, misc. operator */
1320 typedef enum token_type_ty token_type_ty;
1322 typedef struct token_ty token_ty;
1326 char *string; /* for token_type_string_literal, token_type_symbol */
1327 refcounted_string_list_ty *comment; /* for token_type_string_literal */
1329 int logical_line_number;
1333 /* Free the memory pointed to by a 'struct token_ty'. */
1335 free_token (token_ty *tp)
1337 if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
1339 if (tp->type == token_type_string_literal)
1340 drop_reference (tp->comment);
1344 /* Read a Unicode escape sequence outside string/character literals.
1345 Reject Unicode escapes that don't fulfill the given predicate.
1346 See ECMA-334 section 9.4.2. */
1348 do_getc_unicode_escaped (bool (*predicate) (int))
1352 /* Use phase 3, because phase 4 elides comments. */
1356 if (c == 'u' || c == 'U')
1358 unsigned char buf[8];
1363 expect = (c == 'U' ? 8 : 4);
1365 for (i = 0; i < expect; i++)
1367 int c1 = phase3_getc ();
1369 if (c1 >= '0' && c1 <= '9')
1370 n = (n << 4) + (c1 - '0');
1371 else if (c1 >= 'A' && c1 <= 'F')
1372 n = (n << 4) + (c1 - 'A' + 10);
1373 else if (c1 >= 'a' && c1 <= 'f')
1374 n = (n << 4) + (c1 - 'a' + 10);
1379 phase3_ungetc (buf[i]);
1389 error_with_progname = false;
1390 error (0, 0, _("%s:%d: warning: invalid Unicode character"),
1391 logical_file_name, line_number);
1392 error_with_progname = true;
1394 else if (predicate (n))
1398 phase3_ungetc (buf[i]);
1405 /* Read an escape sequence inside a string literal or character literal.
1406 See ECMA-334 sections 9.4.4.4., 9.4.4.5. */
1414 /* Use phase 3, because phase 4 elides comments. */
1448 phase3_ungetc ('x');
1451 case '0': case '1': case '2': case '3': case '4':
1452 case '5': case '6': case '7': case '8': case '9':
1453 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1454 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1465 case '0': case '1': case '2': case '3': case '4':
1466 case '5': case '6': case '7': case '8': case '9':
1467 n = n * 16 + c - '0';
1469 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1470 n = n * 16 + 10 + c - 'A';
1472 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1473 n = n * 16 + 10 + c - 'a';
1483 return do_getc_unicode_escaped (is_any_character);
1485 /* Invalid escape sequence. */
1491 /* Read a regular string literal or character literal.
1492 See ECMA-334 sections 9.4.4.4., 9.4.4.5. */
1494 accumulate_escaped (struct string_buffer *literal, int delimiter)
1500 /* Use phase 3, because phase 4 elides comments. */
1502 if (c == UEOF || c == delimiter)
1507 error_with_progname = false;
1508 if (delimiter == '\'')
1509 error (0, 0, _("%s:%d: warning: unterminated character constant"),
1510 logical_file_name, line_number);
1512 error (0, 0, _("%s:%d: warning: unterminated string constant"),
1513 logical_file_name, line_number);
1514 error_with_progname = true;
1518 c = do_getc_escaped ();
1519 string_buffer_append_unicode (literal, c);
1524 /* Combine characters into tokens. Discard whitespace. */
1526 /* Maximum used guaranteed to be < 4. */
1527 static token_ty phase6_pushback[4];
1528 static int phase6_pushback_length;
1531 phase6_get (token_ty *tp)
1535 if (phase6_pushback_length)
1537 *tp = phase6_pushback[--phase6_pushback_length];
1544 tp->line_number = line_number;
1545 tp->logical_line_number = logical_line_number;
1550 tp->type = token_type_eof;
1557 if (last_non_comment_line > last_comment_line)
1558 savable_comment_reset ();
1563 /* Ignore whitespace and comments. */
1567 last_non_comment_line = tp->logical_line_number;
1572 tp->type = token_type_lparen;
1576 tp->type = token_type_rparen;
1580 tp->type = token_type_lbrace;
1584 tp->type = token_type_rbrace;
1588 tp->type = token_type_comma;
1593 if (!(c >= '0' && c <= '9'))
1596 tp->type = token_type_dot;
1601 case '0': case '1': case '2': case '3': case '4':
1602 case '5': case '6': case '7': case '8': case '9':
1604 /* Don't need to verify the complicated syntax of integers and
1605 floating-point numbers. We assume a valid C# input.
1606 The simplified syntax that we recognize as number is: any
1607 sequence of alphanumeric characters, additionally '+' and '-'
1608 immediately after 'e' or 'E' except in hexadecimal numbers. */
1609 bool hexadecimal = false;
1614 if (c >= '0' && c <= '9')
1616 if ((c >= 'A' && c <= 'Z') || (c >= 'a' &&c <= 'z'))
1618 if (c == 'X' || c == 'x')
1620 if ((c == 'E' || c == 'e') && !hexadecimal)
1623 if (!(c == '+' || c == '-'))
1633 tp->type = token_type_number;
1638 /* Regular string literal. */
1640 struct string_buffer literal;
1642 lexical_context = lc_string;
1643 init_string_buffer (&literal);
1644 accumulate_escaped (&literal, '"');
1645 tp->string = xstrdup (string_buffer_result (&literal));
1646 free_string_buffer (&literal);
1647 tp->comment = add_reference (savable_comment);
1648 lexical_context = lc_outside;
1649 tp->type = token_type_string_literal;
1654 /* Character literal. */
1656 struct string_buffer literal;
1658 init_string_buffer (&literal);
1659 accumulate_escaped (&literal, '\'');
1660 free_string_buffer (&literal);
1661 tp->type = token_type_other;
1669 tp->type = token_type_other;
1672 tp->type = token_type_other;
1677 tp->type = token_type_plus;
1685 /* Verbatim string literal. */
1686 struct string_buffer literal;
1688 lexical_context = lc_string;
1689 init_string_buffer (&literal);
1692 /* Use phase 2, because phase 4 elides comments and phase 3
1693 mixes up the newline characters. */
1706 /* No special treatment of newline and backslash here. */
1707 string_buffer_append_unicode (&literal, c);
1709 tp->string = xstrdup (string_buffer_result (&literal));
1710 free_string_buffer (&literal);
1711 tp->comment = add_reference (savable_comment);
1712 lexical_context = lc_outside;
1713 tp->type = token_type_string_literal;
1716 /* FALLTHROUGH, so that @identifier is recognized. */
1720 c = do_getc_unicode_escaped (is_identifier_start);
1721 if (is_identifier_start (c))
1723 static struct string_buffer buffer;
1724 buffer.utf8_buflen = 0;
1727 string_buffer_append_unicode (&buffer, c);
1730 c = do_getc_unicode_escaped (is_identifier_part);
1731 if (!is_identifier_part (c))
1735 tp->string = xstrdup (string_buffer_result (&buffer));
1736 tp->type = token_type_symbol;
1741 /* Misc. operator. */
1742 tp->type = token_type_other;
1749 /* Supports 3 tokens of pushback. */
1751 phase6_unget (token_ty *tp)
1753 if (tp->type != token_type_eof)
1755 if (phase6_pushback_length == SIZEOF (phase6_pushback))
1757 phase6_pushback[phase6_pushback_length++] = *tp;
1762 /* Compile-time optimization of string literal concatenation.
1763 Combine "string1" + ... + "stringN" to the concatenated string if
1764 - the token after this expression is not '.' (because then the last
1765 string could be part of a method call expression). */
1767 static token_ty phase7_pushback[2];
1768 static int phase7_pushback_length;
1771 phase7_get (token_ty *tp)
1773 if (phase7_pushback_length)
1775 *tp = phase7_pushback[--phase7_pushback_length];
1780 if (tp->type == token_type_string_literal)
1782 char *sum = tp->string;
1783 size_t sum_len = strlen (sum);
1789 phase6_get (&token2);
1790 if (token2.type == token_type_plus)
1794 phase6_get (&token3);
1795 if (token3.type == token_type_string_literal)
1797 token_ty token_after;
1799 phase6_get (&token_after);
1800 if (token_after.type != token_type_dot)
1802 char *addend = token3.string;
1803 size_t addend_len = strlen (addend);
1805 sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
1806 memcpy (sum + sum_len, addend, addend_len + 1);
1807 sum_len += addend_len;
1809 phase6_unget (&token_after);
1810 free_token (&token3);
1811 free_token (&token2);
1814 phase6_unget (&token_after);
1816 phase6_unget (&token3);
1818 phase6_unget (&token2);
1825 /* Supports 2 tokens of pushback. */
1827 phase7_unget (token_ty *tp)
1829 if (tp->type != token_type_eof)
1831 if (phase7_pushback_length == SIZEOF (phase7_pushback))
1833 phase7_pushback[phase7_pushback_length++] = *tp;
1839 x_csharp_lex (token_ty *tp)
1844 /* Supports 2 tokens of pushback. */
1846 x_csharp_unlex (token_ty *tp)
1852 /* ========================= Extracting strings. ========================== */
1855 /* Context lookup table. */
1856 static flag_context_list_table_ty *flag_context_list_table;
1859 /* The file is broken into tokens. Scan the token stream, looking for
1860 a keyword, followed by a left paren, followed by a string. When we
1861 see this sequence, we have something to remember. We assume we are
1862 looking at a valid C# program, and leave the complaints about
1863 the grammar to the compiler.
1865 Normal handling: Look for
1866 keyword ( ... msgid ... )
1867 Plural handling: Look for
1868 keyword ( ... msgid ... msgid_plural ... )
1870 We use recursion because the arguments before msgid or between msgid
1871 and msgid_plural can contain subexpressions of the same form. */
1874 /* Extract messages until the next balanced closing parenthesis or brace,
1875 depending on TERMINATOR.
1876 Extracted messages are added to MLP.
1877 Return true upon eof, false upon closing parenthesis or brace. */
1879 extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
1880 flag_context_ty outer_context,
1881 flag_context_list_iterator_ty context_iter,
1882 struct arglist_parser *argparser)
1884 /* Current argument number. */
1886 /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
1888 /* Parameters of the keyword just seen. Defined only in state 1. */
1889 const struct callshapes *next_shapes = NULL;
1890 /* Context iterator that will be used if the next token is a '('. */
1891 flag_context_list_iterator_ty next_context_iter =
1892 passthrough_context_list_iterator;
1893 /* Current context. */
1894 flag_context_ty inner_context =
1895 inherited_context (outer_context,
1896 flag_context_list_iterator_advance (&context_iter));
1898 /* Start state is 0. */
1905 x_csharp_lex (&token);
1908 case token_type_symbol:
1910 /* Combine symbol1 . ... . symbolN into a single string, so that
1911 we can recognize static function calls like
1912 GettextResource.gettext. The information present for
1913 symbolI.....symbolN has precedence over the information for
1914 symbolJ.....symbolN with J > I. */
1915 char *sum = token.string;
1916 size_t sum_len = strlen (sum);
1917 const char *dottedname;
1918 flag_context_list_ty *context_list;
1924 x_csharp_lex (&token2);
1925 if (token2.type == token_type_dot)
1929 x_csharp_lex (&token3);
1930 if (token3.type == token_type_symbol)
1932 char *addend = token3.string;
1933 size_t addend_len = strlen (addend);
1936 (char *) xrealloc (sum, sum_len + 1 + addend_len + 1);
1938 memcpy (sum + sum_len + 1, addend, addend_len + 1);
1939 sum_len += 1 + addend_len;
1941 free_token (&token3);
1942 free_token (&token2);
1945 x_csharp_unlex (&token3);
1947 x_csharp_unlex (&token2);
1951 for (dottedname = sum;;)
1953 void *keyword_value;
1955 if (hash_find_entry (&keywords, dottedname, strlen (dottedname),
1959 next_shapes = (const struct callshapes *) keyword_value;
1964 dottedname = strchr (dottedname, '.');
1965 if (dottedname == NULL)
1973 for (dottedname = sum;;)
1976 flag_context_list_table_lookup (
1977 flag_context_list_table,
1978 dottedname, strlen (dottedname));
1979 if (context_list != NULL)
1982 dottedname = strchr (dottedname, '.');
1983 if (dottedname == NULL)
1987 next_context_iter = flag_context_list_iterator (context_list);
1993 case token_type_lparen:
1994 if (extract_parenthesized (mlp, token_type_rparen,
1995 inner_context, next_context_iter,
1996 arglist_parser_alloc (mlp,
1997 state ? next_shapes : NULL)))
1999 xgettext_current_source_encoding = po_charset_utf8;
2000 arglist_parser_done (argparser, arg);
2001 xgettext_current_source_encoding = xgettext_global_source_encoding;
2004 next_context_iter = null_context_list_iterator;
2008 case token_type_rparen:
2009 if (terminator == token_type_rparen)
2011 xgettext_current_source_encoding = po_charset_utf8;
2012 arglist_parser_done (argparser, arg);
2013 xgettext_current_source_encoding = xgettext_global_source_encoding;
2016 if (terminator == token_type_rbrace)
2018 error_with_progname = false;
2020 _("%s:%d: warning: ')' found where '}' was expected"),
2021 logical_file_name, token.line_number);
2022 error_with_progname = true;
2024 next_context_iter = null_context_list_iterator;
2028 case token_type_lbrace:
2029 if (extract_parenthesized (mlp, token_type_rbrace,
2030 null_context, null_context_list_iterator,
2031 arglist_parser_alloc (mlp, NULL)))
2033 xgettext_current_source_encoding = po_charset_utf8;
2034 arglist_parser_done (argparser, arg);
2035 xgettext_current_source_encoding = xgettext_global_source_encoding;
2038 next_context_iter = null_context_list_iterator;
2042 case token_type_rbrace:
2043 if (terminator == token_type_rbrace)
2045 xgettext_current_source_encoding = po_charset_utf8;
2046 arglist_parser_done (argparser, arg);
2047 xgettext_current_source_encoding = xgettext_global_source_encoding;
2050 if (terminator == token_type_rparen)
2052 error_with_progname = false;
2054 _("%s:%d: warning: '}' found where ')' was expected"),
2055 logical_file_name, token.line_number);
2056 error_with_progname = true;
2058 next_context_iter = null_context_list_iterator;
2062 case token_type_comma:
2065 inherited_context (outer_context,
2066 flag_context_list_iterator_advance (
2068 next_context_iter = passthrough_context_list_iterator;
2072 case token_type_string_literal:
2075 pos.file_name = logical_file_name;
2076 pos.line_number = token.line_number;
2078 xgettext_current_source_encoding = po_charset_utf8;
2080 remember_a_message (mlp, NULL, token.string, inner_context,
2081 &pos, NULL, token.comment);
2083 arglist_parser_remember (argparser, arg, token.string,
2085 pos.file_name, pos.line_number,
2087 xgettext_current_source_encoding = xgettext_global_source_encoding;
2089 drop_reference (token.comment);
2090 next_context_iter = null_context_list_iterator;
2094 case token_type_eof:
2095 xgettext_current_source_encoding = po_charset_utf8;
2096 arglist_parser_done (argparser, arg);
2097 xgettext_current_source_encoding = xgettext_global_source_encoding;
2100 case token_type_dot:
2101 case token_type_number:
2102 case token_type_plus:
2103 case token_type_other:
2104 next_context_iter = null_context_list_iterator;
2116 extract_csharp (FILE *f,
2117 const char *real_filename, const char *logical_filename,
2118 flag_context_list_table_ty *flag_table,
2119 msgdomain_list_ty *mdlp)
2121 message_list_ty *mlp = mdlp->item[0]->messages;
2124 real_file_name = real_filename;
2125 logical_file_name = xstrdup (logical_filename);
2128 lexical_context = lc_outside;
2130 logical_line_number = 1;
2131 last_comment_line = -1;
2132 last_non_comment_line = -1;
2134 flag_context_list_table = flag_table;
2138 /* Eat tokens until eof is seen. When extract_parenthesized returns
2139 due to an unbalanced closing parenthesis, just restart it. */
2140 while (!extract_parenthesized (mlp, token_type_eof,
2141 null_context, null_context_list_iterator,
2142 arglist_parser_alloc (mlp, NULL)))
2146 real_file_name = NULL;
2147 logical_file_name = NULL;