1 /* xgettext C# backend.
2 Copyright (C) 2003, 2005-2009, 2011, 2015 Free Software Foundation,
4 Written by Bruno Haible <bruno@clisp.org>, 2003.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
36 #include "error-progname.h"
39 #include "xvasprintf.h"
41 #include "po-charset.h"
45 #define _(s) gettext(s) /* Shorthand that passes a message string through gettext(); used on the error messages in this file. */
47 #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) /* Number of elements of A; only meaningful when A is a real array, not a pointer. */
50 /* The C# syntax is defined in ECMA-334, second edition. */
53 /* ====================== Keyword set customization. ====================== */
55 /* If true, extract all strings. */
56 static bool extract_all = false;
58 static hash_table keywords;
59 static bool default_keywords = true;
63 x_csharp_extract_all ()
69 /* Processes a --keyword option.
70 Non-ASCII function names can be used if given in UTF-8 encoding.
   NAME is the keyword specification. split_keywordspec() separates it into
   the identifier, which runs from NAME up to END, and the call shape SHAPE;
   the identifier is then entered into the 'keywords' hash table.
   NOTE(review): several lines of this function are not shown in this
   excerpt, so the control flow around the statements below is unverified. */
72 x_csharp_keyword (const char *name)
75 default_keywords = false; /* The built-in keyword list is only installed while default_keywords is still true. */
79 struct callshape shape;
82 if (keywords.table == NULL) /* Create the keyword hash table on first use. */
83 hash_init (&keywords, 100);
85 split_keywordspec (name, &end, &shape); /* Sets END and fills in SHAPE. */
87 /* The characters between name and end should form a valid C#
88 identifier sequence with dots.
89 A colon means an invalid parse in split_keywordspec(). */
90 colon = strchr (name, ':');
91 if (colon == NULL || colon >= end) /* No ':' inside the identifier part: accept it. */
92 insert_keyword_callshape (&keywords, name, end - name, &shape); /* The key is the first end - name bytes of NAME. */
96 /* Finish initializing the keywords hash table.
97 Called after argument processing, before each file is processed. */
101 if (default_keywords)
103 /* When adding new keywords here, also update the documentation in xgettext.texi! */
105 x_csharp_keyword ("GetString"); /* Resource{Manager,Set}.GetString */
106 x_csharp_keyword ("GetPluralString:1,2"); /* GettextResource{Manager,Set}.GetPluralString */
107 x_csharp_keyword ("GetParticularString:1c,2"); /* Resource{Manager,Set}.GetParticularString */
108 x_csharp_keyword ("GetParticularPluralString:1c,2,3"); /* Resource{Manager,Set}.GetParticularPluralString */
109 default_keywords = false;
/* Records the format-string flags for the functions this backend knows:
   the listed arguments of the Get*String functions are recorded with
   "pass-csharp-format", and argument 1 of String.Format with
   "csharp-format".
   NOTE(review): each specification looks like NAME:ARGNUM:FLAG; confirm
   against xgettext_record_flag(). */
114 init_flag_table_csharp ()
116 xgettext_record_flag ("GetString:1:pass-csharp-format");
117 xgettext_record_flag ("GetPluralString:1:pass-csharp-format");
118 xgettext_record_flag ("GetPluralString:2:pass-csharp-format");
119 xgettext_record_flag ("GetParticularString:2:pass-csharp-format");
120 xgettext_record_flag ("GetParticularPluralString:2:pass-csharp-format");
121 xgettext_record_flag ("GetParticularPluralString:3:pass-csharp-format");
122 xgettext_record_flag ("String.Format:1:csharp-format");
126 /* ======================== Reading of characters. ======================== */
128 /* Real filename, used in error messages about the input file. */
129 static const char *real_file_name;
131 /* Logical filename and line number, used to label the extracted messages. */
132 static char *logical_file_name;
133 static int line_number;
135 /* The input file stream. */
139 /* Phase 1: line_number handling. */
141 /* Maximum used, roughly a safer MB_LEN_MAX. */
142 #define MAX_PHASE1_PUSHBACK 16
143 static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
144 static int phase1_pushback_length;
146 /* Read the next single byte from the input file. */
152 if (phase1_pushback_length)
154 c = phase1_pushback[--phase1_pushback_length];
164 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
174 /* Supports MAX_PHASE1_PUSHBACK characters of pushback.
   Pushes the byte C onto phase1_pushback; the phase 1 reader takes bytes
   back from the top of that array before reading further input. */
176 phase1_ungetc (int c)
182 if (phase1_pushback_length == SIZEOF (phase1_pushback)) /* Pushback array already full; the handling of this case is not shown in this excerpt. */
184 phase1_pushback[phase1_pushback_length++] = c; /* Stored as unsigned char, the element type of phase1_pushback. */
189 /* Phase 2: Conversion to Unicode.
190 This is done early because ECMA-334 section 9.1 says that the source is
191 "an ordered sequence of Unicode characters", and because the recognition
192 of the line terminators (ECMA-334 section 9.3.1) is hardly possible without
193 prior conversion to Unicode. */
195 /* End-of-file indicator for functions returning a UCS-4 character. */
198 /* Newline Unicode character. */
201 static lexical_context_ty lexical_context;
203 static int phase2_pushback[1];
204 static int phase2_pushback_length;
206 /* Read the next Unicode UCS-4 character from the input file. */
210 if (phase2_pushback_length)
211 return phase2_pushback[--phase2_pushback_length];
213 if (xgettext_current_source_encoding == po_charset_ascii)
215 int c = phase1_getc ();
220 multiline_error (xstrdup (""),
221 xasprintf ("%s\n%s\n",
222 non_ascii_error_message (lexical_context,
226 Please specify the source encoding through --from-code.")));
231 else if (xgettext_current_source_encoding != po_charset_utf8)
234 /* Use iconv on an increasing number of bytes. Read only as many bytes
235 through phase1_getc as needed. This is needed to give reasonable
236 interactive behaviour when fp is connected to an interactive tty. */
237 unsigned char buf[MAX_PHASE1_PUSHBACK];
239 int c = phase1_getc ();
242 buf[0] = (unsigned char) c;
247 unsigned char scratchbuf[6];
248 const char *inptr = (const char *) &buf[0];
249 size_t insize = bufcount;
250 char *outptr = (char *) &scratchbuf[0];
251 size_t outsize = sizeof (scratchbuf);
253 size_t res = iconv (xgettext_current_source_iconv,
254 (ICONV_CONST char **) &inptr, &insize,
256 /* We expect that a character has been produced if and only if
257 some input bytes have been consumed. */
258 if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
260 if (outsize == sizeof (scratchbuf))
262 /* No character has been produced. Must be an error. */
263 if (res != (size_t)(-1))
268 /* An invalid multibyte sequence was encountered. */
269 multiline_error (xstrdup (""),
271 %s:%d: Invalid multibyte sequence.\n\
272 Please specify the correct source encoding through --from-code.\n"),
273 real_file_name, line_number));
276 else if (errno == EINVAL)
278 /* An incomplete multibyte character. */
281 if (bufcount == MAX_PHASE1_PUSHBACK)
283 /* An overlong incomplete multibyte sequence was encountered. */
285 multiline_error (xstrdup (""),
287 %s:%d: Long incomplete multibyte sequence.\n\
288 Please specify the correct source encoding through --from-code.\n"),
289 real_file_name, line_number));
293 /* Read one more byte and retry iconv. */
297 multiline_error (xstrdup (""),
299 %s:%d: Incomplete multibyte sequence at end of file.\n\
300 Please specify the correct source encoding through --from-code.\n"),
301 real_file_name, line_number));
306 multiline_error (xstrdup (""),
308 %s:%d: Incomplete multibyte sequence at end of line.\n\
309 Please specify the correct source encoding through --from-code.\n"),
310 real_file_name, line_number - 1));
313 buf[bufcount++] = (unsigned char) c;
316 error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
317 real_file_name, line_number);
321 size_t outbytes = sizeof (scratchbuf) - outsize;
322 size_t bytes = bufcount - insize;
325 /* We expect that one character has been produced. */
330 /* Push back the unused bytes. */
332 phase1_ungetc (buf[--insize]);
333 /* Convert the character from UTF-8 to UCS-4. */
334 if (u8_mbtoucr (&uc, scratchbuf, outbytes) < (int) outbytes)
336 /* scratchbuf contains an out-of-range Unicode character (> 0x10ffff). */
338 multiline_error (xstrdup (""),
340 %s:%d: Invalid multibyte sequence.\n\
341 Please specify the source encoding through --from-code.\n"),
342 real_file_name, line_number));
349 /* If we don't have iconv(), the only supported values for
350 xgettext_global_source_encoding and thus also for
351 xgettext_current_source_encoding are ASCII and UTF-8. */
357 /* Read a UTF-8 encoded character. */
358 unsigned char buf[6];
379 && ((buf[1] ^ 0x80) < 0x40))
389 && ((buf[1] ^ 0x80) < 0x40)
390 && ((buf[2] ^ 0x80) < 0x40))
400 && ((buf[1] ^ 0x80) < 0x40)
401 && ((buf[2] ^ 0x80) < 0x40)
402 && ((buf[3] ^ 0x80) < 0x40))
412 && ((buf[1] ^ 0x80) < 0x40)
413 && ((buf[2] ^ 0x80) < 0x40)
414 && ((buf[3] ^ 0x80) < 0x40)
415 && ((buf[4] ^ 0x80) < 0x40))
424 u8_mbtouc (&uc, buf, count);
429 /* Supports only one pushback character.
   Saves the character C in phase2_pushback; the phase 2 reader returns a
   saved character before it decodes any further input. */
431 phase2_ungetc (int c)
435 if (phase2_pushback_length == SIZEOF (phase2_pushback)) /* The single slot is already occupied; the handling of this case is not shown in this excerpt. */
437 phase2_pushback[phase2_pushback_length++] = c;
442 /* Phase 3: Convert all line terminators to LF.
443 See ECMA-334 section 9.3.1. */
445 /* Line number defined in terms of phase3. */
446 static int logical_line_number;
448 static int phase3_pushback[9];
449 static int phase3_pushback_length;
451 /* Read the next Unicode UCS-4 character from the input file, mapping
452 all line terminators to U+000A, and dropping U+001A at the end of file. */
458 if (phase3_pushback_length)
460 c = phase3_pushback[--phase3_pushback_length];
462 ++logical_line_number;
470 int c1 = phase2_getc ();
472 if (c1 != UEOF && c1 != 0x000a)
475 /* Seen line terminator CR or CR/LF. */
476 ++logical_line_number;
480 if (c == 0x0085 || c == 0x2028 || c == 0x2029)
482 /* Seen Unicode word processor newline. */
483 ++logical_line_number;
489 int c1 = phase2_getc ();
492 /* Seen U+001A right before the end of file. */
499 ++logical_line_number;
503 /* Supports 9 characters of pushback.
   Pushes the character C onto phase3_pushback and keeps
   logical_line_number consistent with what has been handed out so far. */
505 phase3_ungetc (int c)
510 --logical_line_number; /* Undo the line count; presumably this applies to a pushed-back newline only -- the guard is not shown in this excerpt. */
511 if (phase3_pushback_length == SIZEOF (phase3_pushback)) /* All 9 slots in use; the handling of this case is not shown in this excerpt. */
513 phase3_pushback[phase3_pushback_length++] = c;
518 /* ========================= Accumulating strings. ======================== */
520 /* A string buffer type that allows appending Unicode characters.
521 Returns the entire string in UTF-8 encoding. */
525 /* The part of the string that has already been converted to UTF-8. */
528 size_t utf8_allocated;
531 /* Initialize a 'struct string_buffer' to empty.
   No storage is allocated here; string_buffer_append_unicode_grow()
   obtains it later through xrealloc(). */
533 init_string_buffer (struct string_buffer *bp)
535 bp->utf8_buffer = NULL; /* Nothing allocated yet. */
537 bp->utf8_allocated = 0; /* Capacity in bytes of utf8_buffer. */
540 /* Auxiliary function: Ensure count more bytes are available in
   bp->utf8_buffer, beyond the bp->utf8_buflen bytes already in use.
   The buffer is enlarged through xrealloc() when it is too small. */
542 string_buffer_append_unicode_grow (struct string_buffer *bp, size_t count)
544 if (bp->utf8_buflen + count > bp->utf8_allocated) /* Not enough room for COUNT more bytes. */
546 size_t new_allocated = 2 * bp->utf8_allocated + 10; /* Candidate size: twice the current capacity plus 10. */
547 if (new_allocated < bp->utf8_buflen + count) /* Candidate still too small for this request? */
548 new_allocated = bp->utf8_buflen + count; /* Then take exactly what is needed. */
549 bp->utf8_allocated = new_allocated;
550 bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
554 /* Auxiliary function: Append a Unicode character to bp->utf8_buffer.
555 uc must be < 0x110000.
   The character is stored in its UTF-8 encoding and bp->utf8_buflen grows
   by the number of bytes of that encoding. */
557 string_buffer_append_unicode (struct string_buffer *bp, unsigned int uc)
559 unsigned char utf8buf[6];
560 int count = u8_uctomb (utf8buf, uc, 6); /* Encode UC into utf8buf; COUNT is used below as the number of bytes produced. */
563 /* The caller should have ensured that uc is not out-of-range. */
566 string_buffer_append_unicode_grow (bp, count); /* Make room for COUNT more bytes. */
567 memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count); /* Copy the encoded bytes to the end of the buffer. */
568 bp->utf8_buflen += count;
571 /* Return the string buffer's contents.
   The result is bp->utf8_buffer itself, not a copy, with a NUL byte
   written after the bp->utf8_buflen bytes of content. In the lines shown
   here the terminator is not added to bp->utf8_buflen. */
573 string_buffer_result (struct string_buffer *bp)
575 /* NUL-terminate it. */
576 string_buffer_append_unicode_grow (bp, 1); /* Room for the terminating byte. */
577 bp->utf8_buffer[bp->utf8_buflen] = '\0';
579 return bp->utf8_buffer;
582 /* Free the memory pointed to by a 'struct string_buffer'.
   Only bp->utf8_buffer is released; the struct itself is not freed, and
   the lines shown here do not reset its fields. */
584 free_string_buffer (struct string_buffer *bp)
586 free (bp->utf8_buffer); /* Also fine for a never-used buffer: init_string_buffer() leaves utf8_buffer NULL. */
590 /* ======================== Accumulating comments. ======================== */
593 /* Accumulating a single comment line. */
595 static struct string_buffer comment_buffer;
600 lexical_context = lc_comment;
601 comment_buffer.utf8_buflen = 0;
607 return (comment_buffer.utf8_buflen == 0);
613 string_buffer_append_unicode (&comment_buffer, c);
/* Finishes the comment line accumulated in comment_buffer: shortens it by
   CHARS_TO_REMOVE bytes, passes the text to savable_comment_add(), and
   sets lexical_context back to lc_outside.
   NOTE(review): some lines of this function are not shown in this
   excerpt. */
617 comment_line_end (size_t chars_to_remove)
619 char *buffer = string_buffer_result (&comment_buffer); /* NUL-terminated UTF-8 text of the line. */
620 size_t buflen = strlen (buffer);
622 buflen -= chars_to_remove; /* The phase 4 comment code passes 0, 1 or 2 here. */
624 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) /* Tail of a test for a trailing space or tab; presumably part of a loop that trims trailing white space -- its head is not shown. */
626 buffer[buflen] = '\0'; /* Cut the string at the new length. */
627 savable_comment_add (buffer);
628 lexical_context = lc_outside;
632 /* These are for tracking whether comments count as immediately before a keyword. */
634 static int last_comment_line;
635 static int last_non_comment_line;
638 /* Phase 4: Replace each comment that is not inside a character constant or
639 string literal with a space or newline character.
640 See ECMA-334 section 9.3.2. */
660 /* C style comment. */
662 last_was_star = false;
668 /* We skip all leading white space, but not EOLs. */
669 if (!(comment_at_start () && (c == ' ' || c == '\t')))
674 comment_line_end (1);
676 last_was_star = false;
680 last_was_star = true;
686 comment_line_end (2);
692 last_was_star = false;
697 last_comment_line = logical_line_number;
701 /* C++ style comment. */
702 last_comment_line = logical_line_number;
707 if (c == UNL || c == UEOF)
709 /* We skip all leading white space, but not EOLs. */
710 if (!(comment_at_start () && (c == ' ' || c == '\t')))
713 phase3_ungetc (c); /* push back the newline, to decrement logical_line_number */
714 comment_line_end (0);
715 phase3_getc (); /* read the newline again */
720 /* Supports only one pushback character. */
722 phase4_ungetc (int c)
728 /* ======================= Character classification. ====================== */
731 /* Return true if a given character is white space.
732 See ECMA-334 section 9.3.3.
   C is a Unicode code point.
   NOTE(review): the code that selects among the return statements below
   is not shown in this excerpt. */
734 is_whitespace (int c)
736 /* Unicode character class Zs, as of Unicode 4.0. */
737 /* grep '^[^;]*;[^;]*;Zs;' UnicodeData-4.0.0.txt */
741 return (c == 0x0020 || c == 0x00a0); /* SPACE, NO-BREAK SPACE. */
743 return (c == 0x1680); /* OGHAM SPACE MARK. */
745 return (c == 0x180e); /* MONGOLIAN VOWEL SEPARATOR. */
747 return ((c >= 0x2000 && c <= 0x200b) || c == 0x202f || c == 0x205f); /* EN QUAD through ZERO WIDTH SPACE, NARROW NO-BREAK SPACE, MEDIUM MATHEMATICAL SPACE. */
749 return (c == 0x3000); /* IDEOGRAPHIC SPACE. */
756 /* C# allows identifiers containing many Unicode characters. We recognize
757 them; to use an identifier with Unicode characters in a --keyword option,
758 it must be specified in UTF-8. */
/* Looks up the bit for UC in the multi-level bitmap TABLE, which is read as
   an array of int: uc >> 16 selects a level-1 entry, bits 9..15 of UC a
   level-2 entry, bits 5..8 a level-3 word, and bits 0..4 the bit inside
   that word. The result is that bit (0 or 1).
   NOTE(review): lines of this function are missing from this excerpt. The
   tables in this file contain -1 entries, presumably meaning "no
   sub-table"; how they are handled is not visible here. */
761 bitmap_lookup (const void *table, unsigned int uc)
763 unsigned int index1 = uc >> 16; /* Bits 16 and up of UC. */
764 if (index1 < ((const int *) table)[0]) /* The first int of TABLE bounds index1. */
766 int lookup1 = ((const int *) table)[1 + index1]; /* Level-1 entries start at table[1]. */
769 unsigned int index2 = (uc >> 9) & 0x7f; /* Bits 9..15 of UC. */
770 int lookup2 = ((const int *) table)[lookup1 + index2]; /* lookup1 is used as an int offset into TABLE. */
773 unsigned int index3 = (uc >> 5) & 0xf; /* Bits 5..8 of UC. */
774 unsigned int lookup3 = ((const int *) table)[lookup2 + index3]; /* Word holding 32 flag bits, read as unsigned. */
776 return (lookup3 >> (uc & 0x1f)) & 1; /* The bit number is the low 5 bits of UC. */
783 /* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, as of Unicode 4.0,
784 plus the underscore. */
791 /*unsigned*/ int level3[34 << 4];
793 table_identifier_start =
798 388, 404, 420, 436, 452, 468, 484, 500,
799 516, 532, 548, 564, 580, -1, 596, 612,
800 628, -1, -1, -1, -1, -1, -1, -1,
801 644, -1, 660, 660, 660, 660, 660, 660,
802 660, 660, 660, 660, 660, 660, 676, 660,
803 660, 660, 660, 660, 660, 660, 660, 660,
804 660, 660, 660, 660, 660, 660, 660, 660,
805 660, 660, 660, 660, 660, 660, 660, 660,
806 660, 660, 660, 660, 660, 660, 660, 660,
807 660, 660, 660, 660, 660, 660, 660, 692,
808 660, 660, 708, -1, -1, -1, 660, 660,
809 660, 660, 660, 660, 660, 660, 660, 660,
810 660, 660, 660, 660, 660, 660, 660, 660,
811 660, 660, 660, 724, -1, -1, -1, -1,
812 -1, -1, -1, -1, -1, -1, -1, -1,
813 -1, -1, -1, -1, 740, 756, 772, 788,
814 804, 820, 836, -1, 852, -1, -1, -1,
815 -1, -1, -1, -1, -1, -1, -1, -1,
816 -1, -1, -1, -1, -1, -1, -1, -1,
817 -1, -1, -1, -1, -1, -1, -1, -1,
818 -1, -1, -1, -1, -1, -1, -1, -1,
819 -1, -1, -1, -1, -1, -1, -1, -1,
820 -1, -1, -1, -1, -1, -1, -1, -1,
821 -1, -1, -1, -1, -1, -1, -1, -1,
822 -1, -1, -1, -1, -1, -1, -1, -1,
823 -1, -1, -1, -1, -1, -1, -1, -1,
824 -1, -1, -1, -1, -1, -1, -1, -1,
825 -1, -1, -1, -1, -1, -1, -1, -1,
826 -1, -1, -1, -1, -1, -1, -1, -1,
827 -1, -1, 868, 884, -1, -1, -1, -1,
828 -1, -1, -1, -1, -1, -1, -1, -1,
829 -1, -1, -1, -1, -1, -1, -1, -1,
830 660, 660, 660, 660, 660, 660, 660, 660,
831 660, 660, 660, 660, 660, 660, 660, 660,
832 660, 660, 660, 660, 660, 660, 660, 660,
833 660, 660, 660, 660, 660, 660, 660, 660,
834 660, 660, 660, 660, 660, 660, 660, 660,
835 660, 660, 660, 660, 660, 660, 660, 660,
836 660, 660, 660, 660, 660, 660, 660, 660,
837 660, 660, 660, 660, 660, 660, 660, 660,
838 660, 660, 660, 660, 660, 660, 660, 660,
839 660, 660, 660, 660, 660, 660, 660, 660,
840 660, 660, 660, 900, -1, -1, -1, -1,
841 -1, -1, -1, -1, -1, -1, -1, -1,
842 -1, -1, -1, -1, -1, -1, -1, -1,
843 -1, -1, -1, -1, -1, -1, -1, -1,
844 -1, -1, -1, -1, -1, -1, -1, -1,
845 -1, -1, -1, -1, 660, 916, -1, -1
848 0x00000000, 0x00000000, 0x87FFFFFE, 0x07FFFFFE,
849 0x00000000, 0x04200400, 0xFF7FFFFF, 0xFF7FFFFF,
850 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
851 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
852 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
853 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
854 0x00000000, 0x00000000, 0x00000000, 0x04000000,
855 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
856 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
857 0xFFFFFC03, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
858 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
859 0x000000FF, 0x00000000, 0xFFFF0000, 0x000707FF,
860 0x00000000, 0x07FFFFFE, 0x000007FF, 0xFFFEC000,
861 0xFFFFFFFF, 0xFFFFFFFF, 0x002FFFFF, 0x9C00C060,
862 0xFFFD0000, 0x0000FFFF, 0x0000E000, 0x00000000,
863 0xFFFFFFFF, 0x0002003F, 0x00000000, 0x00000000,
864 0x00000000, 0x00000000, 0x00000000, 0x00000000,
865 0x00000000, 0x00000000, 0x00000000, 0x00000000,
866 0xFFFFFFF0, 0x23FFFFFF, 0xFF010000, 0x00000003,
867 0xFFF99FE0, 0x23C5FDFF, 0xB0000000, 0x00030003,
868 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
869 0xFFFBBFE0, 0x23EDFDFF, 0x00010000, 0x00000003,
870 0xFFF99FE0, 0x23EDFDFF, 0xB0000000, 0x00020003,
871 0xD63DC7E8, 0x03BFC718, 0x00000000, 0x00000000,
872 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
873 0xFFFDDFE0, 0x23EFFDFF, 0x40000000, 0x00000003,
874 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
875 0xFC7FFFE0, 0x2FFBFFFF, 0x0000007F, 0x00000000,
876 0xFFFFFFFE, 0x000DFFFF, 0x0000007F, 0x00000000,
877 0xFEF02596, 0x200DECAE, 0x3000005F, 0x00000000,
878 0x00000001, 0x00000000, 0xFFFFFEFF, 0x000007FF,
879 0x00000F00, 0x00000000, 0x00000000, 0x00000000,
880 0xFFFFFFFF, 0x000006FB, 0x003F0000, 0x00000000,
881 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
882 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
883 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
884 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
885 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
886 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x00000000,
887 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
888 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
889 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
890 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
891 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
892 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
893 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
894 0x0003DFFF, 0x0003FFFF, 0x0003FFFF, 0x0001DFFF,
895 0xFFFFFFFF, 0x000FFFFF, 0x10800000, 0x00000000,
896 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
897 0xFFFFFFFF, 0x000001FF, 0x00000000, 0x00000000,
898 0x1FFFFFFF, 0x00000000, 0xFFFF0000, 0x001F3FFF,
899 0x00000000, 0x00000000, 0x00000000, 0x00000000,
900 0x00000000, 0x00000000, 0x00000000, 0x00000000,
901 0x00000000, 0x00000000, 0x00000000, 0x00000000,
902 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
903 0x00000000, 0x00000000, 0x00000000, 0x00000000,
904 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
905 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
906 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
907 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
908 0x00000000, 0x00000000, 0x00000000, 0x80020000,
909 0x00000000, 0x00000000, 0x00000000, 0x00000000,
910 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
911 0x0000000F, 0x00000000, 0x00000000, 0x00000000,
912 0x000000E0, 0x1F3E03FE, 0xFFFFFFFE, 0xFFFFFFFF,
913 0xE07FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xF7FFFFFF,
914 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
915 0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
916 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
917 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
918 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
919 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
920 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
921 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
922 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
923 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
924 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
925 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
926 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
927 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
928 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
929 0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
930 0x00000000, 0x00000000, 0x00000000, 0x00000000,
931 0x00000000, 0x00000000, 0x00000000, 0x00000000,
932 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
933 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
934 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
935 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
936 0x00000000, 0x00000000, 0x00000000, 0x00000000,
937 0x00000000, 0x00000000, 0x00000000, 0x00000000,
938 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
939 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
940 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
941 0x00000000, 0x00000000, 0x00000000, 0x00000000,
942 0xA0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
943 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
944 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
945 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
946 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
947 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
948 0x00000000, 0x00000000, 0x00000000, 0xFFDF0000,
949 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x1FFFFFFF,
950 0x00000000, 0x07FFFFFE, 0x07FFFFFE, 0xFFFFFFC0,
951 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x00000000,
952 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
953 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
954 0x00000000, 0x00000000, 0x00000000, 0x00000000,
955 0x00000000, 0x00000000, 0x00000000, 0x00000000,
956 0x00000000, 0x00000000, 0x00000000, 0x00000000,
957 0x00000000, 0x00000000, 0x00000000, 0x00000000,
958 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
959 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
960 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
961 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
962 0x00000000, 0x00000000, 0x00000000, 0x00000000,
963 0x00000000, 0x00000000, 0x00000000, 0x00000000,
964 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
965 0x00000000, 0x00000000, 0x00000000, 0x00000000,
966 0x00000000, 0x00000000, 0x00000000, 0x00000000,
967 0x00000000, 0x00000000, 0x00000000, 0x00000000,
968 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
969 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
970 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
971 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
972 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
973 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
974 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
975 0xFFFFFDFF, 0xFFFFFDFF, 0x000003F7, 0x00000000,
976 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
977 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
978 0x00000000, 0x00000000, 0x00000000, 0x00000000,
979 0x00000000, 0x00000000, 0x00000000, 0x00000000,
980 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
981 0x00000000, 0x00000000, 0x00000000, 0x00000000,
982 0x00000000, 0x00000000, 0x00000000, 0x00000000,
983 0x00000000, 0x00000000, 0x00000000, 0x00000000
987 /* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, Nd, Pc, Mn, Mc, Cf,
988 as of Unicode 4.0. */
995 /*unsigned*/ int level3[36 << 4];
997 table_identifier_part =
1001 16, 144, 272, -1, -1, -1, -1, -1,
1002 -1, -1, -1, -1, -1, -1, 400
1005 528, 544, 560, 576, 592, 608, 624, 640,
1006 656, 672, 688, 704, 720, -1, 736, 752,
1007 768, -1, -1, -1, -1, -1, -1, -1,
1008 784, -1, 800, 800, 800, 800, 800, 800,
1009 800, 800, 800, 800, 800, 800, 816, 800,
1010 800, 800, 800, 800, 800, 800, 800, 800,
1011 800, 800, 800, 800, 800, 800, 800, 800,
1012 800, 800, 800, 800, 800, 800, 800, 800,
1013 800, 800, 800, 800, 800, 800, 800, 800,
1014 800, 800, 800, 800, 800, 800, 800, 832,
1015 800, 800, 848, -1, -1, -1, 800, 800,
1016 800, 800, 800, 800, 800, 800, 800, 800,
1017 800, 800, 800, 800, 800, 800, 800, 800,
1018 800, 800, 800, 864, -1, -1, -1, -1,
1019 -1, -1, -1, -1, -1, -1, -1, -1,
1020 -1, -1, -1, -1, 880, 896, 912, 928,
1021 944, 960, 976, -1, 992, -1, -1, -1,
1022 -1, -1, -1, -1, -1, -1, -1, -1,
1023 -1, -1, -1, -1, -1, -1, -1, -1,
1024 -1, -1, -1, -1, -1, -1, -1, -1,
1025 -1, -1, -1, -1, -1, -1, -1, -1,
1026 -1, -1, -1, -1, -1, -1, -1, -1,
1027 -1, -1, -1, -1, -1, -1, -1, -1,
1028 -1, -1, -1, -1, -1, -1, -1, -1,
1029 -1, -1, -1, -1, -1, -1, -1, -1,
1030 -1, -1, -1, -1, -1, -1, -1, -1,
1031 -1, -1, -1, -1, -1, -1, -1, -1,
1032 -1, -1, -1, -1, -1, -1, -1, -1,
1033 -1, -1, -1, -1, -1, -1, -1, -1,
1034 1008, -1, 1024, 1040, -1, -1, -1, -1,
1035 -1, -1, -1, -1, -1, -1, -1, -1,
1036 -1, -1, -1, -1, -1, -1, -1, -1,
1037 800, 800, 800, 800, 800, 800, 800, 800,
1038 800, 800, 800, 800, 800, 800, 800, 800,
1039 800, 800, 800, 800, 800, 800, 800, 800,
1040 800, 800, 800, 800, 800, 800, 800, 800,
1041 800, 800, 800, 800, 800, 800, 800, 800,
1042 800, 800, 800, 800, 800, 800, 800, 800,
1043 800, 800, 800, 800, 800, 800, 800, 800,
1044 800, 800, 800, 800, 800, 800, 800, 800,
1045 800, 800, 800, 800, 800, 800, 800, 800,
1046 800, 800, 800, 800, 800, 800, 800, 800,
1047 800, 800, 800, 1056, -1, -1, -1, -1,
1048 -1, -1, -1, -1, -1, -1, -1, -1,
1049 -1, -1, -1, -1, -1, -1, -1, -1,
1050 -1, -1, -1, -1, -1, -1, -1, -1,
1051 -1, -1, -1, -1, -1, -1, -1, -1,
1052 -1, -1, -1, -1, 800, 1072, -1, -1,
1053 1088, -1, -1, -1, -1, -1, -1, -1,
1054 -1, -1, -1, -1, -1, -1, -1, -1,
1055 -1, -1, -1, -1, -1, -1, -1, -1,
1056 -1, -1, -1, -1, -1, -1, -1, -1,
1057 -1, -1, -1, -1, -1, -1, -1, -1,
1058 -1, -1, -1, -1, -1, -1, -1, -1,
1059 -1, -1, -1, -1, -1, -1, -1, -1,
1060 -1, -1, -1, -1, -1, -1, -1, -1,
1061 -1, -1, -1, -1, -1, -1, -1, -1,
1062 -1, -1, -1, -1, -1, -1, -1, -1,
1063 -1, -1, -1, -1, -1, -1, -1, -1,
1064 -1, -1, -1, -1, -1, -1, -1, -1,
1065 -1, -1, -1, -1, -1, -1, -1, -1,
1066 -1, -1, -1, -1, -1, -1, -1, -1,
1067 -1, -1, -1, -1, -1, -1, -1, -1,
1068 -1, -1, -1, -1, -1, -1, -1, -1
1071 0x00000000, 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE,
1072 0x00000000, 0x04202400, 0xFF7FFFFF, 0xFF7FFFFF,
1073 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1074 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1075 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
1076 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
1077 0xFFFFFFFF, 0xFFFFFFFF, 0xE0FFFFFF, 0x0400FFFF,
1078 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
1079 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1080 0xFFFFFC7B, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
1081 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
1082 0xFFFE00FF, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
1083 0x003F000F, 0x07FFFFFE, 0x01FFFFFF, 0xFFFFC3FF,
1084 0xFFFFFFFF, 0xFFFFFFFF, 0xBFEFFFFF, 0x9FFFFDFF,
1085 0xFFFF8000, 0xFFFFFFFF, 0x0000E7FF, 0x00000000,
1086 0xFFFFFFFF, 0x0003FFFF, 0x00000000, 0x00000000,
1087 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1088 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1089 0xFFFFFFFE, 0xF3FFFFFF, 0xFF1F3FFF, 0x0000FFCF,
1090 0xFFF99FEE, 0xF3C5FDFF, 0xB080399F, 0x0003FFCF,
1091 0xFFF987EE, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
1092 0xFFFBBFEE, 0xF3EDFDFF, 0x00013BBF, 0x0000FFCF,
1093 0xFFF99FEE, 0xF3EDFDFF, 0xB0C0398F, 0x0002FFC3,
1094 0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
1095 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
1096 0xFFFDDFEC, 0xF3EFFDFF, 0x40603DDF, 0x0000FFC3,
1097 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
1098 0xFC7FFFEC, 0x2FFBFFFF, 0xFF5F847F, 0x000C0000,
1099 0xFFFFFFFE, 0x07FFFFFF, 0x03FF7FFF, 0x00000000,
1100 0xFEF02596, 0x3BFFECAE, 0x33FF3F5F, 0x00000000,
1101 0x03000001, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE07FF,
1102 0xFEFF0FDF, 0x1FFFFFFF, 0x00000040, 0x00000000,
1103 0xFFFFFFFF, 0x03C7F6FB, 0x03FF03FF, 0x00000000,
1104 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
1105 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
1106 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
1107 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
1108 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
1109 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x0003FE00,
1110 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
1111 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1112 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1113 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1114 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1115 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
1116 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
1117 0x001FDFFF, 0x001FFFFF, 0x000FFFFF, 0x000DDFFF,
1118 0xFFFFFFFF, 0xFFFFFFFF, 0x308FFFFF, 0x000003FF,
1119 0x03FF3800, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
1120 0xFFFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
1121 0x1FFFFFFF, 0x0FFF0FFF, 0xFFFFFFC0, 0x001F3FFF,
1122 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1123 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1124 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1125 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
1126 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1127 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1128 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
1129 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
1130 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
1131 0x0000F000, 0x80007C00, 0x00100001, 0x8002FC0F,
1132 0x00000000, 0x00000000, 0x1FFF0000, 0x000007E2,
1133 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
1134 0x0000000F, 0x00000000, 0x00000000, 0x00000000,
1135 0x000000E0, 0x1F3EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
1136 0xE67FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF,
1137 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
1138 0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
1139 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1140 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1141 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1142 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1143 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1144 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1145 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1146 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
1147 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1148 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1149 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1150 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
1151 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1152 0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
1153 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1154 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1155 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1156 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1157 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1158 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
1159 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1160 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1161 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1162 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1163 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
1164 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1165 0xE0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
1166 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
1167 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1168 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1169 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
1170 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
1171 0x0000FFFF, 0x0018000F, 0x0000E000, 0xFFDF0000,
1172 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x9FFFFFFF,
1173 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE, 0xFFFFFFE0,
1174 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x0E000000,
1175 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
1176 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
1177 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1178 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1179 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1180 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1181 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
1182 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
1183 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1184 0x3FFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
1185 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1186 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1187 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
1188 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1189 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1190 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1191 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1192 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1193 0x00000000, 0x00000000, 0x00000000, 0xFFFFE3E0,
1194 0x00000FE7, 0x00003C00, 0x00000000, 0x00000000,
1195 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
1196 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
1197 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
1198 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1199 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1200 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
1201 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
1202 0xFFFFFDFF, 0xFFFFFDFF, 0xFFFFC3F7, 0xFFFFFFFF,
1203 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1204 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
1205 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1206 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1207 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
1208 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1209 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1210 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1211 0x00000002, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1212 0x00000000, 0x00000000, 0x00000000, 0x00000000,
1213 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
1214 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF
1218 /* Return true if a given character can occur as first character of an
1219 identifier. See ECMA-334 section 9.4.2. */
1221 is_identifier_start (int c)
1223 return bitmap_lookup (&table_identifier_start, c);
1224 /* In ASCII only this would be:
1225 return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_');
1229 /* Return true if a given character can occur as character of an identifier.
1230 See ECMA-334 section 9.4.2. */
1232 is_identifier_part (int c)
1234 return bitmap_lookup (&table_identifier_part, c);
1235 /* In ASCII only this would be:
1236 return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
1237 || (c >= '0' && c <= '9') || c == '_');
/* Predicate that accepts every character.
   It has the same signature as is_identifier_start and is_identifier_part,
   so that it can be passed to do_getc_unicode_escaped when a Unicode escape
   may denote any character (as inside string and character literals).  */
static bool
is_any_character (int c)
{
  (void) c;
  return true;
}
1248 /* ======================= Preprocessor directives. ======================= */
1251 /* Phase 5: Remove preprocessor lines. See ECMA-334 section 9.5.
1252 As a side effect, this also removes initial whitespace on every line;
1253 this whitespace doesn't matter. */
/* Pushback storage for phase 5: room for SIZEOF (phase5_pushback)
   characters; phase5_pushback_length counts how many are currently
   stored.  */
1255 static int phase5_pushback[10];
1256 static int phase5_pushback_length;
/* NOTE(review): what follows are the visible fragments of the phase 5
   reader.  Its signature and several of its statements are not present
   in this listing.  */
/* Characters that were pushed back are delivered first, the most recently
   pushed one first.  */
1263 if (phase5_pushback_length)
1264 return phase5_pushback[--phase5_pushback_length];
/* Loop condition: continue while C is whitespace and is not UEOF
   (presumably the end-of-input marker).  */
1272 while (c != UEOF && is_whitespace (c));
1276 /* Ignore the entire line containing the preprocessor directive
1277 (including the // comment if it contains one). */
/* Loop condition: continue until C is UNL (presumably the newline marker)
   or UEOF.  */
1280 while (c != UEOF && c != UNL);
/* Push the character C back onto the phase 5 pushback storage.
   NOTE(review): the return type, the braces and the statement guarded by
   the capacity check are not present in this listing.  */
1292 phase5_ungetc (int c)
/* Capacity check: true when every slot of phase5_pushback is in use.  */
1296 if (phase5_pushback_length == SIZEOF (phase5_pushback))
/* Store C in the next free slot and increment the count.  */
1298 phase5_pushback[phase5_pushback_length++] = c;
1304 /* ========================== Reading of tokens. ========================== */
/* The kinds of tokens delivered by the tokenizer.  */
enum token_type_ty
{
  token_type_eof,               /* end of input */
  token_type_lparen,            /* ( */
  token_type_rparen,            /* ) */
  token_type_lbrace,            /* { */
  token_type_rbrace,            /* } */
  token_type_comma,             /* , */
  token_type_dot,               /* . */
  token_type_string_literal,    /* "abc", @"abc" */
  token_type_number,            /* 1.23 */
  token_type_symbol,            /* identifier, keyword, null */
  token_type_plus,              /* + */
  token_type_other              /* character literal, misc. operator */
};
typedef enum token_type_ty token_type_ty;
1323 typedef struct token_ty token_ty;
1327 char *string; /* for token_type_string_literal, token_type_symbol */
1328 refcounted_string_list_ty *comment; /* for token_type_string_literal */
1330 int logical_line_number;
1334 /* Free the memory pointed to by a 'struct token_ty'. */
1336 free_token (token_ty *tp)
1338 if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
1340 if (tp->type == token_type_string_literal)
1341 drop_reference (tp->comment);
1345 /* Read a Unicode escape sequence outside string/character literals.
1346 Reject Unicode escapes that don't fulfill the given predicate.
1347 See ECMA-334 section 9.4.2.
   PREDICATE is called with the decoded value.  The escape reader for
   string and character literals also uses this function, passing
   is_any_character as PREDICATE.
   NOTE(review): the return type, the local declarations and several
   statements of this function are not present in this listing.  */
1349 do_getc_unicode_escaped (bool (*predicate) (int))
1353 /* Use phase 3, because phase 4 elides comments. */
/* Only the introducers 'u' and 'U' start a Unicode escape.  */
1357 if (c == 'u' || c == 'U')
/* Room for the hexadecimal digits of the escape; 8 is the larger of the
   two possible digit counts.  */
1359 unsigned char buf[8];
/* 'U' is followed by 8 hexadecimal digits, 'u' by 4.  */
1364 expect = (c == 'U' ? 8 : 4);
1366 for (i = 0; i < expect; i++)
1368 int c1 = phase3_getc ();
/* Accumulate the value N, one hexadecimal digit (4 bits) at a time.  */
1370 if (c1 >= '0' && c1 <= '9')
1371 n = (n << 4) + (c1 - '0');
1372 else if (c1 >= 'A' && c1 <= 'F')
1373 n = (n << 4) + (c1 - 'A' + 10);
1374 else if (c1 >= 'a' && c1 <= 'f')
1375 n = (n << 4) + (c1 - 'a' + 10);
/* Give a buffered digit back to phase 3.  */
1380 phase3_ungetc (buf[i]);
/* Emit the warning without the program name prefix.  */
1390 error_with_progname = false;
1391 error (0, 0, _("%s:%d: warning: invalid Unicode character"),
1392 logical_file_name, line_number);
1393 error_with_progname = true;
/* The caller decides, through PREDICATE, whether the decoded value is
   acceptable at this place.  */
1395 else if (predicate (n))
/* Give a buffered digit back to phase 3.  */
1399 phase3_ungetc (buf[i]);
1406 /* Read an escape sequence inside a string literal or character literal.
1407 See ECMA-334 sections 9.4.4.4., 9.4.4.5.
   NOTE(review): the signature and most of the statements of this function
   are not present in this listing; only fragments are visible below.  */
1415 /* Use phase 3, because phase 4 elides comments. */
/* Give the 'x' back to phase 3.  */
1449 phase3_ungetc ('x');
/* The hexadecimal digits.  */
1452 case '0': case '1': case '2': case '3': case '4':
1453 case '5': case '6': case '7': case '8': case '9':
1454 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1455 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
/* Accumulate the value N, one hexadecimal digit at a time.  */
1466 case '0': case '1': case '2': case '3': case '4':
1467 case '5': case '6': case '7': case '8': case '9':
1468 n = n * 16 + c - '0';
1470 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1471 n = n * 16 + 10 + c - 'A';
1473 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1474 n = n * 16 + 10 + c - 'a';
/* Unicode escapes are delegated; inside a literal, every character is
   acceptable, hence the is_any_character predicate.  */
1484 return do_getc_unicode_escaped (is_any_character);
1486 /* Invalid escape sequence. */
1492 /* Read a regular string literal or character literal.
1493 See ECMA-334 sections 9.4.4.4., 9.4.4.5.
   LITERAL is the buffer to which the characters are appended; the
   tokenizer passes NULL when it reads a character literal.
   DELIMITER is the character that ends the literal: '"' or '\''.
   NOTE(review): the return type, the loop skeleton and several statements
   are not present in this listing.  */
1495 accumulate_escaped (struct mixed_string_buffer *literal, int delimiter)
1501 /* Use phase 3, because phase 4 elides comments. */
/* The literal ends at its delimiter, or at UEOF.  */
1503 if (c == UEOF || c == delimiter)
/* Emit the warning without the program name prefix.  The wording depends
   on the kind of literal being read.  */
1508 error_with_progname = false;
1509 if (delimiter == '\'')
1510 error (0, 0, _("%s:%d: warning: unterminated character constant"),
1511 logical_file_name, line_number);
1513 error (0, 0, _("%s:%d: warning: unterminated string constant"),
1514 logical_file_name, line_number);
1515 error_with_progname = true;
/* Replace an escape sequence by the character it denotes.  */
1519 c = do_getc_escaped ();
1521 mixed_string_buffer_append_unicode (literal, c);
1526 /* Combine characters into tokens. Discard whitespace. */
1528 /* Maximum used guaranteed to be < 4. */
1529 static token_ty phase6_pushback[4];
1530 static int phase6_pushback_length;
/* Read the next token and store it in *TP.
   NOTE(review): the return type, the braces, the case labels and a number
   of statements of this function are not present in this listing; the
   comments below describe only what is visible.  */
1533 phase6_get (token_ty *tp)
/* Tokens that were pushed back are delivered first, the most recently
   pushed one first.  */
1537 if (phase6_pushback_length)
1539 *tp = phase6_pushback[--phase6_pushback_length];
/* Record the position at which the token starts.  */
1546 tp->line_number = line_number;
1547 tp->logical_line_number = logical_line_number;
1552 tp->type = token_type_eof;
/* Discard the saved comments if a non-comment line was seen after the
   last comment line.  */
1559 if (last_non_comment_line > last_comment_line)
1560 savable_comment_reset ();
1565 /* Ignore whitespace and comments. */
1569 last_non_comment_line = tp->logical_line_number;
/* Single-character punctuation tokens.  */
1574 tp->type = token_type_lparen;
1578 tp->type = token_type_rparen;
1582 tp->type = token_type_lbrace;
1586 tp->type = token_type_rbrace;
1590 tp->type = token_type_comma;
/* A character that is not a digit leads to a token_type_dot token.  */
1595 if (!(c >= '0' && c <= '9'))
1598 tp->type = token_type_dot;
1603 case '0': case '1': case '2': case '3': case '4':
1604 case '5': case '6': case '7': case '8': case '9':
1606 /* Don't need to verify the complicated syntax of integers and
1607 floating-point numbers. We assume a valid C# input.
1608 The simplified syntax that we recognize as number is: any
1609 sequence of alphanumeric characters, additionally '+' and '-'
1610 immediately after 'e' or 'E' except in hexadecimal numbers. */
1611 bool hexadecimal = false;
1616 if (c >= '0' && c <= '9')
1618 if ((c >= 'A' && c <= 'Z') || (c >= 'a' &&c <= 'z'))
/* An 'X' or 'x' is tested for here; see the comment above about
   hexadecimal numbers.  */
1620 if (c == 'X' || c == 'x')
/* An exponent marker, which may be followed by a sign.  */
1622 if ((c == 'E' || c == 'e') && !hexadecimal)
1625 if (!(c == '+' || c == '-'))
1635 tp->type = token_type_number;
1640 /* Regular string literal. */
1642 struct mixed_string_buffer *literal;
/* While the literal is being read, the lexical context is lc_string.  */
1644 lexical_context = lc_string;
1645 literal = mixed_string_buffer_alloc (lexical_context,
1647 logical_line_number);
1648 accumulate_escaped (literal, '"');
1649 tp->string = mixed_string_buffer_done (literal);
/* The token takes a reference on the comments saved so far.  */
1650 tp->comment = add_reference (savable_comment);
1651 lexical_context = lc_outside;
1652 tp->type = token_type_string_literal;
1657 /* Character literal. */
/* The contents of a character literal are read but not stored.  */
1659 accumulate_escaped (NULL, '\'');
1660 tp->type = token_type_other;
1668 tp->type = token_type_other;
1671 tp->type = token_type_other;
1676 tp->type = token_type_plus;
1684 /* Verbatim string literal. */
1685 struct string_buffer literal;
1687 lexical_context = lc_string;
1688 init_string_buffer (&literal);
1691 /* Use phase 2, because phase 4 elides comments and phase 3
1692 mixes up the newline characters. */
1705 /* No special treatment of newline and backslash here. */
1706 string_buffer_append_unicode (&literal, c);
/* The token gets its own copy of the buffer contents, so that the buffer
   can be freed right away.  */
1708 tp->string = xstrdup (string_buffer_result (&literal));
1709 free_string_buffer (&literal);
1710 tp->comment = add_reference (savable_comment);
1711 lexical_context = lc_outside;
1712 tp->type = token_type_string_literal;
1715 /* FALLTHROUGH, so that @identifier is recognized. */
/* A Unicode escape is accepted here only if it denotes a character that
   can start an identifier.  */
1719 c = do_getc_unicode_escaped (is_identifier_start);
1720 if (is_identifier_start (c))
/* The buffer is static and therefore reused across calls; it is emptied
   by resetting its length.  */
1722 static struct string_buffer buffer;
1723 buffer.utf8_buflen = 0;
1726 string_buffer_append_unicode (&buffer, c);
/* Inside an identifier, a Unicode escape must denote an identifier part
   character.  */
1729 c = do_getc_unicode_escaped (is_identifier_part);
1730 if (!is_identifier_part (c))
1734 tp->string = xstrdup (string_buffer_result (&buffer));
1735 tp->type = token_type_symbol;
1740 /* Misc. operator. */
1741 tp->type = token_type_other;
1748 /* Supports 3 tokens of pushback. */
1750 phase6_unget (token_ty *tp)
1752 if (tp->type != token_type_eof)
1754 if (phase6_pushback_length == SIZEOF (phase6_pushback))
1756 phase6_pushback[phase6_pushback_length++] = *tp;
1761 /* Compile-time optimization of string literal concatenation.
1762 Combine "string1" + ... + "stringN" to the concatenated string if
1763 - the token after this expression is not '.' (because then the last
1764 string could be part of a method call expression). */
/* Pushback storage for phase 7: room for SIZEOF (phase7_pushback) tokens;
   phase7_pushback_length counts how many are currently stored.  */
1766 static token_ty phase7_pushback[2];
1767 static int phase7_pushback_length;
/* Read the next token, after string literal concatenation, into *TP.
   NOTE(review): the return type, the braces, the loop skeleton and a few
   statements of this function are not present in this listing.  */
1770 phase7_get (token_ty *tp)
/* Tokens that were pushed back are delivered first, the most recently
   pushed one first.  */
1772 if (phase7_pushback_length)
1774 *tp = phase7_pushback[--phase7_pushback_length];
1779 if (tp->type == token_type_string_literal)
/* SUM is the concatenation built so far and SUM_LEN its length.  */
1781 char *sum = tp->string;
1782 size_t sum_len = strlen (sum);
/* Look ahead for: '+', string literal, and a token that is not '.'.  */
1788 phase6_get (&token2);
1789 if (token2.type == token_type_plus)
1793 phase6_get (&token3);
1794 if (token3.type == token_type_string_literal)
1796 token_ty token_after;
1798 phase6_get (&token_after);
1799 if (token_after.type != token_type_dot)
1801 char *addend = token3.string;
1802 size_t addend_len = strlen (addend);
/* Append ADDEND, including its terminating NUL byte, to SUM.  */
1804 sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
1805 memcpy (sum + sum_len, addend, addend_len + 1);
1806 sum_len += addend_len;
/* Only the token after the string is given back; the '+' and the string
   literal have been consumed and are freed.  */
1808 phase6_unget (&token_after);
1809 free_token (&token3);
1810 free_token (&token2);
/* No concatenation: give the lookahead tokens back, last read first, so
   that they are delivered again in their original order.  */
1813 phase6_unget (&token_after);
1815 phase6_unget (&token3);
1817 phase6_unget (&token2);
1824 /* Supports 2 tokens of pushback. */
1826 phase7_unget (token_ty *tp)
1828 if (tp->type != token_type_eof)
1830 if (phase7_pushback_length == SIZEOF (phase7_pushback))
1832 phase7_pushback[phase7_pushback_length++] = *tp;
1838 x_csharp_lex (token_ty *tp)
1843 /* Supports 2 tokens of pushback. */
1845 x_csharp_unlex (token_ty *tp)
1851 /* ========================= Extracting strings. ========================== */
1854 /* Context lookup table.
   It is set by extract_csharp from its FLAG_TABLE argument and looked up
   by extract_parenthesized with (possibly dotted) symbol names.  */
1855 static flag_context_list_table_ty *flag_context_list_table;
1858 /* The file is broken into tokens. Scan the token stream, looking for
1859 a keyword, followed by a left paren, followed by a string. When we
1860 see this sequence, we have something to remember. We assume we are
1861 looking at a valid C# program, and leave the complaints about
1862 the grammar to the compiler.
1864 Normal handling: Look for
1865 keyword ( ... msgid ... )
1866 Plural handling: Look for
1867 keyword ( ... msgid ... msgid_plural ... )
1869 We use recursion because the arguments before msgid or between msgid
1870 and msgid_plural can contain subexpressions of the same form. */
1873 /* Extract messages until the next balanced closing parenthesis or brace,
1874 depending on TERMINATOR.
1875 Extracted messages are added to MLP.
   OUTER_CONTEXT and CONTEXT_ITER determine the flag context that applies
   to the current argument.
   ARGPARSER collects the string arguments seen at this nesting level; it
   is passed to arglist_parser_done on every visible exit path.
1876 Return true upon eof, false upon closing parenthesis or brace.
   NOTE(review): the return type, the braces, the loop and switch skeleton,
   and the return/continue statements of this function are not present in
   this listing; the comments below describe only what is visible.  */
1878 extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
1879 flag_context_ty outer_context,
1880 flag_context_list_iterator_ty context_iter,
1881 struct arglist_parser *argparser)
1883 /* Current argument number. */
1885 /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
1887 /* Parameters of the keyword just seen. Defined only in state 1. */
1888 const struct callshapes *next_shapes = NULL;
1889 /* Context iterator that will be used if the next token is a '('. */
1890 flag_context_list_iterator_ty next_context_iter =
1891 passthrough_context_list_iterator;
1892 /* Current context. */
1893 flag_context_ty inner_context =
1894 inherited_context (outer_context,
1895 flag_context_list_iterator_advance (&context_iter));
1897 /* Start state is 0. */
1904 x_csharp_lex (&token);
1907 case token_type_symbol:
1909 /* Combine symbol1 . ... . symbolN into a single string, so that
1910 we can recognize static function calls like
1911 GettextResource.gettext. The information present for
1912 symbolI.....symbolN has precedence over the information for
1913 symbolJ.....symbolN with J > I. */
1914 char *sum = token.string;
1915 size_t sum_len = strlen (sum);
1916 const char *dottedname;
1917 flag_context_list_ty *context_list;
/* Look ahead for: '.', symbol.  */
1923 x_csharp_lex (&token2);
1924 if (token2.type == token_type_dot)
1928 x_csharp_lex (&token3);
1929 if (token3.type == token_type_symbol)
1931 char *addend = token3.string;
1932 size_t addend_len = strlen (addend);
/* Make room for one separator character, ADDEND, and the terminating NUL
   byte.  */
1935 (char *) xrealloc (sum, sum_len + 1 + addend_len + 1);
1937 memcpy (sum + sum_len + 1, addend, addend_len + 1);
1938 sum_len += 1 + addend_len;
1940 free_token (&token3);
1941 free_token (&token2);
/* Lookahead tokens that were not consumed are given back, last read
   first.  */
1944 x_csharp_unlex (&token3);
1946 x_csharp_unlex (&token2);
/* Look up the dotted name in the keywords table.  The search starts with
   the full name and moves on from one '.' to the next.  */
1950 for (dottedname = sum;;)
1952 void *keyword_value;
1954 if (hash_find_entry (&keywords, dottedname, strlen (dottedname),
1958 next_shapes = (const struct callshapes *) keyword_value;
1963 dottedname = strchr (dottedname, '.');
1964 if (dottedname == NULL)
/* Same search in the flag context table.  */
1972 for (dottedname = sum;;)
1975 flag_context_list_table_lookup (
1976 flag_context_list_table,
1977 dottedname, strlen (dottedname));
1978 if (context_list != NULL)
1981 dottedname = strchr (dottedname, '.');
1982 if (dottedname == NULL)
1986 next_context_iter = flag_context_list_iterator (context_list);
/* An opening parenthesis: recurse with a fresh argument list parser.  It
   gets the call shapes of the keyword only if the keyword was seen right
   before (state 1).  */
1992 case token_type_lparen:
1993 if (extract_parenthesized (mlp, token_type_rparen,
1994 inner_context, next_context_iter,
1995 arglist_parser_alloc (mlp,
1996 state ? next_shapes : NULL)))
/* arglist_parser_done is called with po_charset_utf8 as the current source
   encoding; afterwards the global source encoding is restored.  */
1998 xgettext_current_source_encoding = po_charset_utf8;
1999 arglist_parser_done (argparser, arg);
2000 xgettext_current_source_encoding = xgettext_global_source_encoding;
2003 next_context_iter = null_context_list_iterator;
2007 case token_type_rparen:
2008 if (terminator == token_type_rparen)
2010 xgettext_current_source_encoding = po_charset_utf8;
2011 arglist_parser_done (argparser, arg);
2012 xgettext_current_source_encoding = xgettext_global_source_encoding;
/* Mismatched closing token: warn, without the program name prefix.  */
2015 if (terminator == token_type_rbrace)
2017 error_with_progname = false;
2019 _("%s:%d: warning: ')' found where '}' was expected"),
2020 logical_file_name, token.line_number);
2021 error_with_progname = true;
2023 next_context_iter = null_context_list_iterator;
/* An opening brace: recurse with the null context and an argument list
   parser without call shapes.  */
2027 case token_type_lbrace:
2028 if (extract_parenthesized (mlp, token_type_rbrace,
2029 null_context, null_context_list_iterator,
2030 arglist_parser_alloc (mlp, NULL)))
2032 xgettext_current_source_encoding = po_charset_utf8;
2033 arglist_parser_done (argparser, arg);
2034 xgettext_current_source_encoding = xgettext_global_source_encoding;
2037 next_context_iter = null_context_list_iterator;
2041 case token_type_rbrace:
2042 if (terminator == token_type_rbrace)
2044 xgettext_current_source_encoding = po_charset_utf8;
2045 arglist_parser_done (argparser, arg);
2046 xgettext_current_source_encoding = xgettext_global_source_encoding;
/* Mismatched closing token: warn, without the program name prefix.  */
2049 if (terminator == token_type_rparen)
2051 error_with_progname = false;
2053 _("%s:%d: warning: '}' found where ')' was expected"),
2054 logical_file_name, token.line_number);
2055 error_with_progname = true;
2057 next_context_iter = null_context_list_iterator;
/* A comma starts the next argument; the flag context is recomputed from
   OUTER_CONTEXT.  */
2061 case token_type_comma:
2064 inherited_context (outer_context,
2065 flag_context_list_iterator_advance (
2067 next_context_iter = passthrough_context_list_iterator;
2071 case token_type_string_literal:
/* The message position is the line on which the token started.  */
2074 pos.file_name = logical_file_name;
2075 pos.line_number = token.line_number;
2077 xgettext_current_source_encoding = po_charset_utf8;
/* The string is either remembered directly as a message, or handed to the
   argument list parser together with the current argument number.  */
2079 remember_a_message (mlp, NULL, token.string, inner_context,
2080 &pos, NULL, token.comment);
2082 arglist_parser_remember (argparser, arg, token.string,
2084 pos.file_name, pos.line_number,
2086 xgettext_current_source_encoding = xgettext_global_source_encoding;
/* Release the reference on the comment list that the token held.  */
2088 drop_reference (token.comment);
2089 next_context_iter = null_context_list_iterator;
2093 case token_type_eof:
2094 xgettext_current_source_encoding = po_charset_utf8;
2095 arglist_parser_done (argparser, arg);
2096 xgettext_current_source_encoding = xgettext_global_source_encoding;
/* All other tokens only reset the context iterator for a following '('.  */
2099 case token_type_dot:
2100 case token_type_number:
2101 case token_type_plus:
2102 case token_type_other:
2103 next_context_iter = null_context_list_iterator;
/* Scan a C# source file and add its messages to the message list
   MDLP->item[0]->messages.
   REAL_FILENAME is stored in real_file_name for the duration of the scan.
   LOGICAL_FILENAME is copied with xstrdup and stored in logical_file_name,
   which is used in message positions and in warnings.
   FLAG_TABLE becomes the flag context lookup table.
   NOTE(review): how F is used cannot be seen here; the return type, the
   braces and several statements of this function are not present in this
   listing.  */
2115 extract_csharp (FILE *f,
2116 const char *real_filename, const char *logical_filename,
2117 flag_context_list_table_ty *flag_table,
2118 msgdomain_list_ty *mdlp)
2120 message_list_ty *mlp = mdlp->item[0]->messages;
2123 real_file_name = real_filename;
2124 logical_file_name = xstrdup (logical_filename);
/* Reset the tokenizer state for the new file.  */
2127 lexical_context = lc_outside;
2129 logical_line_number = 1;
2130 last_comment_line = -1;
2131 last_non_comment_line = -1;
2133 flag_context_list_table = flag_table;
2137 /* Eat tokens until eof is seen. When extract_parenthesized returns
2138 due to an unbalanced closing parenthesis, just restart it. */
2139 while (!extract_parenthesized (mlp, token_type_eof,
2140 null_context, null_context_list_iterator,
2141 arglist_parser_alloc (mlp, NULL)))
/* The file names must not be used any more after the scan.  */
2145 real_file_name = NULL;
2146 logical_file_name = NULL;