1 /* Test the Unicode character name functions.
2 Copyright (C) 2000-2003, 2005, 2007, 2009-2013 Free Software Foundation,
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Table of expected character names, indexed by the code value read from
   the first field of each line.  The names are those of the UnicodeData.txt
   file, modified to contain the Hangul syllable names, as described in the
   Unicode 3.0 book.  A NULL entry means that no name is known for that
   index.  */
const char *unicode_names[0x110000];
32 /* Maximum length of a field in the UnicodeData.txt file. */
35 /* Reads the next field from STREAM. The buffer BUFFER has size FIELDLEN.
36 Reads up to (but excluding) DELIM.
37 Returns 1 when a field was successfully read, otherwise 0. */
39 getfield (FILE *stream, char *buffer, int delim)
44 for (; (c = getc (stream)), (c != EOF && c != delim); )
46 /* Put c into the buffer. */
47 if (++count >= FIELDLEN - 1)
49 fprintf (stderr, "field too long\n");
62 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
65 fill_names (const char *unicodedata_filename)
69 char field0[FIELDLEN];
70 char field1[FIELDLEN];
73 for (i = 0; i < 0x110000; i++)
74 unicode_names[i] = NULL;
76 stream = fopen (unicodedata_filename, "r");
79 fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
89 n = getfield (stream, field0, ';');
90 n += getfield (stream, field1, ';');
95 fprintf (stderr, "short line in '%s':%d\n",
96 unicodedata_filename, lineno);
99 for (; (c = getc (stream)), (c != EOF && c != '\n'); )
101 i = strtoul (field0, NULL, 16);
104 fprintf (stderr, "index too large\n");
107 unicode_names[i] = xstrdup (field1);
109 if (ferror (stream) || fclose (stream))
111 fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
116 /* Perform an exhaustive test of the unicode_character_name function. */
122 char buf[UNINAME_MAX];
124 for (i = 0; i < 0x11000; i++)
126 char *result = unicode_character_name (i, buf);
128 if (unicode_names[i] != NULL)
132 fprintf (stderr, "\\u%04X name lookup failed!\n", i);
135 else if (strcmp (result, unicode_names[i]) != 0)
137 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
146 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
153 for (i = 0x110000; i < 0x1000000; i++)
155 char *result = unicode_character_name (i, buf);
159 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
168 /* Perform a test of the unicode_name_character function. */
170 test_inverse_lookup ()
175 /* First, verify all valid character names are recognized. */
176 for (i = 0; i < 0x110000; i++)
177 if (unicode_names[i] != NULL)
179 unsigned int result = unicode_name_character (unicode_names[i]);
182 if (result == UNINAME_INVALID)
183 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
187 "inverse name lookup of \"%s\" returned 0x%04X\n",
188 unicode_names[i], result);
193 /* Second, generate random but likely names and verify they are not
194 recognized unless really valid. */
195 for (i = 0; i < 10000; i++)
200 unsigned int l1, l2, j1, j2;
201 char buf[2*UNINAME_MAX];
204 do i1 = ((rand () % 0x11) << 16)
205 + ((rand () & 0xff) << 8)
207 while (unicode_names[i1] == NULL);
209 do i2 = ((rand () % 0x11) << 16)
210 + ((rand () & 0xff) << 8)
212 while (unicode_names[i2] == NULL);
214 s1 = unicode_names[i1];
216 s2 = unicode_names[i2];
219 /* Concatenate a starting piece of s1 with an ending piece of s2. */
220 for (j1 = 1; j1 <= l1; j1++)
221 if (j1 == l1 || s1[j1] == ' ')
222 for (j2 = 0; j2 < l2; j2++)
223 if (j2 == 0 || s2[j2-1] == ' ')
225 memcpy (buf, s1, j1);
227 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
229 result = unicode_name_character (buf);
230 if (result != UNINAME_INVALID
231 && !(unicode_names[result] != NULL
232 && strcmp (unicode_names[result], buf) == 0))
235 "inverse name lookup of \"%s\" returned 0x%04X\n",
236 unicode_names[i], result);
242 /* Third, some extreme case that used to loop. */
243 if (unicode_name_character ("A A") != UNINAME_INVALID)
/* Entry point.  argv[1] names the UnicodeData.txt-style file whose names
   are loaded into unicode_names[] and then used by both tests.
   Returns 0 if both tests passed, nonzero otherwise (including when the
   file argument is missing).  */
int
main (int argc, char *argv[])
{
  int error = 0;

  set_program_name (argv[0]);

  /* Without this check a missing argument would hand a NULL file name to
     fill_names.  */
  if (argc < 2)
    {
      fprintf (stderr, "usage: %s UNICODEDATA-FILE\n", argv[0]);
      return 1;
    }

  fill_names (argv[1]);

  error |= test_name_lookup ();
  error |= test_inverse_lookup ();

  return error;
}