1 /* Test the Unicode character name functions.
2 Copyright (C) 2000-2003, 2005, 2007, 2009-2015 Free Software Foundation,
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 /* The names according to the UnicodeData.txt file, modified to contain the
29 Hangul syllable names, as described in the Unicode 3.0 book. */
30 static const char * unicode_names [0x110000];
32 /* Maximum entries in unicode_aliases. */
33 #define ALIASLEN 0x200
35 /* The aliases according to the NameAliases.txt file. */
42 static struct unicode_alias unicode_aliases [ALIASLEN];
43 static int aliases_count;
45 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt file. */
48 fill_names (const char *unicodedata_filename)
56 stream = fopen (unicodedata_filename, "r");
59 fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
63 while (fgets (line, sizeof line, stream))
71 comment = strchr (line, '#');
74 if (line[strspn (line, " \t\r\n")] == '\0')
81 fprintf (stderr, "short line in '%s':%d\n",
82 unicodedata_filename, lineno);
93 fprintf (stderr, "short line in '%s':%d\n",
94 unicodedata_filename, lineno);
98 i = strtoul (field0, NULL, 16);
101 fprintf (stderr, "index too large\n");
104 unicode_names[i] = xstrdup (field1);
106 if (ferror (stream) || fclose (stream))
108 fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
113 /* Stores in unicode_aliases[] the relevant contents of the NameAliases.txt file. */
116 fill_aliases (const char *namealiases_filename)
124 stream = fopen (namealiases_filename, "r");
127 fprintf (stderr, "error during fopen of '%s'\n", namealiases_filename);
131 while (fgets (line, sizeof line, stream))
137 comment = strchr (line, '#');
140 if (line[strspn (line, " \t\r\n")] == '\0')
149 fprintf (stderr, "short line in '%s':%d\n",
150 namealiases_filename, lineno);
159 fprintf (stderr, "short line in '%s':%d\n",
160 namealiases_filename, lineno);
165 uc = strtoul (field0, NULL, 16);
168 fprintf (stderr, "index too large\n");
172 if (aliases_count == ALIASLEN)
174 fprintf (stderr, "too many aliases\n");
177 unicode_aliases[aliases_count].name = xstrdup (field1);
178 unicode_aliases[aliases_count].uc = uc;
181 if (ferror (stream) || fclose (stream))
183 fprintf (stderr, "error reading from '%s'\n", namealiases_filename);
189 name_has_alias (unsigned int uc)
192 for (i = 0; i < ALIASLEN; i++)
193 if (unicode_aliases[i].uc == uc)
198 /* Perform an exhaustive test of the unicode_character_name function. */
204 char buf[UNINAME_MAX];
206 for (i = 0; i < 0x110000; i++) /* whole codespace U+0000..U+10FFFF, matching the size of unicode_names[] */
208 char *result = unicode_character_name (i, buf);
210 if (unicode_names[i] != NULL)
214 fprintf (stderr, "\\u%04X name lookup failed!\n", i);
217 else if (strcmp (result, unicode_names[i]) != 0)
219 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
228 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
235 for (i = 0x110000; i < 0x1000000; i++)
237 char *result = unicode_character_name (i, buf);
241 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
250 /* Perform a test of the unicode_name_character function. */
252 test_inverse_lookup ()
257 /* First, verify all valid character names are recognized. */
258 for (i = 0; i < 0x110000; i++)
259 if (unicode_names[i] != NULL)
261 unsigned int result = unicode_name_character (unicode_names[i]);
264 if (result == UNINAME_INVALID)
265 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
269 "inverse name lookup of \"%s\" returned 0x%04X\n",
270 unicode_names[i], result);
275 /* Second, generate random but likely names and verify they are not
276 recognized unless really valid. */
277 for (i = 0; i < 10000; i++)
282 unsigned int l1, l2, j1, j2;
283 char buf[2*UNINAME_MAX];
286 do i1 = ((rand () % 0x11) << 16)
287 + ((rand () & 0xff) << 8)
289 while (unicode_names[i1] == NULL);
291 do i2 = ((rand () % 0x11) << 16)
292 + ((rand () & 0xff) << 8)
294 while (unicode_names[i2] == NULL);
296 s1 = unicode_names[i1];
298 s2 = unicode_names[i2];
301 /* Concatenate a starting piece of s1 with an ending piece of s2. */
302 for (j1 = 1; j1 <= l1; j1++)
303 if (j1 == l1 || s1[j1] == ' ')
304 for (j2 = 0; j2 < l2; j2++)
305 if (j2 == 0 || s2[j2-1] == ' ')
307 memcpy (buf, s1, j1);
309 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
311 result = unicode_name_character (buf);
312 if (result != UNINAME_INVALID
313 && !name_has_alias (result)
314 && !(unicode_names[result] != NULL
315 && strcmp (unicode_names[result], buf) == 0))
318 "inverse name lookup of \"%s\" returned 0x%04X\n",
319 buf, result); /* report the synthesized name that was looked up; unicode_names[i] is unrelated to it and may be NULL */
325 /* Third, some extreme case that used to loop. */
326 if (unicode_name_character ("A A") != UNINAME_INVALID)
332 /* Perform a test of the unicode_name_character function for aliases. */
338 char buf[UNINAME_MAX];
340 /* Verify all valid character names are recognized. */
341 for (i = 0; i < ALIASLEN; i++)
342 if (unicode_aliases[i].uc != UNINAME_INVALID
343 /* Skip if the character has no canonical name (e.g. control characters). */
345 && unicode_character_name (unicode_aliases[i].uc, buf))
347 unsigned int result = unicode_name_character (unicode_aliases[i].name);
348 if (result != unicode_aliases[i].uc)
350 if (result == UNINAME_INVALID)
351 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
352 unicode_aliases[i].name);
355 "inverse name lookup of \"%s\" returned 0x%04X\n",
356 unicode_aliases[i].name, result);
365 main (int argc, char *argv[])
370 set_program_name (argv[0]);
372 for (i = 1; i < argc && strcmp (argv[i], "--") != 0; i++)
373 fill_names (argv[i]);
378 for (j = 0; j < ALIASLEN; j++)
379 unicode_aliases[j].uc = UNINAME_INVALID;
382 for (; i < argc; i++)
383 fill_aliases (argv[i]);
386 error |= test_name_lookup ();
387 error |= test_inverse_lookup ();
389 if (aliases_count > 0)
390 error |= test_alias_lookup ();