2 * Copyright © 2011 Codethink Limited
3 * Copyright © 2011 Google, Inc.
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Codethink Author(s): Ryan Lortie
26 * Google Author(s): Behdad Esfahbod
31 /* Unit tests for hb-unicode.h */
42 /* Check all properties */
44 /* Some of the following tables were adapted from glib/glib/tests/utf8-misc.c.
45 * The license is compatible. */
48 hb_codepoint_t unicode;
52 static const test_pair_t combining_class_tests[] =
90 static const test_pair_t combining_class_tests_more[] =
92 /* Unicode-5.2 character additions */
95 /* Unicode-6.0 character additions */
101 static const test_pair_t eastasian_width_tests[] =
142 static const test_pair_t eastasian_width_tests_more[] =
144 /* Default Wide blocks */
153 /* Unicode-5.2 character additions */
157 /* Unicode-6.0 character additions */
/* Expected general category for one sample code point of each category.
 * Every entry pairs a code point with the HB_UNICODE_GENERAL_CATEGORY_* value
 * the getter under test must report.  The final entry, 0x111111, lies above
 * the highest Unicode code point (U+10FFFF) and is expected to be reported
 * as UNASSIGNED. */
165 static const test_pair_t general_category_tests[] =
167 { 0x000D, HB_UNICODE_GENERAL_CATEGORY_CONTROL },
168 { 0x200E, HB_UNICODE_GENERAL_CATEGORY_FORMAT },
169 { 0x0378, HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED },
170 { 0xE000, HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE },
171 { 0xD800, HB_UNICODE_GENERAL_CATEGORY_SURROGATE },
172 { 0x0061, HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER },
173 { 0x02B0, HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER },
174 { 0x3400, HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER },
175 { 0x01C5, HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER },
176 { 0xFF21, HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER },
177 { 0x0903, HB_UNICODE_GENERAL_CATEGORY_COMBINING_MARK },
178 { 0x20DD, HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK },
179 { 0xA806, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK },
180 { 0xFF10, HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER },
181 { 0x16EE, HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER },
182 { 0x17F0, HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER },
183 { 0x005F, HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION },
184 { 0x058A, HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION },
185 { 0x0F3B, HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION },
186 { 0x2019, HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION },
187 { 0x2018, HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION },
188 { 0x2016, HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION },
189 { 0x0F3A, HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION },
190 { 0x20A0, HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL },
191 { 0x309B, HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL },
192 { 0xFB29, HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL },
193 { 0x00A6, HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL },
194 { 0x2028, HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR },
195 { 0x2029, HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR },
196 { 0x202F, HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR },
198 { 0x111111, HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED }
/* Additional general-category expectations for characters added in
 * Unicode 5.2 and 6.0.  PROPERTY() records this table's element count, but
 * the test functions visible in this file iterate only the primary table
 * (see the "XXX test _more tests" note in main). */
200 static const test_pair_t general_category_tests_more[] =
202 /* Unicode-5.2 character additions */
203 { 0x1F131, HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL },
205 /* Unicode-6.0 character additions */
206 { 0x0620, HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER },
208 { 0x111111, HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED }
/* Expected mirroring results: each entry pairs a code point with the code
 * point the mirroring getter must return.  Characters that do not mirror are
 * expected to map to themselves, as is the out-of-range value 0x111111. */
211 static const test_pair_t mirroring_tests[] =
213 /* Some characters that do NOT mirror */
218 { 0xE01EF, 0xE01EF },
219 { 0x1D7C3, 0x1D7C3 },
220 { 0x100000, 0x100000 },
222 /* Some characters that do mirror */
242 { 0x111111, 0x111111 },
/* Companion "more" table for mirroring; it carries only the out-of-range
 * entry, which is expected to map to itself. */
244 static const test_pair_t mirroring_tests_more[] =
246 /* No new mirroring characters have been encoded in recent Unicode versions. */
247 { 0x111111, 0x111111 }
/* Expected script for sample code points.  Each entry pairs a code point
 * with the HB_SCRIPT_* value the script getter must return.  The later
 * groups are labelled with the Unicode version they were added for; the
 * final entry, 0x111111, is above U+10FFFF and is expected to be
 * HB_SCRIPT_UNKNOWN. */
250 static const test_pair_t script_tests[] =
252 { 0x002A, HB_SCRIPT_COMMON },
253 { 0x0670, HB_SCRIPT_INHERITED },
254 { 0x060D, HB_SCRIPT_ARABIC },
255 { 0x0559, HB_SCRIPT_ARMENIAN },
256 { 0x09CD, HB_SCRIPT_BENGALI },
257 { 0x31B6, HB_SCRIPT_BOPOMOFO },
258 { 0x13A2, HB_SCRIPT_CHEROKEE },
259 { 0x2CFD, HB_SCRIPT_COPTIC },
260 { 0x0482, HB_SCRIPT_CYRILLIC },
261 { 0x10401, HB_SCRIPT_DESERET },
262 { 0x094D, HB_SCRIPT_DEVANAGARI },
263 { 0x1258, HB_SCRIPT_ETHIOPIC },
264 { 0x10FC, HB_SCRIPT_GEORGIAN },
265 { 0x10341, HB_SCRIPT_GOTHIC },
266 { 0x0375, HB_SCRIPT_GREEK },
267 { 0x0A83, HB_SCRIPT_GUJARATI },
268 { 0x0A3C, HB_SCRIPT_GURMUKHI },
269 { 0x3005, HB_SCRIPT_HAN },
270 { 0x1100, HB_SCRIPT_HANGUL },
271 { 0x05BF, HB_SCRIPT_HEBREW },
272 { 0x309F, HB_SCRIPT_HIRAGANA },
273 { 0x0CBC, HB_SCRIPT_KANNADA },
274 { 0x30FF, HB_SCRIPT_KATAKANA },
275 { 0x17DD, HB_SCRIPT_KHMER },
276 { 0x0EDD, HB_SCRIPT_LAO },
277 { 0x0061, HB_SCRIPT_LATIN },
278 { 0x0D3D, HB_SCRIPT_MALAYALAM },
279 { 0x1843, HB_SCRIPT_MONGOLIAN },
280 { 0x1031, HB_SCRIPT_MYANMAR },
281 { 0x169C, HB_SCRIPT_OGHAM },
282 { 0x10322, HB_SCRIPT_OLD_ITALIC },
283 { 0x0B3C, HB_SCRIPT_ORIYA },
284 { 0x16EF, HB_SCRIPT_RUNIC },
285 { 0x0DBD, HB_SCRIPT_SINHALA },
286 { 0x0711, HB_SCRIPT_SYRIAC },
287 { 0x0B82, HB_SCRIPT_TAMIL },
288 { 0x0C03, HB_SCRIPT_TELUGU },
289 { 0x07B1, HB_SCRIPT_THAANA },
290 { 0x0E31, HB_SCRIPT_THAI },
291 { 0x0FD4, HB_SCRIPT_TIBETAN },
292 { 0x1401, HB_SCRIPT_CANADIAN_ABORIGINAL },
293 { 0xA015, HB_SCRIPT_YI },
294 { 0x1700, HB_SCRIPT_TAGALOG },
295 { 0x1720, HB_SCRIPT_HANUNOO },
296 { 0x1740, HB_SCRIPT_BUHID },
297 { 0x1760, HB_SCRIPT_TAGBANWA },
299 /* Unicode-4.0 additions */
300 { 0x2800, HB_SCRIPT_BRAILLE },
301 { 0x10808, HB_SCRIPT_CYPRIOT },
302 { 0x1932, HB_SCRIPT_LIMBU },
303 { 0x10480, HB_SCRIPT_OSMANYA },
304 { 0x10450, HB_SCRIPT_SHAVIAN },
305 { 0x10000, HB_SCRIPT_LINEAR_B },
306 { 0x1950, HB_SCRIPT_TAI_LE },
307 { 0x1039F, HB_SCRIPT_UGARITIC },
309 /* Unicode-4.1 additions */
310 { 0x1980, HB_SCRIPT_NEW_TAI_LUE },
311 { 0x1A1F, HB_SCRIPT_BUGINESE },
312 { 0x2C00, HB_SCRIPT_GLAGOLITIC },
313 { 0x2D6F, HB_SCRIPT_TIFINAGH },
314 { 0xA800, HB_SCRIPT_SYLOTI_NAGRI },
315 { 0x103D0, HB_SCRIPT_OLD_PERSIAN },
316 { 0x10A3F, HB_SCRIPT_KHAROSHTHI },
318 /* Unicode-5.0 additions */
319 { 0x0378, HB_SCRIPT_UNKNOWN },
320 { 0x1B04, HB_SCRIPT_BALINESE },
321 { 0x12000, HB_SCRIPT_CUNEIFORM },
322 { 0x10900, HB_SCRIPT_PHOENICIAN },
323 { 0xA840, HB_SCRIPT_PHAGS_PA },
324 { 0x07C0, HB_SCRIPT_NKO },
326 /* Unicode-5.1 additions */
327 { 0xA900, HB_SCRIPT_KAYAH_LI },
328 { 0x1C00, HB_SCRIPT_LEPCHA },
329 { 0xA930, HB_SCRIPT_REJANG },
330 { 0x1B80, HB_SCRIPT_SUNDANESE },
331 { 0xA880, HB_SCRIPT_SAURASHTRA },
332 { 0xAA00, HB_SCRIPT_CHAM },
333 { 0x1C50, HB_SCRIPT_OL_CHIKI },
334 { 0xA500, HB_SCRIPT_VAI },
335 { 0x102A0, HB_SCRIPT_CARIAN },
336 { 0x10280, HB_SCRIPT_LYCIAN },
337 { 0x1093F, HB_SCRIPT_LYDIAN },
339 { 0x111111, HB_SCRIPT_UNKNOWN }
/* Additional script expectations covering scripts and characters added in
 * Unicode 5.2 and 6.0.  PROPERTY() records this table's element count, but
 * the test functions visible in this file iterate only the primary table
 * (see the "XXX test _more tests" note in main). */
341 static const test_pair_t script_tests_more[] =
343 /* Unicode-5.2 additions */
344 { 0x10B00, HB_SCRIPT_AVESTAN },
345 { 0xA6A0, HB_SCRIPT_BAMUM },
346 { 0x13000, HB_SCRIPT_EGYPTIAN_HIEROGLYPHS },
347 { 0x10840, HB_SCRIPT_IMPERIAL_ARAMAIC },
348 { 0x10B60, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI },
349 { 0x10B40, HB_SCRIPT_INSCRIPTIONAL_PARTHIAN },
350 { 0xA980, HB_SCRIPT_JAVANESE },
351 { 0x11082, HB_SCRIPT_KAITHI },
352 { 0xA4D0, HB_SCRIPT_LISU },
353 { 0xABE5, HB_SCRIPT_MEETEI_MAYEK },
354 { 0x10A60, HB_SCRIPT_OLD_SOUTH_ARABIAN },
355 { 0x10C00, HB_SCRIPT_OLD_TURKIC },
356 { 0x0800, HB_SCRIPT_SAMARITAN },
357 { 0x1A20, HB_SCRIPT_TAI_THAM },
358 { 0xAA80, HB_SCRIPT_TAI_VIET },
360 /* Unicode-6.0 additions */
361 { 0x1BC0, HB_SCRIPT_BATAK },
362 { 0x11000, HB_SCRIPT_BRAHMI },
363 { 0x0840, HB_SCRIPT_MANDAIC },
365 /* Unicode-5.2 character additions */
366 { 0x1CED, HB_SCRIPT_INHERITED },
367 { 0x1400, HB_SCRIPT_CANADIAN_ABORIGINAL },
369 { 0x111111, HB_SCRIPT_UNKNOWN }
/* Generic function-pointer type taking a funcs object and a code point and
 * returning unsigned int.  NOTE(review): its trailing parameter(s) are not
 * visible in this excerpt, and nothing visible references it — confirm. */
373 typedef unsigned int (*get_func_t) (hb_unicode_funcs_t *ufuncs,
374 hb_codepoint_t unicode,
/* Generic type for the hb_unicode_funcs_set_<name>_func setters; PROPERTY()
 * casts each setter to this type.  The last parameter is a destroy
 * notifier. */
376 typedef unsigned int (*func_setter_func_t) (hb_unicode_funcs_t *ufuncs,
379 hb_destroy_func_t destroy);
/* Generic type for the hb_unicode_get_<name> getters; PROPERTY() casts each
 * getter to this type so that all properties can be called uniformly with
 * (ufuncs, unicode) and compared as unsigned int. */
380 typedef unsigned int (*getter_func_t) (hb_unicode_funcs_t *ufuncs,
381 hb_codepoint_t unicode);
/* Setter that installs a callback for this property
 * (hb_unicode_funcs_set_<name>_func, cast by PROPERTY()). */
385 func_setter_func_t func_setter;
/* Getter used to query this property (hb_unicode_get_<name>, cast by
 * PROPERTY()). */
386 getter_func_t getter;
/* Primary expectation table and its element count. */
387 const test_pair_t *tests;
388 unsigned int num_tests;
/* Supplementary expectation table and its element count. */
389 const test_pair_t *tests_more;
390 unsigned int num_tests_more;
/* Value expected from a funcs object with no callbacks installed; may be
 * RETURNS_UNICODE_ITSELF (interpreted by default_value ()). */
391 unsigned int default_value;
/* Sentinel for property_t.default_value: default_value () translates it into
 * "the queried code point itself".  Used for the mirroring property. */
394 #define RETURNS_UNICODE_ITSELF ((unsigned int) -1)
/* Builds one property_t initializer for property `name`: the setter
 * hb_unicode_funcs_set_<name>_func and getter hb_unicode_get_<name> (both
 * cast to the generic pointer types), plus the element counts of the
 * <name>_tests and <name>_tests_more tables.  DEFAULT is the value expected
 * when no callback is installed (presumably stored in default_value). */
396 #define PROPERTY(name, DEFAULT) \
399 (func_setter_func_t) hb_unicode_funcs_set_##name##_func, \
400 (getter_func_t) hb_unicode_get_##name, \
402 G_N_ELEMENTS (name##_tests), \
404 G_N_ELEMENTS (name##_tests_more), \
/* All Unicode properties exercised by the tests.  The second PROPERTY()
 * argument is the value expected from a funcs object that has no callbacks
 * installed; test_unicode_properties_nil checks it via default_value (). */
407 static const property_t properties[] =
409 PROPERTY (combining_class, 0),
410 PROPERTY (eastasian_width, 1),
411 PROPERTY (general_category, (unsigned int) HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER),
/* Mirroring has no fixed default: an unmirrored code point maps to itself. */
412 PROPERTY (mirroring, RETURNS_UNICODE_ITSELF),
413 PROPERTY (script, (unsigned int) HB_SCRIPT_UNKNOWN),
/* Checks every property against its expectation table using the funcs object
 * passed as user_data.  For each entry the property's getter is called on the
 * code point and the result is compared with the expected value.
 *
 * user_data: an hb_unicode_funcs_t *, which must already be immutable. */
418 test_unicode_properties (gconstpointer user_data)
420 hb_unicode_funcs_t *uf = (hb_unicode_funcs_t *) user_data;
423 g_assert (hb_unicode_funcs_is_immutable (uf));
425 for (i = 0; i < G_N_ELEMENTS (properties); i++) {
426 const property_t *p = &properties[i];
/* NOTE(review): presumably assigned from p->tests before the inner loop;
 * the assignment is not visible in this excerpt — confirm. */
427 const test_pair_t *tests;
429 g_test_message ("Testing property %s", p->name);
431 for (j = 0; j < p->num_tests; j++) {
/* Log the case first so a failing assertion can be traced to its entry. */
432 g_test_message ("Test %s #%d: U+%04X", p->name, j, tests[j].unicode);
433 g_assert_cmphex (p->getter (uf, tests[j].unicode), ==, tests[j].value);
/* Resolves a property's declared default for a given code point: returns
 * `unicode` itself when the default is the RETURNS_UNICODE_ITSELF sentinel,
 * otherwise returns the default unchanged. */
438 static hb_codepoint_t
439 default_value (hb_codepoint_t default_value, hb_codepoint_t unicode)
441 return default_value == RETURNS_UNICODE_ITSELF ? unicode : default_value;
/* Checks a freshly created funcs object that has no parent and no callbacks:
 * it must be mutable, and every property getter must return that property's
 * default value for each code point in its test table. */
445 test_unicode_properties_nil (void)
447 hb_unicode_funcs_t *uf = hb_unicode_funcs_create (NULL);
450 g_assert (!hb_unicode_funcs_is_immutable (uf));
452 for (i = 0; i < G_N_ELEMENTS (properties); i++) {
453 const property_t *p = &properties[i];
/* NOTE(review): presumably assigned from p->tests before the inner loop;
 * the assignment is not visible in this excerpt — confirm. */
454 const test_pair_t *tests;
457 for (j = 0; j < p->num_tests; j++)
/* Expected values come from the property's default, not from the table. */
458 g_assert_cmphex (p->getter (uf, tests[j].unicode), ==, default_value (p->default_value, tests[j].unicode));
/* Release the funcs object created at the top of this test. */
461 hb_unicode_funcs_destroy (uf);
/* Sentinel values stored in the two fixture data slots by data_fixture_init.
 * The script callbacks and free_up assert on them to verify that the right
 * user_data pointer was handed back. */
464 #define MAGIC0 0x12345678
465 #define MAGIC1 0x76543210
/* Fixture set-up: tags the two data slots with MAGIC0 and MAGIC1 so that
 * callbacks can tell which slot they were given. */
476 data_fixture_init (data_fixture_t *f, gconstpointer user_data)
478 f->data[0].value = MAGIC0;
479 f->data[1].value = MAGIC1;
482 data_fixture_finish (data_fixture_t *f, gconstpointer user_data)
/* Destroy notifier passed to hb_unicode_funcs_set_script_func.  Verifies that
 * `p` is one of the fixture data slots and that it has not been freed yet.
 * NOTE(review): the tests later assert on data->freed, so this presumably
 * also sets that flag; the statement is not visible in this excerpt. */
486 static void free_up (void *p)
488 data_t *data = (data_t *) p;
490 g_assert (data->value == MAGIC0 || data->value == MAGIC1);
491 g_assert (data->freed == FALSE);
/* Script callback for a funcs object that has no parent.  Expects user_data
 * to be the MAGIC0 slot and not yet freed.  Returns HB_SCRIPT_LATIN for the
 * ASCII lowercase letters 'a'..'z' and HB_SCRIPT_UNKNOWN for anything else. */
496 simple_get_script (hb_unicode_funcs_t *ufuncs,
497 hb_codepoint_t codepoint,
500 data_t *data = (data_t *) user_data;
502 g_assert (hb_unicode_funcs_get_parent (ufuncs) == NULL);
503 g_assert (data->value == MAGIC0);
504 g_assert (data->freed == FALSE);
506 if ('a' <= codepoint && codepoint <= 'z')
507 return HB_SCRIPT_LATIN;
509 return HB_SCRIPT_UNKNOWN;
/* Script callback for a subclassed funcs object (one that has a parent).
 * Expects user_data to be the MAGIC1 slot and not yet freed.  Overrides only
 * 'a', reporting it as HB_SCRIPT_ARABIC; every other code point is answered
 * by querying the parent funcs object. */
513 a_is_for_arabic_get_script (hb_unicode_funcs_t *ufuncs,
514 hb_codepoint_t codepoint,
517 data_t *data = (data_t *) user_data;
519 g_assert (hb_unicode_funcs_get_parent (ufuncs) != NULL);
520 g_assert (data->value == MAGIC1);
521 g_assert (data->freed == FALSE);
523 if (codepoint == 'a') {
524 return HB_SCRIPT_ARABIC;
/* Not overridden here: chain up to the parent's script lookup. */
526 hb_unicode_funcs_t *parent = hb_unicode_funcs_get_parent (ufuncs);
528 return hb_unicode_get_script (parent, codepoint);
/* Installs a custom script callback on a parentless funcs object and checks
 * that (a) the callback is used, (b) making the object immutable turns later
 * setter calls into no-ops, and (c) destroying the object releases only the
 * user data of the callback that was actually installed. */
533 test_unicode_custom (data_fixture_t *f, gconstpointer user_data)
535 hb_unicode_funcs_t *uf = hb_unicode_funcs_create (NULL);
537 hb_unicode_funcs_set_script_func (uf, simple_get_script,
538 &f->data[0], free_up);
540 g_assert_cmpint (hb_unicode_get_script (uf, 'a'), ==, HB_SCRIPT_LATIN);
541 g_assert_cmpint (hb_unicode_get_script (uf, '0'), ==, HB_SCRIPT_UNKNOWN);
543 g_assert (!hb_unicode_funcs_is_immutable (uf));
544 hb_unicode_funcs_make_immutable (uf);
545 g_assert (hb_unicode_funcs_is_immutable (uf));
547 /* Since uf is immutable now, the following setter should do nothing. */
548 hb_unicode_funcs_set_script_func (uf, a_is_for_arabic_get_script,
549 &f->data[1], free_up);
/* Nothing is freed until the object is destroyed; afterwards only data[0],
 * the slot of the callback installed before immutability, must be freed. */
551 g_assert (!f->data[0].freed && !f->data[1].freed);
552 hb_unicode_funcs_destroy (uf);
553 g_assert (f->data[0].freed && !f->data[1].freed);
/* Subclasses a parentless funcs object that has no callbacks installed.
 * The child overrides the script of 'a'; every other code point chains up to
 * the parent, which is expected to answer HB_SCRIPT_UNKNOWN. */
557 test_unicode_subclassing_nil (data_fixture_t *f, gconstpointer user_data)
559 hb_unicode_funcs_t *uf, *aa;
561 uf = hb_unicode_funcs_create (NULL);
563 aa = hb_unicode_funcs_create (uf);
/* Drop this test's own reference to the parent; `aa` is still used below and
 * chains up to it. */
565 hb_unicode_funcs_destroy (uf);
567 hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
568 &f->data[1], free_up);
570 g_assert_cmpint (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
571 g_assert_cmpint (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN);
/* Only data[1] was handed to a setter, so only it may be freed on destroy. */
573 g_assert (!f->data[0].freed && !f->data[1].freed);
574 hb_unicode_funcs_destroy (aa);
575 g_assert (!f->data[0].freed && f->data[1].freed);
/* Subclasses the default funcs object.  The child overrides the script of
 * 'a'; other code points chain up to the default implementation, which is
 * expected to classify 'b' as HB_SCRIPT_LATIN. */
579 test_unicode_subclassing_default (data_fixture_t *f, gconstpointer user_data)
581 hb_unicode_funcs_t *uf, *aa;
583 uf = hb_unicode_funcs_get_default ();
584 aa = hb_unicode_funcs_create (uf);
586 hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
587 &f->data[1], free_up);
589 g_assert_cmpint (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
590 g_assert_cmpint (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
/* Only data[1] was handed to a setter, so only it may be freed on destroy. */
592 g_assert (!f->data[0].freed && !f->data[1].freed);
593 hb_unicode_funcs_destroy (aa);
594 g_assert (!f->data[0].freed && f->data[1].freed);
/* Subclasses a funcs object that itself has a custom script callback.
 * Checks that the child's override wins for 'a', that other code points are
 * answered by the parent's callback, and that destroying the child releases
 * the user data of both the child and the parent. */
598 test_unicode_subclassing_deep (data_fixture_t *f, gconstpointer user_data)
600 hb_unicode_funcs_t *uf, *aa;
602 uf = hb_unicode_funcs_create (NULL);
604 hb_unicode_funcs_set_script_func (uf, simple_get_script,
605 &f->data[0], free_up);
607 aa = hb_unicode_funcs_create (uf);
609 hb_unicode_funcs_destroy (uf);
611 /* make sure the 'uf' didn't get freed, since 'aa' holds a ref */
612 g_assert (!f->data[0].freed);
614 hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
615 &f->data[1], free_up);
/* 'a' is overridden by the child; 'b' and '0' fall through to the parent's
 * simple_get_script (lowercase ASCII -> LATIN, otherwise UNKNOWN). */
617 g_assert_cmpint (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
618 g_assert_cmpint (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
619 g_assert_cmpint (hb_unicode_get_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN);
/* Destroying the child must release both user-data slots. */
621 g_assert (!f->data[0].freed && !f->data[1].freed);
622 hb_unicode_funcs_destroy (aa);
623 g_assert (f->data[0].freed && f->data[1].freed);
/* Test entry point: initialises the test harness, registers the property
 * tests (the callback-less case plus one flavor per funcs implementation)
 * and the custom/subclassing fixture tests, then runs them and returns the
 * harness result. */
628 main (int argc, char **argv)
630 hb_test_init (&argc, &argv);
632 hb_test_add (test_unicode_properties_nil);
/* The same property test is registered once per funcs implementation; the
 * funcs object is passed to the test as its user_data. */
634 hb_test_add_data_flavor (hb_unicode_funcs_get_default (), "default", test_unicode_properties);
636 hb_test_add_data_flavor (hb_glib_get_unicode_funcs (), "glib", test_unicode_properties);
639 hb_test_add_data_flavor (hb_icu_get_unicode_funcs (), "icu", test_unicode_properties);
642 hb_test_add_fixture (data_fixture, NULL, test_unicode_custom);
643 hb_test_add_fixture (data_fixture, NULL, test_unicode_subclassing_nil);
644 hb_test_add_fixture (data_fixture, NULL, test_unicode_subclassing_default);
645 hb_test_add_fixture (data_fixture, NULL, test_unicode_subclassing_deep);
647 /* XXX test icu ufuncs */
648 /* XXX test _more tests (warn?) */
649 /* XXX test chainup */
650 /* XXX test glib & icu two-way script conversion */
652 return hb_test_run ();