2 * Copyright © 2011 Codethink Limited
3 * Copyright © 2011 Google, Inc.
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Codethink Author(s): Ryan Lortie
26 * Google Author(s): Behdad Esfahbod
31 /* Unit tests for hb-unicode.h */
42 /* Check all properties */
44 /* Some of the following tables were adapted from glib/glib/tests/utf8-misc.c.
45 * The license is compatible. */
48 hb_codepoint_t unicode;
52 static const test_pair_t combining_class_tests[] =
90 static const test_pair_t combining_class_tests_more[] =
92 /* Unicode-5.2 character additions */
95 /* Unicode-6.0 character additions */
101 static const test_pair_t eastasian_width_tests[] =
142 static const test_pair_t eastasian_width_tests_more[] =
144 /* Default Wide blocks */
153 /* Unicode-5.2 character additions */
157 /* Unicode-6.0 character additions */
/* Expected general-category results: one { codepoint, category } pair per
 * row, with one sample codepoint per category value listed here. The last
 * row uses 0x111111, which lies above U+10FFFF, and expects UNASSIGNED. */
165 static const test_pair_t general_category_tests[] =
167 { 0x000D, HB_UNICODE_GENERAL_CATEGORY_CONTROL },
168 { 0x200E, HB_UNICODE_GENERAL_CATEGORY_FORMAT },
169 { 0x0378, HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED },
170 { 0xE000, HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE },
171 { 0xD800, HB_UNICODE_GENERAL_CATEGORY_SURROGATE },
172 { 0x0061, HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER },
173 { 0x02B0, HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER },
174 { 0x3400, HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER },
175 { 0x01C5, HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER },
176 { 0xFF21, HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER },
177 { 0x0903, HB_UNICODE_GENERAL_CATEGORY_COMBINING_MARK },
178 { 0x20DD, HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK },
179 { 0xA806, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK },
180 { 0xFF10, HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER },
181 { 0x16EE, HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER },
182 { 0x17F0, HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER },
183 { 0x005F, HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION },
184 { 0x058A, HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION },
185 { 0x0F3B, HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION },
186 { 0x2019, HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION },
187 { 0x2018, HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION },
188 { 0x2016, HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION },
189 { 0x0F3A, HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION },
190 { 0x20A0, HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL },
191 { 0x309B, HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL },
192 { 0xFB29, HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL },
193 { 0x00A6, HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL },
194 { 0x2028, HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR },
195 { 0x2029, HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR },
196 { 0x202F, HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR },
198 { 0x111111, HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED }
/* Additional general-category expectations for characters added in newer
 * Unicode versions, kept apart from general_category_tests. The XXX note in
 * main() says the "_more" tables are not exercised yet. */
200 static const test_pair_t general_category_tests_more[] =
202 /* Unicode-5.2 character additions */
203 { 0x1F131, HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL },
205 /* Unicode-6.0 character additions */
206 { 0x0620, HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER },
208 { 0x111111, HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED }
/* Mirroring expectations as { codepoint, expected result } pairs. For a
 * character that does not mirror, the expected result is the codepoint
 * itself, as in the rows below. */
211 static const test_pair_t mirroring_tests[] =
213 /* Some characters that do NOT mirror */
218 { 0xE01EF, 0xE01EF },
219 { 0x1D7C3, 0x1D7C3 },
220 { 0x100000, 0x100000 },
222 /* Some characters that do mirror */
/* 0x111111 lies above U+10FFFF and is expected to map to itself. */
242 { 0x111111, 0x111111 },
/* Counterpart of mirroring_tests for newer Unicode versions. It holds only
 * the 0x111111 row, which is expected to map to itself. */
244 static const test_pair_t mirroring_tests_more[] =
246 /* No new mirroring characters have been encoded in recent Unicode versions. */
247 { 0x111111, 0x111111 }
/* Script expectations as { codepoint, expected hb_script_t } pairs, grouped
 * by the Unicode version that introduced the script. The final row uses
 * 0x111111, which lies above U+10FFFF, and expects HB_SCRIPT_UNKNOWN. */
250 static const test_pair_t script_tests[] =
252 { 0x002A, HB_SCRIPT_COMMON },
253 { 0x0670, HB_SCRIPT_INHERITED },
254 { 0x060D, HB_SCRIPT_ARABIC },
255 { 0x0559, HB_SCRIPT_ARMENIAN },
256 { 0x09CD, HB_SCRIPT_BENGALI },
257 { 0x31B6, HB_SCRIPT_BOPOMOFO },
258 { 0x13A2, HB_SCRIPT_CHEROKEE },
259 { 0x2CFD, HB_SCRIPT_COPTIC },
260 { 0x0482, HB_SCRIPT_CYRILLIC },
261 { 0x10401, HB_SCRIPT_DESERET },
262 { 0x094D, HB_SCRIPT_DEVANAGARI },
263 { 0x1258, HB_SCRIPT_ETHIOPIC },
264 { 0x10FC, HB_SCRIPT_GEORGIAN },
265 { 0x10341, HB_SCRIPT_GOTHIC },
266 { 0x0375, HB_SCRIPT_GREEK },
267 { 0x0A83, HB_SCRIPT_GUJARATI },
268 { 0x0A3C, HB_SCRIPT_GURMUKHI },
269 { 0x3005, HB_SCRIPT_HAN },
270 { 0x1100, HB_SCRIPT_HANGUL },
271 { 0x05BF, HB_SCRIPT_HEBREW },
272 { 0x309F, HB_SCRIPT_HIRAGANA },
273 { 0x0CBC, HB_SCRIPT_KANNADA },
274 { 0x30FF, HB_SCRIPT_KATAKANA },
275 { 0x17DD, HB_SCRIPT_KHMER },
276 { 0x0EDD, HB_SCRIPT_LAO },
277 { 0x0061, HB_SCRIPT_LATIN },
278 { 0x0D3D, HB_SCRIPT_MALAYALAM },
279 { 0x1843, HB_SCRIPT_MONGOLIAN },
280 { 0x1031, HB_SCRIPT_MYANMAR },
281 { 0x169C, HB_SCRIPT_OGHAM },
282 { 0x10322, HB_SCRIPT_OLD_ITALIC },
283 { 0x0B3C, HB_SCRIPT_ORIYA },
284 { 0x16EF, HB_SCRIPT_RUNIC },
285 { 0x0DBD, HB_SCRIPT_SINHALA },
286 { 0x0711, HB_SCRIPT_SYRIAC },
287 { 0x0B82, HB_SCRIPT_TAMIL },
288 { 0x0C03, HB_SCRIPT_TELUGU },
289 { 0x07B1, HB_SCRIPT_THAANA },
290 { 0x0E31, HB_SCRIPT_THAI },
291 { 0x0FD4, HB_SCRIPT_TIBETAN },
292 { 0x1401, HB_SCRIPT_CANADIAN_ABORIGINAL },
293 { 0xA015, HB_SCRIPT_YI },
294 { 0x1700, HB_SCRIPT_TAGALOG },
295 { 0x1720, HB_SCRIPT_HANUNOO },
296 { 0x1740, HB_SCRIPT_BUHID },
297 { 0x1760, HB_SCRIPT_TAGBANWA },
299 /* Unicode-4.0 additions */
300 { 0x2800, HB_SCRIPT_BRAILLE },
301 { 0x10808, HB_SCRIPT_CYPRIOT },
302 { 0x1932, HB_SCRIPT_LIMBU },
303 { 0x10480, HB_SCRIPT_OSMANYA },
304 { 0x10450, HB_SCRIPT_SHAVIAN },
305 { 0x10000, HB_SCRIPT_LINEAR_B },
306 { 0x1950, HB_SCRIPT_TAI_LE },
307 { 0x1039F, HB_SCRIPT_UGARITIC },
309 /* Unicode-4.1 additions */
310 { 0x1980, HB_SCRIPT_NEW_TAI_LUE },
311 { 0x1A1F, HB_SCRIPT_BUGINESE },
312 { 0x2C00, HB_SCRIPT_GLAGOLITIC },
313 { 0x2D6F, HB_SCRIPT_TIFINAGH },
314 { 0xA800, HB_SCRIPT_SYLOTI_NAGRI },
315 { 0x103D0, HB_SCRIPT_OLD_PERSIAN },
316 { 0x10A3F, HB_SCRIPT_KHAROSHTHI },
318 /* Unicode-5.0 additions */
319 { 0x0378, HB_SCRIPT_UNKNOWN },
320 { 0x1B04, HB_SCRIPT_BALINESE },
321 { 0x12000, HB_SCRIPT_CUNEIFORM },
322 { 0x10900, HB_SCRIPT_PHOENICIAN },
323 { 0xA840, HB_SCRIPT_PHAGS_PA },
324 { 0x07C0, HB_SCRIPT_NKO },
326 /* Unicode-5.1 additions */
327 { 0xA900, HB_SCRIPT_KAYAH_LI },
328 { 0x1C00, HB_SCRIPT_LEPCHA },
329 { 0xA930, HB_SCRIPT_REJANG },
330 { 0x1B80, HB_SCRIPT_SUNDANESE },
331 { 0xA880, HB_SCRIPT_SAURASHTRA },
332 { 0xAA00, HB_SCRIPT_CHAM },
333 { 0x1C50, HB_SCRIPT_OL_CHIKI },
334 { 0xA500, HB_SCRIPT_VAI },
335 { 0x102A0, HB_SCRIPT_CARIAN },
336 { 0x10280, HB_SCRIPT_LYCIAN },
337 { 0x1093F, HB_SCRIPT_LYDIAN },
339 { 0x111111, HB_SCRIPT_UNKNOWN }
/* Script expectations for scripts and characters added in Unicode 5.2 and
 * 6.0, kept apart from script_tests. The XXX note in main() says the "_more"
 * tables are not exercised yet. */
341 static const test_pair_t script_tests_more[] =
343 /* Unicode-5.2 additions */
344 { 0x10B00, HB_SCRIPT_AVESTAN },
345 { 0xA6A0, HB_SCRIPT_BAMUM },
346 { 0x13000, HB_SCRIPT_EGYPTIAN_HIEROGLYPHS },
347 { 0x10840, HB_SCRIPT_IMPERIAL_ARAMAIC },
348 { 0x10B60, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI },
349 { 0x10B40, HB_SCRIPT_INSCRIPTIONAL_PARTHIAN },
350 { 0xA980, HB_SCRIPT_JAVANESE },
351 { 0x11082, HB_SCRIPT_KAITHI },
352 { 0xA4D0, HB_SCRIPT_LISU },
353 { 0xABE5, HB_SCRIPT_MEETEI_MAYEK },
354 { 0x10A60, HB_SCRIPT_OLD_SOUTH_ARABIAN },
355 { 0x10C00, HB_SCRIPT_OLD_TURKIC },
356 { 0x0800, HB_SCRIPT_SAMARITAN },
357 { 0x1A20, HB_SCRIPT_TAI_THAM },
358 { 0xAA80, HB_SCRIPT_TAI_VIET },
360 /* Unicode-6.0 additions */
361 { 0x1BC0, HB_SCRIPT_BATAK },
362 { 0x11000, HB_SCRIPT_BRAHMI },
363 { 0x0840, HB_SCRIPT_MANDAIC },
365 /* Unicode-5.2 character additions */
366 { 0x1CED, HB_SCRIPT_INHERITED },
367 { 0x1400, HB_SCRIPT_CANADIAN_ABORIGINAL },
369 { 0x111111, HB_SCRIPT_UNKNOWN }
373 typedef unsigned int (*get_func_t) (hb_unicode_funcs_t *ufuncs,
374 hb_codepoint_t unicode,
376 typedef unsigned int (*func_setter_func_t) (hb_unicode_funcs_t *ufuncs,
379 hb_destroy_func_t destroy);
380 typedef unsigned int (*getter_func_t) (hb_unicode_funcs_t *ufuncs,
381 hb_codepoint_t unicode);
385 func_setter_func_t func_setter;
386 getter_func_t getter;
387 const test_pair_t *tests;
388 unsigned int num_tests;
389 const test_pair_t *tests_more;
390 unsigned int num_tests_more;
391 unsigned int default_value;
/* Sentinel for a property's default value: default_value() answers with the
 * queried codepoint itself when it is handed this constant. */
394 #define RETURNS_UNICODE_ITSELF ((unsigned int) -1)
396 #define PROPERTY(name, DEFAULT) \
399 (func_setter_func_t) hb_unicode_funcs_set_##name##_func, \
400 (getter_func_t) hb_unicode_get_##name, \
402 G_N_ELEMENTS (name##_tests), \
404 G_N_ELEMENTS (name##_tests_more), \
/* The properties under test, one PROPERTY() entry each. The second argument
 * is presumably stored as property_t.default_value, which
 * test_unicode_properties_nil() compares against - the macro line doing so is
 * not visible in this listing; confirm. */
407 static const property_t properties[] =
409 PROPERTY (combining_class, 0),
410 PROPERTY (eastasian_width, 1),
411 PROPERTY (general_category, (unsigned int) HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER),
/* Mirroring has no fixed default: the expected answer is the input codepoint. */
412 PROPERTY (mirroring, RETURNS_UNICODE_ITSELF),
413 PROPERTY (script, (unsigned int) HB_SCRIPT_UNKNOWN),
/* Runs each property's getter over its test rows, using the
 * hb_unicode_funcs_t passed in through user_data, and asserts that every
 * result equals the expected value in the table. */
418 test_unicode_properties (gconstpointer user_data)
420 hb_unicode_funcs_t *uf = (hb_unicode_funcs_t *) user_data;
/* The funcs handed to this test must already be immutable. */
423 g_assert (hb_unicode_funcs_is_immutable (uf));
425 for (i = 0; i < G_N_ELEMENTS (properties); i++) {
426 const property_t *p = &properties[i];
427 const test_pair_t *tests;
429 g_test_message ("Testing property %s", p->name);
/* NOTE(review): the statement pointing `tests` at a table (presumably
 * p->tests) is not visible in this listing - confirm it precedes the loop. */
431 for (j = 0; j < p->num_tests; j++) {
432 g_test_message ("Test %s #%d: U+%04X", p->name, j, tests[j].unicode);
/* Getter output must match the table's expected value exactly. */
433 g_assert_cmphex (p->getter (uf, tests[j].unicode), ==, tests[j].value);
/* Resolves the answer expected from a funcs object that has no callbacks
 * set: the given default, or the codepoint itself when the default is the
 * RETURNS_UNICODE_ITSELF sentinel. */
438 static hb_codepoint_t
439 default_value (hb_codepoint_t default_value, hb_codepoint_t unicode)
441 return default_value == RETURNS_UNICODE_ITSELF ? unicode : default_value;
/* Creates a funcs object with a NULL parent and no callbacks, then asserts
 * that every getter returns the property's default for each test codepoint,
 * regardless of the expected value stored in the table. */
445 test_unicode_properties_nil (void)
447 hb_unicode_funcs_t *uf = hb_unicode_funcs_create (NULL);
/* A freshly created funcs object is expected to still be mutable. */
450 g_assert (!hb_unicode_funcs_is_immutable (uf));
452 for (i = 0; i < G_N_ELEMENTS (properties); i++) {
453 const property_t *p = &properties[i];
454 const test_pair_t *tests;
456 g_test_message ("Testing property %s", p->name);
/* NOTE(review): the statement pointing `tests` at a table (presumably
 * p->tests) is not visible in this listing - confirm it precedes the loop. */
458 for (j = 0; j < p->num_tests; j++) {
459 g_test_message ("Test %s #%d: U+%04X", p->name, j, tests[j].unicode);
/* Only the codepoint column is used; the expectation comes from default_value(). */
460 g_assert_cmphex (p->getter (uf, tests[j].unicode), ==, default_value (p->default_value, tests[j].unicode));
/* Drop the reference obtained from hb_unicode_funcs_create(). */
464 hb_unicode_funcs_destroy (uf);
/* Marker values written into the two fixture data slots by
 * data_fixture_init(). The callbacks assert on them to check that they were
 * handed the user_data they expect. */
467 #define MAGIC0 0x12345678
468 #define MAGIC1 0x76543210
/* Fixture setup: stamps slot 0 with MAGIC0 and slot 1 with MAGIC1 so the
 * callbacks can tell the two user_data pointers apart. */
479 data_fixture_init (data_fixture_t *f, gconstpointer user_data)
481 f->data[0].value = MAGIC0;
482 f->data[1].value = MAGIC1;
485 data_fixture_finish (data_fixture_t *f, gconstpointer user_data)
/* Destroy callback registered alongside the script functions; `p` is the
 * data_t that was passed as user_data. */
489 static void free_up (void *p)
491 data_t *data = (data_t *) p;
/* Must be one of the two fixture slots, and must not have been released before. */
493 g_assert (data->value == MAGIC0 || data->value == MAGIC1);
494 g_assert (data->freed == FALSE);
/* NOTE(review): the tests later assert on data->freed, so this callback
 * presumably sets it - that statement is not visible in this listing; confirm. */
/* Script callback for a root funcs object: reports HB_SCRIPT_LATIN for
 * 'a'..'z' and HB_SCRIPT_UNKNOWN for everything else. */
499 simple_get_script (hb_unicode_funcs_t *ufuncs,
500 hb_codepoint_t codepoint,
503 data_t *data = (data_t *) user_data;
/* Sanity checks: installed on a funcs object with no parent, called with
 * slot 0's data, and that data has not been released yet. */
505 g_assert (hb_unicode_funcs_get_parent (ufuncs) == NULL);
506 g_assert (data->value == MAGIC0);
507 g_assert (data->freed == FALSE);
509 if ('a' <= codepoint && codepoint <= 'z')
510 return HB_SCRIPT_LATIN;
512 return HB_SCRIPT_UNKNOWN;
/* Script callback for a child funcs object: overrides 'a' to
 * HB_SCRIPT_ARABIC and defers every other codepoint to the parent. */
516 a_is_for_arabic_get_script (hb_unicode_funcs_t *ufuncs,
517 hb_codepoint_t codepoint,
520 data_t *data = (data_t *) user_data;
/* Sanity checks: installed on a funcs object that has a parent, called with
 * slot 1's data, and that data has not been released yet. */
522 g_assert (hb_unicode_funcs_get_parent (ufuncs) != NULL);
523 g_assert (data->value == MAGIC1);
524 g_assert (data->freed == FALSE);
526 if (codepoint == 'a') {
527 return HB_SCRIPT_ARABIC;
/* Not overridden here: ask the parent funcs object instead. */
529 hb_unicode_funcs_t *parent = hb_unicode_funcs_get_parent (ufuncs);
531 return hb_unicode_get_script (parent, codepoint);
/* Installs a custom script callback on a root funcs object, checks its
 * answers, then checks that making the object immutable blocks further
 * setter calls and that destroying it releases only the installed data. */
536 test_unicode_custom (data_fixture_t *f, gconstpointer user_data)
538 hb_unicode_funcs_t *uf = hb_unicode_funcs_create (NULL);
540 hb_unicode_funcs_set_script_func (uf, simple_get_script,
541 &f->data[0], free_up);
/* simple_get_script: lowercase ASCII letters are Latin, anything else is unknown. */
543 g_assert_cmpint (hb_unicode_get_script (uf, 'a'), ==, HB_SCRIPT_LATIN);
544 g_assert_cmpint (hb_unicode_get_script (uf, '0'), ==, HB_SCRIPT_UNKNOWN);
/* Mutable before make_immutable(), immutable after. */
546 g_assert (!hb_unicode_funcs_is_immutable (uf));
547 hb_unicode_funcs_make_immutable (uf);
548 g_assert (hb_unicode_funcs_is_immutable (uf));
550 /* Since uf is immutable now, the following setter should do nothing. */
551 hb_unicode_funcs_set_script_func (uf, a_is_for_arabic_get_script,
552 &f->data[1], free_up);
/* Neither slot has been released so far. After the destroy, only slot 0
 * (the data that was actually installed) is expected to be released. */
554 g_assert (!f->data[0].freed && !f->data[1].freed);
555 hb_unicode_funcs_destroy (uf);
556 g_assert (f->data[0].freed && !f->data[1].freed);
/* Subclasses a funcs object that has no callbacks: the child overrides 'a'
 * and every other lookup falls through to the parent, which is expected to
 * answer HB_SCRIPT_UNKNOWN. */
560 test_unicode_subclassing_nil (data_fixture_t *f, gconstpointer user_data)
562 hb_unicode_funcs_t *uf, *aa;
564 uf = hb_unicode_funcs_create (NULL);
566 aa = hb_unicode_funcs_create (uf);
/* Drop the local reference to the parent; `aa` is still used afterwards. */
568 hb_unicode_funcs_destroy (uf);
570 hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
571 &f->data[1], free_up);
/* 'a' is overridden by the child; 'b' is delegated to the callback-less parent. */
573 g_assert_cmpint (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
574 g_assert_cmpint (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN);
/* Slot 0 is never installed in this test, so only slot 1 is expected to be
 * released when `aa` is destroyed. */
576 g_assert (!f->data[0].freed && !f->data[1].freed);
577 hb_unicode_funcs_destroy (aa);
578 g_assert (!f->data[0].freed && f->data[1].freed);
/* Subclasses the default funcs object: the child overrides 'a' and every
 * other lookup is answered by the default implementation. */
582 test_unicode_subclassing_default (data_fixture_t *f, gconstpointer user_data)
584 hb_unicode_funcs_t *uf, *aa;
586 uf = hb_unicode_funcs_get_default ();
587 aa = hb_unicode_funcs_create (uf);
589 hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
590 &f->data[1], free_up);
/* 'a' is overridden by the child; 'b' comes from the default funcs and is Latin. */
592 g_assert_cmpint (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
593 g_assert_cmpint (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
/* Slot 0 is never installed in this test, so only slot 1 is expected to be
 * released when `aa` is destroyed. */
595 g_assert (!f->data[0].freed && !f->data[1].freed);
596 hb_unicode_funcs_destroy (aa);
597 g_assert (!f->data[0].freed && f->data[1].freed);
/* Subclasses a funcs object that has its own custom script callback, and
 * checks both the delegation chain and that the parent's data is released
 * only once the child is destroyed. */
601 test_unicode_subclassing_deep (data_fixture_t *f, gconstpointer user_data)
603 hb_unicode_funcs_t *uf, *aa;
605 uf = hb_unicode_funcs_create (NULL);
607 hb_unicode_funcs_set_script_func (uf, simple_get_script,
608 &f->data[0], free_up);
610 aa = hb_unicode_funcs_create (uf);
/* Drop the local reference to the parent. */
612 hb_unicode_funcs_destroy (uf);
614 /* make sure the 'uf' didn't get freed, since 'aa' holds a ref */
615 g_assert (!f->data[0].freed);
617 hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
618 &f->data[1], free_up);
/* 'a' is overridden by the child. 'b' and '0' are delegated to the parent's
 * simple_get_script, which answers Latin and unknown respectively. */
620 g_assert_cmpint (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
621 g_assert_cmpint (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
622 g_assert_cmpint (hb_unicode_get_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN);
/* Destroying the child is expected to release both its own data and the
 * parent's data. */
624 g_assert (!f->data[0].freed && !f->data[1].freed);
625 hb_unicode_funcs_destroy (aa);
626 g_assert (f->data[0].freed && f->data[1].freed);
/* Test entry point: registers the property tests (nil funcs, then one run
 * per funcs implementation) and the fixture-based custom and subclassing
 * tests, then runs them all. */
631 main (int argc, char **argv)
633 hb_test_init (&argc, &argv);
635 hb_test_add (test_unicode_properties_nil);
/* The same property test is registered once per implementation, labelled by
 * flavor name.
 * NOTE(review): the glib and icu registrations are presumably wrapped in
 * build-time conditionals that are not visible in this listing - confirm. */
637 hb_test_add_data_flavor (hb_unicode_funcs_get_default (), "default", test_unicode_properties);
639 hb_test_add_data_flavor (hb_glib_get_unicode_funcs (), "glib", test_unicode_properties);
642 hb_test_add_data_flavor (hb_icu_get_unicode_funcs (), "icu", test_unicode_properties);
/* Tests that need the two-slot data fixture. */
645 hb_test_add_fixture (data_fixture, NULL, test_unicode_custom);
646 hb_test_add_fixture (data_fixture, NULL, test_unicode_subclassing_nil);
647 hb_test_add_fixture (data_fixture, NULL, test_unicode_subclassing_default);
648 hb_test_add_fixture (data_fixture, NULL, test_unicode_subclassing_deep);
650 /* XXX test icu ufuncs */
651 /* XXX test _more tests (warn?) */
652 /* XXX test chainup */
653 /* XXX test glib & icu two-way script conversion */
655 return hb_test_run ();