2 * Copyright © 2011 Google, Inc.
4 * This is part of HarfBuzz, a text shaping library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 * Google Author(s): Behdad Esfahbod
31 /* Unit tests for hb-ot-tag.h */
34 /* https://www.microsoft.com/typography/otspec/scripttags.htm */
37 test_simple_tags (const char *s, hb_script_t script)
42 g_test_message ("Testing script %c%c%c%c: tag %s", HB_UNTAG (hb_script_to_iso15924_tag (script)), s);
43 tag = hb_tag_from_string (s, -1);
45 hb_ot_tags_from_script (script, &t1, &t2);
47 g_assert_cmphex (t1, ==, tag);
48 g_assert_cmphex (t2, ==, HB_OT_TAG_DEFAULT_SCRIPT);
50 g_assert_cmphex (hb_ot_tag_to_script (tag), ==, script);
54 test_indic_tags (const char *s1, const char *s2, hb_script_t script)
56 hb_script_t tag1, tag2;
59 g_test_message ("Testing script %c%c%c%c: new tag %s, old tag %s", HB_UNTAG (hb_script_to_iso15924_tag (script)), s1, s2);
60 tag1 = hb_tag_from_string (s1, -1);
61 tag2 = hb_tag_from_string (s2, -1);
63 hb_ot_tags_from_script (script, &t1, &t2);
65 g_assert_cmphex (t1, ==, tag1);
66 g_assert_cmphex (t2, ==, tag2);
68 g_assert_cmphex (hb_ot_tag_to_script (tag1), ==, script);
69 g_assert_cmphex (hb_ot_tag_to_script (tag2), ==, script);
73 test_ot_tag_script_degenerate (void)
77 g_assert_cmphex (HB_TAG_CHAR4 ("DFLT"), ==, HB_OT_TAG_DEFAULT_SCRIPT);
79 /* HIRAGANA and KATAKANA both map to 'kana' */
80 test_simple_tags ("kana", HB_SCRIPT_KATAKANA);
81 hb_ot_tags_from_script (HB_SCRIPT_HIRAGANA, &t1, &t2);
82 g_assert_cmphex (t1, ==, HB_TAG_CHAR4 ("kana"));
83 g_assert_cmphex (t2, ==, HB_OT_TAG_DEFAULT_SCRIPT);
85 test_simple_tags ("DFLT", HB_SCRIPT_INVALID);
87 /* Spaces are replaced */
88 g_assert_cmphex (hb_ot_tag_to_script (HB_TAG_CHAR4 ("be ")), ==, hb_script_from_string ("Beee", -1));
92 test_ot_tag_script_simple (void)
94 /* Arbitrary non-existent script */
95 test_simple_tags ("wwyz", hb_script_from_string ("wWyZ", -1));
97 /* These we don't really care about */
98 test_simple_tags ("zyyy", HB_SCRIPT_COMMON);
99 test_simple_tags ("zinh", HB_SCRIPT_INHERITED);
100 test_simple_tags ("zzzz", HB_SCRIPT_UNKNOWN);
102 test_simple_tags ("arab", HB_SCRIPT_ARABIC);
103 test_simple_tags ("copt", HB_SCRIPT_COPTIC);
104 test_simple_tags ("kana", HB_SCRIPT_KATAKANA);
105 test_simple_tags ("latn", HB_SCRIPT_LATIN);
107 /* These are trickier since their OT script tags have space. */
108 test_simple_tags ("lao ", HB_SCRIPT_LAO);
109 test_simple_tags ("yi ", HB_SCRIPT_YI);
110 /* Unicode-5.0 additions */
111 test_simple_tags ("nko ", HB_SCRIPT_NKO);
112 /* Unicode-5.1 additions */
113 test_simple_tags ("vai ", HB_SCRIPT_VAI);
115 /* https://www.microsoft.com/typography/otspec160/scripttagsProposed.htm */
117 /* Unicode-5.2 additions */
118 test_simple_tags ("mtei", HB_SCRIPT_MEETEI_MAYEK);
119 /* Unicode-6.0 additions */
120 test_simple_tags ("mand", HB_SCRIPT_MANDAIC);
124 test_ot_tag_script_indic (void)
126 test_indic_tags ("bng2", "beng", HB_SCRIPT_BENGALI);
127 test_indic_tags ("dev2", "deva", HB_SCRIPT_DEVANAGARI);
128 test_indic_tags ("gjr2", "gujr", HB_SCRIPT_GUJARATI);
129 test_indic_tags ("gur2", "guru", HB_SCRIPT_GURMUKHI);
130 test_indic_tags ("knd2", "knda", HB_SCRIPT_KANNADA);
131 test_indic_tags ("mlm2", "mlym", HB_SCRIPT_MALAYALAM);
132 test_indic_tags ("ory2", "orya", HB_SCRIPT_ORIYA);
133 test_indic_tags ("tml2", "taml", HB_SCRIPT_TAMIL);
134 test_indic_tags ("tel2", "telu", HB_SCRIPT_TELUGU);
135 test_indic_tags ("mym2", "mymr", HB_SCRIPT_MYANMAR);
140 /* https://www.microsoft.com/typography/otspec/languagetags.htm */
143 test_language_two_way (const char *tag_s, const char *lang_s)
145 hb_language_t lang = hb_language_from_string (lang_s, -1);
146 hb_tag_t tag = hb_tag_from_string (tag_s, -1);
148 g_test_message ("Testing language %s <-> tag %s", lang_s, tag_s);
150 g_assert_cmphex (tag, ==, hb_ot_tag_from_language (lang));
151 g_assert (lang == hb_ot_tag_to_language (tag));
155 test_tag_from_language (const char *tag_s, const char *lang_s)
157 hb_language_t lang = hb_language_from_string (lang_s, -1);
158 hb_tag_t tag = hb_tag_from_string (tag_s, -1);
160 g_test_message ("Testing language %s -> tag %s", lang_s, tag_s);
162 g_assert_cmphex (tag, ==, hb_ot_tag_from_language (lang));
166 test_tag_to_language (const char *tag_s, const char *lang_s)
168 hb_language_t lang = hb_language_from_string (lang_s, -1);
169 hb_tag_t tag = hb_tag_from_string (tag_s, -1);
171 g_test_message ("Testing tag %s -> language %s", tag_s, lang_s);
173 g_assert (lang == hb_ot_tag_to_language (tag));
177 test_ot_tag_language (void)
179 g_assert_cmphex (HB_TAG_CHAR4 ("dflt"), ==, HB_OT_TAG_DEFAULT_LANGUAGE);
180 test_language_two_way ("dflt", NULL);
182 test_language_two_way ("ARA", "ar");
184 test_language_two_way ("AZE", "az");
185 test_tag_from_language ("AZE", "az-ir");
186 test_tag_from_language ("AZE", "az-az");
188 test_language_two_way ("ENG", "en");
189 test_tag_from_language ("ENG", "en_US");
191 test_language_two_way ("CJA", "cja"); /* Western Cham */
192 test_language_two_way ("CJM", "cjm"); /* Eastern Cham */
193 test_language_two_way ("EVN", "eve");
195 test_language_two_way ("HAL", "cfm"); /* BCP47 and current ISO639-3 code for Halam/Falam Chin */
196 test_tag_from_language ("HAL", "flm"); /* Retired ISO639-3 code for Halam/Falam Chin */
198 test_tag_from_language ("QIN", "bgr"); /* Bawm Chin */
199 test_tag_from_language ("QIN", "cbl"); /* Bualkhaw Chin */
200 test_tag_from_language ("QIN", "cka"); /* Khumi Awa Chin */
201 test_tag_from_language ("QIN", "cmr"); /* Mro-Khimi Chin */
202 test_tag_from_language ("QIN", "cnb"); /* Chinbon Chin */
203 test_tag_from_language ("QIN", "cnh"); /* Hakha Chin */
204 test_tag_from_language ("QIN", "cnk"); /* Khumi Chin */
205 test_tag_from_language ("QIN", "cnw"); /* Ngawn Chin */
206 test_tag_from_language ("QIN", "csh"); /* Asho Chin */
207 test_tag_from_language ("QIN", "csy"); /* Siyin Chin */
208 test_tag_from_language ("QIN", "ctd"); /* Tedim Chin */
209 test_tag_from_language ("QIN", "czt"); /* Zotung Chin */
210 test_tag_from_language ("QIN", "dao"); /* Daai Chin */
211 test_tag_from_language ("QIN", "hlt"); /* Matu Chin */
212 test_tag_from_language ("QIN", "mrh"); /* Mara Chin */
213 test_tag_from_language ("QIN", "pck"); /* Paite Chin */
214 test_tag_from_language ("QIN", "sez"); /* Senthang Chin */
215 test_tag_from_language ("QIN", "tcp"); /* Tawr Chin */
216 test_tag_from_language ("QIN", "tcz"); /* Thado Chin */
217 test_tag_from_language ("QIN", "yos"); /* Yos, deprecated by IANA in favor of Zou [zom] */
218 test_tag_from_language ("QIN", "zom"); /* Zou */
219 test_tag_to_language ("QIN", "bgr"); /* no single BCP47 tag for Chin; picking Bawm Chin */
221 test_language_two_way ("FAR", "fa");
222 test_tag_from_language ("FAR", "fa_IR");
224 test_language_two_way ("SWA", "aii"); /* Swadaya Aramaic */
226 test_language_two_way ("SYR", "syr"); /* Syriac [macrolanguage] */
227 test_tag_from_language ("SYR", "amw"); /* Western Neo-Aramaic */
228 test_tag_from_language ("SYR", "cld"); /* Chaldean Neo-Aramaic */
229 test_tag_from_language ("SYR", "syc"); /* Classical Syriac */
231 test_language_two_way ("TUA", "tru"); /* Turoyo Aramaic */
233 test_language_two_way ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */
235 test_tag_from_language ("ZHS", "zh"); /* Chinese */
236 test_tag_from_language ("ZHS", "zh-cn"); /* Chinese (China) */
237 test_tag_from_language ("ZHS", "zh-sg"); /* Chinese (Singapore) */
238 test_tag_from_language ("ZHH", "zh-mo"); /* Chinese (Macao) */
239 test_tag_from_language ("ZHH", "zh-hant-mo"); /* Chinese (Macao) */
240 test_tag_from_language ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */
241 test_tag_from_language ("ZHH", "zH-HanT-hK"); /* Chinese (Hong Kong) */
242 test_tag_from_language ("ZHT", "zh-tw"); /* Chinese (Taiwan) */
243 test_tag_from_language ("ZHS", "zh-Hans"); /* Chinese (Simplified) */
244 test_tag_from_language ("ZHT", "zh-Hant"); /* Chinese (Traditional) */
245 test_tag_from_language ("ZHS", "zh-xx"); /* Chinese (Other) */
247 test_tag_from_language ("ZHS", "zh"); /* Chinese */
248 test_tag_from_language ("ZHS", "zh-xx");
250 test_tag_to_language ("ZHS", "zh-Hans");
251 test_tag_to_language ("ZHT", "zh-Hant");
252 test_tag_to_language ("ZHP", "x-hbotzhp");
254 test_language_two_way ("ABC", "x-hbotabc");
255 test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc");
256 test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc");
257 test_tag_from_language ("ABCD", "asdf-asdf-wer-x-hbotabcd");
259 test_tag_from_language ("dflt", "asdf-asdf-wer-x-hbot-zxc");
261 test_tag_from_language ("dflt", "xy");
262 test_tag_from_language ("XYZ", "xyz"); /* Unknown ISO 639-3 */
263 test_tag_from_language ("XYZ", "xyz-qw"); /* Unknown ISO 639-3 */
265 /* International Phonetic Alphabet */
266 test_tag_from_language ("IPPH", "en-fonipa");
267 test_tag_from_language ("IPPH", "rm-CH-fonipa-sursilv-x-foobar");
268 test_tag_from_language ("IPPH", "und-fonipa");
269 test_tag_from_language ("IPPH", "zh-fonipa");
270 test_tag_to_language ("IPPH", "und-fonipa");
272 /* North American Phonetic Alphabet (Americanist Phonetic Notation) */
273 test_tag_from_language ("APPH", "en-fonnapa");
274 test_tag_from_language ("APPH", "chr-fonnapa");
275 test_tag_from_language ("APPH", "und-fonnapa");
276 test_tag_to_language ("APPH", "und-fonnapa");
278 /* Estrangela Syriac */
279 test_tag_from_language ("SYRE", "aii-Syre");
280 test_tag_from_language ("SYRE", "de-Syre");
281 test_tag_from_language ("SYRE", "syr-Syre");
282 test_tag_from_language ("SYRE", "und-Syre");
283 test_tag_to_language ("SYRE", "und-Syre");
286 test_tag_from_language ("SYRJ", "aii-Syrj");
287 test_tag_from_language ("SYRJ", "de-Syrj");
288 test_tag_from_language ("SYRJ", "syr-Syrj");
289 test_tag_from_language ("SYRJ", "und-Syrj");
290 test_tag_to_language ("SYRJ", "und-Syrj");
293 test_tag_from_language ("SYRN", "aii-Syrn");
294 test_tag_from_language ("SYRN", "de-Syrn");
295 test_tag_from_language ("SYRN", "syr-Syrn");
296 test_tag_from_language ("SYRN", "und-Syrn");
297 test_tag_to_language ("SYRN", "und-Syrn");
299 /* Test that x-hbot overrides the base language */
300 test_tag_from_language ("ABC", "fa-x-hbotabc-zxc");
301 test_tag_from_language ("ABC", "fa-ir-x-hbotabc-zxc");
302 test_tag_from_language ("ABC", "zh-x-hbotabc-zxc");
303 test_tag_from_language ("ABC", "zh-cn-x-hbotabc-zxc");
304 test_tag_from_language ("ABC", "zh-xy-x-hbotabc-zxc");
305 test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc");
309 main (int argc, char **argv)
311 hb_test_init (&argc, &argv);
313 hb_test_add (test_ot_tag_script_degenerate);
314 hb_test_add (test_ot_tag_script_simple);
315 hb_test_add (test_ot_tag_script_indic);
317 hb_test_add (test_ot_tag_language);
319 return hb_test_run();