From: David Corbett
Date: Sat, 20 Jan 2018 20:53:09 +0000 (-0500)
Subject: Match extlang subtags
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7c7cb2a98907d99ca86bdbfca0bf9c48bfa4ed49;p=platform%2Fupstream%2FlibHarfBuzzSharp.git

Match extlang subtags

If the second subtag of a BCP 47 tag is three letters long, it denotes
an extended language. The tag converter ignores the language subtag and
uses the extended language instead. There are some grandfathered
exceptions, which are handled earlier.
---

diff --git a/src/gen-tag-table.py b/src/gen-tag-table.py
index 925ffb4..7cbf3a7 100755
--- a/src/gen-tag-table.py
+++ b/src/gen-tag-table.py
@@ -884,7 +884,7 @@ def print_subtag_matches (subtag):
 
 for language, tags in sorted (ot.from_bcp_47.items (), key=lambda i: (-len (i[0]), i[0])):
     lt = LanguageTag (language)
-    if len (lt.subtags) == 1 or lt.grandfathered and ot.from_bcp_47[lt.subtags[0]] == tags:
+    if len (lt.subtags) == 1 or lt.grandfathered and len (lt.subtags[1]) != 3 and ot.from_bcp_47[lt.subtags[0]] == tags:
         continue
     print (' if (', end='')
     if (lt.language == 'und' or
diff --git a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh
index 0d06255..7211368 100644
--- a/src/hb-ot-tag-table.hh
+++ b/src/hb-ot-tag-table.hh
@@ -1279,6 +1279,13 @@ hb_ot_tags_from_complex_language (const char *lang_str,
     *count = 1;
     return true;
   }
+  if (0 == strcmp (lang_str, "zh-min-nan"))
+  {
+    /* Minnan, Hokkien, Amoy, Taiwanese, Southern Min, Southern Fujian, Hoklo, Southern Fukien, Ho-lo */
+    tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+    *count = 1;
+    return true;
+  }
   if (lang_matches (lang_str, "cdo-hans"))
   {
     /* Min Dong Chinese; Han (Simplified variant) */
@@ -1791,6 +1798,13 @@ hb_ot_tags_from_complex_language (const char *lang_str,
     *count = 1;
     return true;
   }
+  if (0 == strcmp (lang_str, "no-bok"))
+  {
+    /* Norwegian Bokmal */
+    tags[0] = HB_TAG('N','O','R',' '); /* Norwegian */
+    *count = 1;
+    return true;
+  }
   if (0 == strcmp (lang_str, "no-nyn"))
   {
     /* Norwegian Nynorsk */
@@ -1822,6 +1836,13 @@ hb_ot_tags_from_complex_language (const char *lang_str,
     *count = 1;
     return true;
   }
+  if (0 == strcmp (lang_str, "zh-min"))
+  {
+    /* Min, Fuzhou, Hokkien, Amoy, or Taiwanese */
+    tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+    *count = 1;
+    return true;
+  }
   if (0 == strcmp (lang_str, "i-hak"))
   {
     /* Hakka */
diff --git a/src/hb-ot-tag.cc b/src/hb-ot-tag.cc
index 4d8cb59..0d4c060 100644
--- a/src/hb-ot-tag.cc
+++ b/src/hb-ot-tag.cc
@@ -249,8 +249,17 @@ hb_ot_tags_from_language (const char *lang_str,
     return;
 
   /* Find a language matching in the first component. */
+  s = strchr (lang_str, '-');
   {
     const LangTag *lang_tag;
+    if (s && limit - lang_str >= 6)
+    {
+      const char *extlang_end = strchr (s + 1, '-');
+      /* If there is an extended language tag, use it. */
+      if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) &&
+          ISALPHA (s[1]))
+        lang_str = s + 1;
+    }
     lang_tag = (LangTag *) bsearch (lang_str, ot_languages,
                                     ARRAY_LENGTH (ot_languages), sizeof (LangTag),
                                     lang_compare_first_component);
@@ -264,7 +273,6 @@ hb_ot_tags_from_language (const char *lang_str,
     }
   }
 
-  s = strchr (lang_str, '-');
   if (!s)
     s = lang_str + strlen (lang_str);
   if (s - lang_str == 3) {
diff --git a/test/api/test-ot-tag.c b/test/api/test-ot-tag.c
index 6d64d13..3502898 100644
--- a/test/api/test-ot-tag.c
+++ b/test/api/test-ot-tag.c
@@ -369,9 +369,13 @@ test_ot_tag_language (void)
   test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc");
 
   /* Unnormalized BCP 47 tags */
+  test_tag_from_language ("ARA", "ar-aao");
   test_tag_from_language ("JBO", "art-lojban");
+  test_tag_from_language ("KOK", "kok-gom");
   test_tag_from_language ("LTZ", "i-lux");
   test_tag_from_language ("MNG", "drh");
+  test_tag_from_language ("MOR", "ar-ary");
+  test_tag_from_language ("MOR", "ar-ary-DZ");
   test_tag_from_language ("NOR", "no-bok");
   test_tag_from_language ("NYN", "no-nyn");
   test_tag_from_language ("ZHS", "i-hak");
@@ -379,6 +383,9 @@ test_ot_tag_language (void)
   test_tag_from_language ("ZHS", "zh-min");
   test_tag_from_language ("ZHS", "zh-min-nan");
   test_tag_from_language ("ZHS", "zh-xiang");
+
+  /* A UN M.49 region code, not an extended language subtag */
+  test_tag_from_language ("ARA", "ar-001");
 }
 
 static void