2 * Copyright (C) 2009 Red Hat, Inc.
3 * Copyright (C) 2009 Keith Stribley <devel@thanlwinsoft.org>
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Red Hat Author(s): Behdad Esfahbod
28 #include "hb-private.h"
32 #include "hb-unicode-private.h"
34 #include <unicode/uchar.h>
35 #include <unicode/uscript.h>
37 static hb_codepoint_t hb_icu_get_mirroring (hb_codepoint_t unicode) { return u_charMirror(unicode); }
38 static unsigned int hb_icu_get_combining_class (hb_codepoint_t unicode) { return u_getCombiningClass (unicode); }
/*
 * East Asian width callback: switches on the UCHAR_EAST_ASIAN_WIDTH integer
 * property that ICU reports for `unicode`.
 * NOTE(review): the case labels and the value returned for each width class
 * are not visible in this excerpt -- confirm the mapping against the full file.
 */
41 hb_icu_get_eastasian_width (hb_codepoint_t unicode)
43 switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
/*
 * General-category callback: translates the UCHAR_GENERAL_CATEGORY value that
 * ICU reports for `unicode` into an HB_CATEGORY_* constant.  The two-letter
 * tag after each case is the Unicode general-category abbreviation.  ICU and
 * HB spell a few categories differently (for example U_START_PUNCTUATION maps
 * to HB_CATEGORY_OPEN_PUNCTUATION); the abbreviation shows they are the same
 * category.
 */
58 hb_icu_get_general_category (hb_codepoint_t unicode)
60 switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
62 case U_UNASSIGNED: return HB_CATEGORY_UNASSIGNED;
/* Letters */
64 case U_UPPERCASE_LETTER: return HB_CATEGORY_UPPERCASE_LETTER; /* Lu */
65 case U_LOWERCASE_LETTER: return HB_CATEGORY_LOWERCASE_LETTER; /* Ll */
66 case U_TITLECASE_LETTER: return HB_CATEGORY_TITLECASE_LETTER; /* Lt */
67 case U_MODIFIER_LETTER: return HB_CATEGORY_MODIFIER_LETTER; /* Lm */
68 case U_OTHER_LETTER: return HB_CATEGORY_OTHER_LETTER; /* Lo */
/* Marks */
70 case U_NON_SPACING_MARK: return HB_CATEGORY_NON_SPACING_MARK; /* Mn */
71 case U_ENCLOSING_MARK: return HB_CATEGORY_ENCLOSING_MARK; /* Me */
72 case U_COMBINING_SPACING_MARK: return HB_CATEGORY_COMBINING_MARK; /* Mc */
/* Numbers */
74 case U_DECIMAL_DIGIT_NUMBER: return HB_CATEGORY_DECIMAL_NUMBER; /* Nd */
75 case U_LETTER_NUMBER: return HB_CATEGORY_LETTER_NUMBER; /* Nl */
76 case U_OTHER_NUMBER: return HB_CATEGORY_OTHER_NUMBER; /* No */
/* Separators */
78 case U_SPACE_SEPARATOR: return HB_CATEGORY_SPACE_SEPARATOR; /* Zs */
79 case U_LINE_SEPARATOR: return HB_CATEGORY_LINE_SEPARATOR; /* Zl */
80 case U_PARAGRAPH_SEPARATOR: return HB_CATEGORY_PARAGRAPH_SEPARATOR; /* Zp */
/* Control, format, private use, surrogate */
82 case U_CONTROL_CHAR: return HB_CATEGORY_CONTROL; /* Cc */
83 case U_FORMAT_CHAR: return HB_CATEGORY_FORMAT; /* Cf */
84 case U_PRIVATE_USE_CHAR: return HB_CATEGORY_PRIVATE_USE; /* Co */
85 case U_SURROGATE: return HB_CATEGORY_SURROGATE; /* Cs */
/* Punctuation */
88 case U_DASH_PUNCTUATION: return HB_CATEGORY_DASH_PUNCTUATION; /* Pd */
89 case U_START_PUNCTUATION: return HB_CATEGORY_OPEN_PUNCTUATION; /* Ps */
90 case U_END_PUNCTUATION: return HB_CATEGORY_CLOSE_PUNCTUATION; /* Pe */
91 case U_CONNECTOR_PUNCTUATION: return HB_CATEGORY_CONNECT_PUNCTUATION; /* Pc */
92 case U_OTHER_PUNCTUATION: return HB_CATEGORY_OTHER_PUNCTUATION; /* Po */
/* Symbols */
94 case U_MATH_SYMBOL: return HB_CATEGORY_MATH_SYMBOL; /* Sm */
95 case U_CURRENCY_SYMBOL: return HB_CATEGORY_CURRENCY_SYMBOL; /* Sc */
96 case U_MODIFIER_SYMBOL: return HB_CATEGORY_MODIFIER_SYMBOL; /* Sk */
97 case U_OTHER_SYMBOL: return HB_CATEGORY_OTHER_SYMBOL; /* So */
/* Quotation punctuation */
99 case U_INITIAL_PUNCTUATION: return HB_CATEGORY_INITIAL_PUNCTUATION; /* Pi */
100 case U_FINAL_PUNCTUATION: return HB_CATEGORY_FINAL_PUNCTUATION; /* Pf */
/* Fallback for any value that none of the cases above returned for. */
103 return HB_CATEGORY_UNASSIGNED;
/*
 * Script callback: asks ICU for the script of `unicode` via uscript_getScript()
 * and returns the HB_SCRIPT_* constant whose name matches the USCRIPT_* code.
 * The four-letter tag after each entry is the script's ISO 15924 code (older
 * aliases in parentheses).  Script codes without an entry below appear to
 * reach the final return of HB_SCRIPT_UNKNOWN.
 */
107 hb_icu_get_script (hb_codepoint_t unicode)
/* NOTE(review): status is passed to ICU but never inspected in the visible
 * code -- confirm that relying on the returned script code alone is intended. */
109 UErrorCode status = U_ZERO_ERROR;
110 UScriptCode scriptCode = uscript_getScript(unicode, &status);
111 switch ((int) scriptCode)
/* Expands to a case label mapping USCRIPT_<C> to the identically named
 * HB_SCRIPT_<C>; the terminating semicolon is supplied at each use. */
113 #define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C
114 MATCH_SCRIPT (INVALID_CODE);
115 MATCH_SCRIPT (COMMON); /* Zyyy */
116 MATCH_SCRIPT (INHERITED); /* Qaai */
117 MATCH_SCRIPT (ARABIC); /* Arab */
118 MATCH_SCRIPT (ARMENIAN); /* Armn */
119 MATCH_SCRIPT (BENGALI); /* Beng */
120 MATCH_SCRIPT (BOPOMOFO); /* Bopo */
121 MATCH_SCRIPT (CHEROKEE); /* Cher */
122 MATCH_SCRIPT (COPTIC); /* Qaac */
123 MATCH_SCRIPT (CYRILLIC); /* Cyrl (Cyrs) */
124 MATCH_SCRIPT (DESERET); /* Dsrt */
125 MATCH_SCRIPT (DEVANAGARI); /* Deva */
126 MATCH_SCRIPT (ETHIOPIC); /* Ethi */
127 MATCH_SCRIPT (GEORGIAN); /* Geor (Geon, Geoa) */
128 MATCH_SCRIPT (GOTHIC); /* Goth */
129 MATCH_SCRIPT (GREEK); /* Grek */
130 MATCH_SCRIPT (GUJARATI); /* Gujr */
131 MATCH_SCRIPT (GURMUKHI); /* Guru */
132 MATCH_SCRIPT (HAN); /* Hani */
133 MATCH_SCRIPT (HANGUL); /* Hang */
134 MATCH_SCRIPT (HEBREW); /* Hebr */
135 MATCH_SCRIPT (HIRAGANA); /* Hira */
136 MATCH_SCRIPT (KANNADA); /* Knda */
137 MATCH_SCRIPT (KATAKANA); /* Kana */
138 MATCH_SCRIPT (KHMER); /* Khmr */
139 MATCH_SCRIPT (LAO); /* Laoo */
140 MATCH_SCRIPT (LATIN); /* Latn (Latf, Latg) */
141 MATCH_SCRIPT (MALAYALAM); /* Mlym */
142 MATCH_SCRIPT (MONGOLIAN); /* Mong */
143 MATCH_SCRIPT (MYANMAR); /* Mymr */
144 MATCH_SCRIPT (OGHAM); /* Ogam */
145 MATCH_SCRIPT (OLD_ITALIC); /* Ital */
146 MATCH_SCRIPT (ORIYA); /* Orya */
147 MATCH_SCRIPT (RUNIC); /* Runr */
148 MATCH_SCRIPT (SINHALA); /* Sinh */
149 MATCH_SCRIPT (SYRIAC); /* Syrc (Syrj, Syrn, Syre) */
150 MATCH_SCRIPT (TAMIL); /* Taml */
151 MATCH_SCRIPT (TELUGU); /* Telu */
152 MATCH_SCRIPT (THAANA); /* Thaa */
153 MATCH_SCRIPT (THAI); /* Thai */
154 MATCH_SCRIPT (TIBETAN); /* Tibt */
155 MATCH_SCRIPT (CANADIAN_ABORIGINAL);/* Cans */
156 MATCH_SCRIPT (YI); /* Yiii */
157 MATCH_SCRIPT (TAGALOG); /* Tglg */
158 MATCH_SCRIPT (HANUNOO); /* Hano */
159 MATCH_SCRIPT (BUHID); /* Buhd */
160 MATCH_SCRIPT (TAGBANWA); /* Tagb */
162 /* Unicode-4.0 additions */
163 MATCH_SCRIPT (BRAILLE); /* Brai */
164 MATCH_SCRIPT (CYPRIOT); /* Cprt */
165 MATCH_SCRIPT (LIMBU); /* Limb */
166 MATCH_SCRIPT (OSMANYA); /* Osma */
167 MATCH_SCRIPT (SHAVIAN); /* Shaw */
168 MATCH_SCRIPT (LINEAR_B); /* Linb */
169 MATCH_SCRIPT (TAI_LE); /* Tale */
170 MATCH_SCRIPT (UGARITIC); /* Ugar */
172 /* Unicode-4.1 additions */
173 MATCH_SCRIPT (NEW_TAI_LUE); /* Talu */
174 MATCH_SCRIPT (BUGINESE); /* Bugi */
175 MATCH_SCRIPT (GLAGOLITIC); /* Glag */
176 MATCH_SCRIPT (TIFINAGH); /* Tfng */
177 MATCH_SCRIPT (SYLOTI_NAGRI); /* Sylo */
178 MATCH_SCRIPT (OLD_PERSIAN); /* Xpeo */
179 MATCH_SCRIPT (KHAROSHTHI); /* Khar */
181 /* Unicode-5.0 additions */
182 MATCH_SCRIPT (UNKNOWN); /* Zzzz */
183 MATCH_SCRIPT (BALINESE); /* Bali */
184 MATCH_SCRIPT (CUNEIFORM); /* Xsux */
185 MATCH_SCRIPT (PHOENICIAN); /* Phnx */
186 MATCH_SCRIPT (PHAGS_PA); /* Phag */
187 MATCH_SCRIPT (NKO); /* Nkoo */
189 /* Unicode-5.1 additions */
190 MATCH_SCRIPT (KAYAH_LI); /* Kali */
191 MATCH_SCRIPT (LEPCHA); /* Lepc */
192 MATCH_SCRIPT (REJANG); /* Rjng */
193 MATCH_SCRIPT (SUNDANESE); /* Sund */
194 MATCH_SCRIPT (SAURASHTRA); /* Saur */
195 MATCH_SCRIPT (CHAM); /* Cham */
196 MATCH_SCRIPT (OL_CHIKI); /* Olck */
197 MATCH_SCRIPT (VAI); /* Vaii */
198 MATCH_SCRIPT (CARIAN); /* Cari */
199 MATCH_SCRIPT (LYCIAN); /* Lyci */
200 MATCH_SCRIPT (LYDIAN); /* Lydi */
/* Fallback for script codes that none of the entries above returned for. */
202 return HB_SCRIPT_UNKNOWN;
/*
 * File-local hb_unicode_funcs_t instance whose callback slots are filled with
 * the ICU-backed functions defined in this file.  The initializer is
 * positional, so entry order must match the member order of
 * hb_unicode_funcs_t (declared elsewhere).
 * NOTE(review): hb_icu_get_script does not appear among the visible entries --
 * confirm it is wired in at the position hb_unicode_funcs_t expects.
 */
205 static hb_unicode_funcs_t icu_ufuncs = {
206 HB_REFERENCE_COUNT_INVALID, /* ref_count */
208 TRUE, /* immutable */
/* Callback slots */
210 hb_icu_get_general_category,
211 hb_icu_get_combining_class,
212 hb_icu_get_mirroring,
214 hb_icu_get_eastasian_width
218 hb_icu_get_unicode_funcs (void)