2 * Copyright (C) 2009 Red Hat, Inc.
3 * Copyright (C) 2009 Keith Stribley
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Red Hat Author(s): Behdad Esfahbod
28 #include "hb-private.h"
32 #include "hb-unicode-private.hh"
34 #include <unicode/uversion.h>
35 #include <unicode/uchar.h>
36 #include <unicode/uscript.h>
/* Unicode-funcs callback backed by ICU: the visible body returns
 * u_getCombiningClass (unicode) unchanged.  `ufuncs` is not referenced in
 * the visible lines.
 * NOTE(review): the return-type line and the end of the parameter list are
 * not visible in this listing -- confirm them against the full file. */
42 hb_icu_get_combining_class (hb_unicode_funcs_t *ufuncs,
43 hb_codepoint_t unicode,
47 return u_getCombiningClass (unicode);
/* Unicode-funcs callback backed by ICU: switches on the
 * UCHAR_EAST_ASIAN_WIDTH property value that u_getIntPropertyValue ()
 * reports for `unicode`.
 * NOTE(review): the case labels and the values returned for them are not
 * visible in this listing -- confirm the actual widths in the full file. */
51 hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
52 hb_codepoint_t unicode,
55 switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
/* Translates ICU's general category for `unicode` into the matching
 * HB_UNICODE_GENERAL_CATEGORY_* value.
 *
 * The category is read with u_getIntPropertyValue (unicode,
 * UCHAR_GENERAL_CATEGORY) and mapped one-to-one by the case table below.
 * `ufuncs` is not referenced in the visible lines.
 * NOTE(review): the end of the parameter list and the braces are not
 * visible in this listing. */
69 static hb_unicode_general_category_t
70 hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs,
71 hb_codepoint_t unicode,
74 switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
76 case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
/* Letters */
78 case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
79 case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
80 case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
81 case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
82 case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
/* Marks.  Note the name difference: ICU's COMBINING_SPACING_MARK maps to
 * HB's COMBINING_MARK. */
84 case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
85 case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
86 case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_COMBINING_MARK;
/* Numbers.  ICU's DECIMAL_DIGIT_NUMBER maps to HB's DECIMAL_NUMBER. */
88 case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
89 case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
90 case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
/* Separators */
92 case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
93 case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
94 case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
/* Control, format, private-use and surrogate code points */
96 case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
97 case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
98 case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
99 case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
/* Punctuation.  ICU START/END/CONNECTOR map to HB OPEN/CLOSE/CONNECT. */
102 case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
103 case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
104 case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
105 case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
106 case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
/* Symbols */
108 case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
109 case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
110 case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
111 case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
/* Quotation-style punctuation */
113 case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
114 case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
/* Fallback when none of the cases above returned. */
117 return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
/* Unicode-funcs callback backed by ICU: returns u_charMirror (unicode)
 * unchanged.  `ufuncs` is not referenced in the visible lines.
 * NOTE(review): ICU documents u_charMirror as returning its argument when
 * the code point has no mirrored counterpart -- confirm callers rely on
 * that.  The end of the parameter list is not visible in this listing. */
120 static hb_codepoint_t
121 hb_icu_get_mirroring (hb_unicode_funcs_t *ufuncs,
122 hb_codepoint_t unicode,
125 return u_charMirror(unicode);
/* Unicode-funcs callback backed by ICU: looks up the script of `unicode`
 * with uscript_getScript () and translates the resulting USCRIPT_* code into
 * the corresponding HB_SCRIPT_* value through the MATCH_SCRIPT table below.
 * Codes with no entry end at the final `return HB_SCRIPT_UNKNOWN`.
 * `ufuncs` is not referenced in the visible lines.
 * NOTE(review): the return-type line, the end of the parameter list, the
 * braces, the #endif lines and some table entries are not visible in this
 * listing -- confirm them against the full file. */
129 hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
130 hb_codepoint_t unicode,
/* NOTE(review): `status` is passed to ICU but never inspected in the visible
 * lines; confirm whether a failure needs explicit handling. */
133 UErrorCode status = U_ZERO_ERROR;
134 UScriptCode scriptCode = uscript_getScript(unicode, &status);
/* The switch operates on the script code converted to int. */
135 switch ((int) scriptCode)
/* CHECK_ICU_VERSION(major, minor): true when the ICU headers being compiled
 * against are at least version major.minor.
 * NOTE(review): the expansion has no outer parentheses, so it is only safe
 * when used as a complete #if expression, as it is below.
 * MATCH_SCRIPT(C): `case USCRIPT_C: return HB_SCRIPT_C` for identically
 * named scripts.
 * MATCH_SCRIPT2(C1, C2): same, for scripts whose ICU name (C1) differs from
 * the HarfBuzz name (C2). */
137 #define CHECK_ICU_VERSION(major, minor) \
138 U_ICU_VERSION_MAJOR_NUM > (major) || (U_ICU_VERSION_MAJOR_NUM == (major) && U_ICU_VERSION_MINOR_NUM >= (minor))
139 #define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C
140 #define MATCH_SCRIPT2(C1, C2) case USCRIPT_##C1: return HB_SCRIPT_##C2
142 MATCH_SCRIPT (COMMON);
143 MATCH_SCRIPT (INHERITED);
144 MATCH_SCRIPT (ARABIC);
145 MATCH_SCRIPT (ARMENIAN);
146 MATCH_SCRIPT (BENGALI);
147 MATCH_SCRIPT (BOPOMOFO);
148 MATCH_SCRIPT (CHEROKEE);
149 MATCH_SCRIPT (COPTIC);
150 MATCH_SCRIPT (CYRILLIC);
151 MATCH_SCRIPT (DESERET);
152 MATCH_SCRIPT (DEVANAGARI);
153 MATCH_SCRIPT (ETHIOPIC);
154 MATCH_SCRIPT (GEORGIAN);
155 MATCH_SCRIPT (GOTHIC);
156 MATCH_SCRIPT (GREEK);
157 MATCH_SCRIPT (GUJARATI);
158 MATCH_SCRIPT (GURMUKHI);
160 MATCH_SCRIPT (HANGUL);
161 MATCH_SCRIPT (HEBREW);
162 MATCH_SCRIPT (HIRAGANA);
163 MATCH_SCRIPT (KANNADA);
164 MATCH_SCRIPT (KATAKANA);
165 MATCH_SCRIPT (KHMER);
167 MATCH_SCRIPT (LATIN);
168 MATCH_SCRIPT (MALAYALAM);
169 MATCH_SCRIPT (MONGOLIAN);
170 MATCH_SCRIPT (MYANMAR);
171 MATCH_SCRIPT (OGHAM);
172 MATCH_SCRIPT (OLD_ITALIC);
173 MATCH_SCRIPT (ORIYA);
174 MATCH_SCRIPT (RUNIC);
175 MATCH_SCRIPT (SINHALA);
176 MATCH_SCRIPT (SYRIAC);
177 MATCH_SCRIPT (TAMIL);
178 MATCH_SCRIPT (TELUGU);
179 MATCH_SCRIPT (THAANA);
181 MATCH_SCRIPT (TIBETAN);
182 MATCH_SCRIPT (CANADIAN_ABORIGINAL);
184 MATCH_SCRIPT (TAGALOG);
185 MATCH_SCRIPT (HANUNOO);
186 MATCH_SCRIPT (BUHID);
187 MATCH_SCRIPT (TAGBANWA);
189 /* Unicode-4.0 additions */
190 MATCH_SCRIPT (BRAILLE);
191 MATCH_SCRIPT (CYPRIOT);
192 MATCH_SCRIPT (LIMBU);
193 MATCH_SCRIPT (OSMANYA);
194 MATCH_SCRIPT (SHAVIAN);
195 MATCH_SCRIPT (LINEAR_B);
196 MATCH_SCRIPT (TAI_LE);
197 MATCH_SCRIPT (UGARITIC);
199 /* Unicode-4.1 additions */
200 MATCH_SCRIPT (NEW_TAI_LUE);
201 MATCH_SCRIPT (BUGINESE);
202 MATCH_SCRIPT (GLAGOLITIC);
203 MATCH_SCRIPT (TIFINAGH);
204 MATCH_SCRIPT (SYLOTI_NAGRI);
205 MATCH_SCRIPT (OLD_PERSIAN);
206 MATCH_SCRIPT (KHAROSHTHI);
208 /* Unicode-5.0 additions */
209 MATCH_SCRIPT (UNKNOWN);
210 MATCH_SCRIPT (BALINESE);
211 MATCH_SCRIPT (CUNEIFORM);
212 MATCH_SCRIPT (PHOENICIAN);
213 MATCH_SCRIPT (PHAGS_PA);
216 /* Unicode-5.1 additions */
217 MATCH_SCRIPT (KAYAH_LI);
218 MATCH_SCRIPT (LEPCHA);
219 MATCH_SCRIPT (REJANG);
220 MATCH_SCRIPT (SUNDANESE);
221 MATCH_SCRIPT (SAURASHTRA);
223 MATCH_SCRIPT (OL_CHIKI);
225 MATCH_SCRIPT (CARIAN);
226 MATCH_SCRIPT (LYCIAN);
227 MATCH_SCRIPT (LYDIAN);
229 /* Unicode-5.2 additions */
230 MATCH_SCRIPT (AVESTAN);
/* Entries guarded by CHECK_ICU_VERSION (4, 4) are compiled only against
 * ICU 4.4 or newer.
 * NOTE(review): the matching #endif lines are not visible in this listing. */
231 #if CHECK_ICU_VERSION (4, 4)
232 MATCH_SCRIPT (BAMUM);
234 MATCH_SCRIPT (EGYPTIAN_HIEROGLYPHS);
235 MATCH_SCRIPT (IMPERIAL_ARAMAIC);
236 MATCH_SCRIPT (INSCRIPTIONAL_PAHLAVI);
237 MATCH_SCRIPT (INSCRIPTIONAL_PARTHIAN);
238 MATCH_SCRIPT (JAVANESE);
239 MATCH_SCRIPT (KAITHI);
/* ICU calls this script LANNA; HarfBuzz calls it TAI_THAM. */
240 MATCH_SCRIPT2(LANNA, TAI_THAM);
241 #if CHECK_ICU_VERSION (4, 4)
/* ICU spells it MEITEI_MAYEK; HarfBuzz spells it MEETEI_MAYEK. */
244 MATCH_SCRIPT2(MEITEI_MAYEK, MEETEI_MAYEK);
245 #if CHECK_ICU_VERSION (4, 4)
246 MATCH_SCRIPT (OLD_SOUTH_ARABIAN);
/* ICU calls this script ORKHON; HarfBuzz calls it OLD_TURKIC. */
248 MATCH_SCRIPT2(ORKHON, OLD_TURKIC);
249 MATCH_SCRIPT (SAMARITAN);
250 MATCH_SCRIPT (TAI_VIET);
252 /* Unicode-6.0 additions */
253 MATCH_SCRIPT (BATAK);
254 MATCH_SCRIPT (BRAHMI);
/* ICU calls this script MANDAEAN; HarfBuzz calls it MANDAIC. */
255 MATCH_SCRIPT2(MANDAEAN, MANDAIC);
/* The helper macro is local to this function and removed after the table. */
257 #undef CHECK_ICU_VERSION
/* Fallback when none of the entries above returned. */
262 return HB_SCRIPT_UNKNOWN;
/* File-local hb_unicode_funcs_t instance wired to the ICU-backed callbacks
 * defined above.  The initializer is positional, so the entries must stay in
 * the same order as the fields of hb_unicode_funcs_t.
 * NOTE(review): some initializer lines and the closing brace are not visible
 * in this listing (hb_icu_get_script is presumably listed after
 * hb_icu_get_mirroring) -- confirm against the full file. */
265 static hb_unicode_funcs_t icu_ufuncs = {
266 HB_REFERENCE_COUNT_INVALID, /* ref_count */
268 TRUE, /* immutable */
270 hb_icu_get_combining_class,
271 hb_icu_get_eastasian_width,
272 hb_icu_get_general_category,
273 hb_icu_get_mirroring,
279 hb_icu_get_unicode_funcs (void)