Move buffer into apply_context
[framework/uifw/harfbuzz.git] / src / hb-icu.c
1 /*
2  * Copyright (C) 2009  Red Hat, Inc.
3  * Copyright (C) 2009  Keith Stribley <devel@thanlwinsoft.org>
4  *
5  *  This is part of HarfBuzz, a text shaping library.
6  *
7  * Permission is hereby granted, without written agreement and without
8  * license or royalty fees, to use, copy, modify, and distribute this
9  * software and its documentation for any purpose, provided that the
10  * above copyright notice and the following two paragraphs appear in
11  * all copies of this software.
12  *
13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17  * DAMAGE.
18  *
19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24  *
25  * Red Hat Author(s): Behdad Esfahbod
26  */
27
28 #include "hb-private.h"
29
30 #include "hb-icu.h"
31
32 #include "hb-unicode-private.h"
33
34 #include <unicode/uchar.h>
35 #include <unicode/uscript.h>
36
37 static hb_codepoint_t hb_icu_get_mirroring (hb_codepoint_t unicode) { return u_charMirror(unicode); }
38 static unsigned int hb_icu_get_combining_class (hb_codepoint_t unicode) { return u_getCombiningClass (unicode); }
39
40 static unsigned int
41 hb_icu_get_eastasian_width (hb_codepoint_t unicode)
42 {
43   switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
44   {
45   case U_EA_WIDE:
46   case U_EA_FULLWIDTH:
47     return 2;
48   case U_EA_NEUTRAL:
49   case U_EA_AMBIGUOUS:
50   case U_EA_HALFWIDTH:
51   case U_EA_NARROW:
52     return 1;
53   }
54   return 1;
55 }
56
57 static hb_category_t
58 hb_icu_get_general_category (hb_codepoint_t unicode)
59 {
60   switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
61   {
62   case U_UNASSIGNED:                    return HB_CATEGORY_UNASSIGNED;
63
64   case U_UPPERCASE_LETTER:              return HB_CATEGORY_UPPERCASE_LETTER;    /* Lu */
65   case U_LOWERCASE_LETTER:              return HB_CATEGORY_LOWERCASE_LETTER;    /* Ll */
66   case U_TITLECASE_LETTER:              return HB_CATEGORY_TITLECASE_LETTER;    /* Lt */
67   case U_MODIFIER_LETTER:               return HB_CATEGORY_MODIFIER_LETTER;     /* Lm */
68   case U_OTHER_LETTER:                  return HB_CATEGORY_OTHER_LETTER;        /* Lo */
69
70   case U_NON_SPACING_MARK:              return HB_CATEGORY_NON_SPACING_MARK;    /* Mn */
71   case U_ENCLOSING_MARK:                return HB_CATEGORY_ENCLOSING_MARK;      /* Me */
72   case U_COMBINING_SPACING_MARK:        return HB_CATEGORY_COMBINING_MARK;      /* Mc */
73
74   case U_DECIMAL_DIGIT_NUMBER:          return HB_CATEGORY_DECIMAL_NUMBER;      /* Nd */
75   case U_LETTER_NUMBER:                 return HB_CATEGORY_LETTER_NUMBER;       /* Nl */
76   case U_OTHER_NUMBER:                  return HB_CATEGORY_OTHER_NUMBER;        /* No */
77
78   case U_SPACE_SEPARATOR:               return HB_CATEGORY_SPACE_SEPARATOR;     /* Zs */
79   case U_LINE_SEPARATOR:                return HB_CATEGORY_LINE_SEPARATOR;      /* Zl */
80   case U_PARAGRAPH_SEPARATOR:           return HB_CATEGORY_PARAGRAPH_SEPARATOR; /* Zp */
81
82   case U_CONTROL_CHAR:                  return HB_CATEGORY_CONTROL;             /* Cc */
83   case U_FORMAT_CHAR:                   return HB_CATEGORY_FORMAT;              /* Cf */
84   case U_PRIVATE_USE_CHAR:              return HB_CATEGORY_PRIVATE_USE;         /* Co */
85   case U_SURROGATE:                     return HB_CATEGORY_SURROGATE;           /* Cs */
86
87
88   case U_DASH_PUNCTUATION:              return HB_CATEGORY_DASH_PUNCTUATION;    /* Pd */
89   case U_START_PUNCTUATION:             return HB_CATEGORY_OPEN_PUNCTUATION;    /* Ps */
90   case U_END_PUNCTUATION:               return HB_CATEGORY_CLOSE_PUNCTUATION;   /* Pe */
91   case U_CONNECTOR_PUNCTUATION:         return HB_CATEGORY_CONNECT_PUNCTUATION; /* Pc */
92   case U_OTHER_PUNCTUATION:             return HB_CATEGORY_OTHER_PUNCTUATION;   /* Po */
93
94   case U_MATH_SYMBOL:                   return HB_CATEGORY_MATH_SYMBOL;         /* Sm */
95   case U_CURRENCY_SYMBOL:               return HB_CATEGORY_CURRENCY_SYMBOL;     /* Sc */
96   case U_MODIFIER_SYMBOL:               return HB_CATEGORY_MODIFIER_SYMBOL;     /* Sk */
97   case U_OTHER_SYMBOL:                  return HB_CATEGORY_OTHER_SYMBOL;        /* So */
98
99   case U_INITIAL_PUNCTUATION:           return HB_CATEGORY_INITIAL_PUNCTUATION; /* Pi */
100   case U_FINAL_PUNCTUATION:             return HB_CATEGORY_FINAL_PUNCTUATION;   /* Pf */
101   }
102
103   return HB_CATEGORY_UNASSIGNED;
104 }
105
106 static hb_script_t
107 hb_icu_get_script (hb_codepoint_t unicode)
108 {
109   UErrorCode status = U_ZERO_ERROR;
110   UScriptCode scriptCode = uscript_getScript(unicode, &status);
111   switch ((int) scriptCode)
112   {
113 #define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C
114   MATCH_SCRIPT (INVALID_CODE);
115   MATCH_SCRIPT (COMMON);   /* Zyyy */
116   MATCH_SCRIPT (INHERITED);          /* Qaai */
117   MATCH_SCRIPT (ARABIC);             /* Arab */
118   MATCH_SCRIPT (ARMENIAN);           /* Armn */
119   MATCH_SCRIPT (BENGALI);            /* Beng */
120   MATCH_SCRIPT (BOPOMOFO);           /* Bopo */
121   MATCH_SCRIPT (CHEROKEE);           /* Cher */
122   MATCH_SCRIPT (COPTIC);             /* Qaac */
123   MATCH_SCRIPT (CYRILLIC);           /* Cyrl (Cyrs) */
124   MATCH_SCRIPT (DESERET);            /* Dsrt */
125   MATCH_SCRIPT (DEVANAGARI);         /* Deva */
126   MATCH_SCRIPT (ETHIOPIC);           /* Ethi */
127   MATCH_SCRIPT (GEORGIAN);           /* Geor (Geon); Geoa) */
128   MATCH_SCRIPT (GOTHIC);             /* Goth */
129   MATCH_SCRIPT (GREEK);              /* Grek */
130   MATCH_SCRIPT (GUJARATI);           /* Gujr */
131   MATCH_SCRIPT (GURMUKHI);           /* Guru */
132   MATCH_SCRIPT (HAN);                /* Hani */
133   MATCH_SCRIPT (HANGUL);             /* Hang */
134   MATCH_SCRIPT (HEBREW);             /* Hebr */
135   MATCH_SCRIPT (HIRAGANA);           /* Hira */
136   MATCH_SCRIPT (KANNADA);            /* Knda */
137   MATCH_SCRIPT (KATAKANA);           /* Kana */
138   MATCH_SCRIPT (KHMER);              /* Khmr */
139   MATCH_SCRIPT (LAO);                /* Laoo */
140   MATCH_SCRIPT (LATIN);              /* Latn (Latf); Latg) */
141   MATCH_SCRIPT (MALAYALAM);          /* Mlym */
142   MATCH_SCRIPT (MONGOLIAN);          /* Mong */
143   MATCH_SCRIPT (MYANMAR);            /* Mymr */
144   MATCH_SCRIPT (OGHAM);              /* Ogam */
145   MATCH_SCRIPT (OLD_ITALIC);         /* Ital */
146   MATCH_SCRIPT (ORIYA);              /* Orya */
147   MATCH_SCRIPT (RUNIC);              /* Runr */
148   MATCH_SCRIPT (SINHALA);            /* Sinh */
149   MATCH_SCRIPT (SYRIAC);             /* Syrc (Syrj, Syrn); Syre) */
150   MATCH_SCRIPT (TAMIL);              /* Taml */
151   MATCH_SCRIPT (TELUGU);             /* Telu */
152   MATCH_SCRIPT (THAANA);             /* Thaa */
153   MATCH_SCRIPT (THAI);               /* Thai */
154   MATCH_SCRIPT (TIBETAN);            /* Tibt */
155   MATCH_SCRIPT (CANADIAN_ABORIGINAL);/* Cans */
156   MATCH_SCRIPT (YI);                 /* Yiii */
157   MATCH_SCRIPT (TAGALOG);            /* Tglg */
158   MATCH_SCRIPT (HANUNOO);            /* Hano */
159   MATCH_SCRIPT (BUHID);              /* Buhd */
160   MATCH_SCRIPT (TAGBANWA);           /* Tagb */
161
162   /* Unicode-4.0 additions */
163   MATCH_SCRIPT (BRAILLE);            /* Brai */
164   MATCH_SCRIPT (CYPRIOT);            /* Cprt */
165   MATCH_SCRIPT (LIMBU);              /* Limb */
166   MATCH_SCRIPT (OSMANYA);            /* Osma */
167   MATCH_SCRIPT (SHAVIAN);            /* Shaw */
168   MATCH_SCRIPT (LINEAR_B);           /* Linb */
169   MATCH_SCRIPT (TAI_LE);             /* Tale */
170   MATCH_SCRIPT (UGARITIC);           /* Ugar */
171
172   /* Unicode-4.1 additions */
173   MATCH_SCRIPT (NEW_TAI_LUE);        /* Talu */
174   MATCH_SCRIPT (BUGINESE);           /* Bugi */
175   MATCH_SCRIPT (GLAGOLITIC);         /* Glag */
176   MATCH_SCRIPT (TIFINAGH);           /* Tfng */
177   MATCH_SCRIPT (SYLOTI_NAGRI);       /* Sylo */
178   MATCH_SCRIPT (OLD_PERSIAN);        /* Xpeo */
179   MATCH_SCRIPT (KHAROSHTHI);         /* Khar */
180
181   /* Unicode-5.0 additions */
182   MATCH_SCRIPT (UNKNOWN);            /* Zzzz */
183   MATCH_SCRIPT (BALINESE);           /* Bali */
184   MATCH_SCRIPT (CUNEIFORM);          /* Xsux */
185   MATCH_SCRIPT (PHOENICIAN);         /* Phnx */
186   MATCH_SCRIPT (PHAGS_PA);           /* Phag */
187   MATCH_SCRIPT (NKO);                /* Nkoo */
188
189   /* Unicode-5.1 additions */
190   MATCH_SCRIPT (KAYAH_LI);           /* Kali */
191   MATCH_SCRIPT (LEPCHA);             /* Lepc */
192   MATCH_SCRIPT (REJANG);             /* Rjng */
193   MATCH_SCRIPT (SUNDANESE);          /* Sund */
194   MATCH_SCRIPT (SAURASHTRA);         /* Saur */
195   MATCH_SCRIPT (CHAM);               /* Cham */
196   MATCH_SCRIPT (OL_CHIKI);           /* Olck */
197   MATCH_SCRIPT (VAI);                /* Vaii */
198   MATCH_SCRIPT (CARIAN);             /* Cari */
199   MATCH_SCRIPT (LYCIAN);             /* Lyci */
200   MATCH_SCRIPT (LYDIAN);             /* Lydi */
201   }
202   return HB_SCRIPT_UNKNOWN;
203 }
204
205 static hb_unicode_funcs_t icu_ufuncs = {
206   HB_REFERENCE_COUNT_INVALID, /* ref_count */
207
208   TRUE, /* immutable */
209
210   hb_icu_get_general_category,
211   hb_icu_get_combining_class,
212   hb_icu_get_mirroring,
213   hb_icu_get_script,
214   hb_icu_get_eastasian_width
215 };
216
217 hb_unicode_funcs_t *
218 hb_icu_get_unicode_funcs (void)
219 {
220   return &icu_ufuncs;
221 }