2 * Copyright © 2012 Google, Inc.
4 * This is part of HarfBuzz, a text shaping library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 * Google Author(s): Behdad Esfahbod
27 #ifndef HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH
28 #define HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH
30 #include "hb-private.hh"
33 #include "hb-ot-shape-complex-private.hh"
34 #include "hb-ot-shape-private.hh" /* XXX Remove */
37 /* buffer var allocations */
/* Accessors used as info.indic_category() / info.indic_position() on a
 * hb_glyph_info_t; they expand to the complex_var_u8_0() and
 * complex_var_u8_1() buffer variables respectively. */
38 #define indic_category() complex_var_u8_0() /* indic_category_t */
39 #define indic_position() complex_var_u8_1() /* indic_position_t (set_indic_properties() stores one here) */
/* NOTE(review): presumably the element type of the table defined in
 * hb-ot-shape-complex-indic-table.hh -- confirm against that file. */
42 #define INDIC_TABLE_ELEMENT_TYPE uint8_t
44 /* Categories used in the OpenType spec:
45 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx
47 /* Note: This enum is duplicated in the -machine.rl source file.
48 * Not sure how to avoid duplication. */
/* Shaping category of a character. set_indic_properties() takes the initial
 * value from the low 4 bits of the table entry and stores the final value in
 * info.indic_category(); is_one_of() tests it through FLAG (category) masks. */
49 enum indic_category_t {
62 OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
63 OT_RS, /* Register Shifter, used in Khmer OT spec */
66 OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
69 /* Visual positions in a syllable from left to right. */
/* Stored per glyph through info.indic_position(). set_indic_properties()
 * takes the initial value from the table entry bits above the low 4 and may
 * then re-assign it. */
70 enum indic_position_t {
73 POS_RA_TO_BECOME_REPH,
96 /* Categories used in IndicSyllabicCategory.txt from UCD. */
/* Each UCD syllabic category is declared as an alias of the indic_category_t
 * value it maps to, so several UCD categories can share one value (for
 * example BINDU and VISARGA are both OT_SM, and VOWEL and VOWEL_INDEPENDENT
 * are both OT_V). */
97 enum indic_syllabic_category_t {
98 INDIC_SYLLABIC_CATEGORY_OTHER = OT_X,
100 INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_X,
101 INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM,
102 INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C,
103 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C,
104 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL = OT_C,
105 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C,
106 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C,
107 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
108 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C,
109 INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha,
110 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
111 INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
112 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,
113 INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X,
114 INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_X,
115 INDIC_SYLLABIC_CATEGORY_VIRAMA = OT_H,
116 INDIC_SYLLABIC_CATEGORY_VISARGA = OT_SM,
117 INDIC_SYLLABIC_CATEGORY_VOWEL = OT_V,
118 INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT = OT_M,
119 INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT = OT_V
122 /* Categories used in IndicMatraCategory.txt from UCD */
/* Each UCD matra category is declared as an alias of an indic_position_t
 * value. The four simple sides map to POS_PRE_C / POS_ABOVE_C / POS_BELOW_C /
 * POS_POST_C, split matras reuse the value of one of their parts, and
 * INVISIBLE, OVERSTRUCK and VISUAL_ORDER_LEFT are treated like
 * NOT_APPLICABLE (POS_END). */
123 enum indic_matra_category_t {
124 INDIC_MATRA_CATEGORY_NOT_APPLICABLE = POS_END,
126 INDIC_MATRA_CATEGORY_LEFT = POS_PRE_C,
127 INDIC_MATRA_CATEGORY_TOP = POS_ABOVE_C,
128 INDIC_MATRA_CATEGORY_BOTTOM = POS_BELOW_C,
129 INDIC_MATRA_CATEGORY_RIGHT = POS_POST_C,
131 /* These should resolve to the position of the last part of the split sequence. */
132 INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
133 INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
134 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM = INDIC_MATRA_CATEGORY_BOTTOM,
135 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
136 INDIC_MATRA_CATEGORY_TOP_AND_LEFT = INDIC_MATRA_CATEGORY_TOP,
137 INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
138 INDIC_MATRA_CATEGORY_TOP_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
140 INDIC_MATRA_CATEGORY_INVISIBLE = INDIC_MATRA_CATEGORY_NOT_APPLICABLE,
141 INDIC_MATRA_CATEGORY_OVERSTRUCK = INDIC_MATRA_CATEGORY_NOT_APPLICABLE,
142 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = INDIC_MATRA_CATEGORY_NOT_APPLICABLE
145 /* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operator
146 * because gcc fails to optimize the latter and fills the table in at runtime. */
/* Combines a syllabic category S and a matra category M into one value.
 * Two compile-time checks are applied to the arguments:
 *  - M may differ from INDIC_MATRA_CATEGORY_NOT_APPLICABLE only when S is
 *    VIRAMA or VOWEL_DEPENDENT;
 *  - both S and M must be below 16. NOTE(review): presumably so each fits the
 *    4-bit fields that set_indic_properties() unpacks -- confirm.
 * NOTE(review): S and M are not parenthesized in the expansion; pass only
 * simple enumerator names. */
147 #define INDIC_COMBINE_CATEGORIES(S,M) \
148 (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || (S == INDIC_SYLLABIC_CATEGORY_VIRAMA || S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)) + \
149 ASSERT_STATIC_EXPR_ZERO (S < 16 && M < 16) + \
153 #include "hb-ot-shape-complex-indic-table.hh"
/* True when u lies in the 128-codepoint range that starts at Base: the low
 * 7 bits of u are masked off and the remainder is compared with Base. Base
 * must therefore be a multiple of 0x80. */
156 #define IN_HALF_BLOCK(u, Base) (((u) & ~0x7F) == (Base))
/* Per-script range tests built on IN_HALF_BLOCK; each covers exactly the 128
 * codepoints starting at the given base. */
158 #define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900))
159 #define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980))
160 #define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00))
161 #define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80))
162 #define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00))
163 #define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80))
164 #define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00))
165 #define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80))
166 #define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00))
167 #define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80))
168 #define IS_KHMR(u) (IN_HALF_BLOCK (u, 0x1780))
/* Position for a left-side matra: always POS_PRE_M; the argument is unused. */
171 #define MATRA_POS_LEFT(u) POS_PRE_M
/* Position for a right-side matra, chosen by the script range of u. Telugu
 * and Kannada additionally split on the codepoint itself; anything outside
 * the listed scripts gets POS_AFTER_SUB.
 * NOTE(review): u is used unparenthesized in the Telugu/Kannada comparisons
 * and is evaluated many times, so pass a plain variable. 0xCD6 is the same
 * value as 0x0CD6; only the spelling differs from the neighbouring constants. */
172 #define MATRA_POS_RIGHT(u) ( \
173 IS_DEVA(u) ? POS_AFTER_SUB : \
174 IS_BENG(u) ? POS_AFTER_POST : \
175 IS_GURU(u) ? POS_AFTER_POST : \
176 IS_GUJR(u) ? POS_AFTER_POST : \
177 IS_ORYA(u) ? POS_AFTER_POST : \
178 IS_TAML(u) ? POS_AFTER_POST : \
179 IS_TELU(u) ? (u <= 0x0C42 ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
180 IS_KNDA(u) ? (u < 0x0CC3 || u > 0xCD6 ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
181 IS_MLYM(u) ? POS_AFTER_POST : \
182 IS_SINH(u) ? POS_AFTER_SUB : \
183 IS_KHMR(u) ? POS_AFTER_POST : \
184 /*default*/ POS_AFTER_SUB \
/* Position for a top matra, chosen by the script range of u. Bengali and
 * Malayalam have no entry, so codepoints in those ranges take the default
 * POS_AFTER_SUB, as does anything outside the listed scripts. */
186 #define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \
187 IS_DEVA(u) ? POS_AFTER_SUB : \
188 IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \
189 IS_GUJR(u) ? POS_AFTER_SUB : \
190 IS_ORYA(u) ? POS_AFTER_MAIN : \
191 IS_TAML(u) ? POS_AFTER_SUB : \
192 IS_TELU(u) ? POS_BEFORE_SUB : \
193 IS_KNDA(u) ? POS_BEFORE_SUB : \
194 IS_SINH(u) ? POS_AFTER_SUB : \
195 IS_KHMR(u) ? POS_AFTER_POST : \
196 /*default*/ POS_AFTER_SUB \
/* Position for a bottom matra, chosen by the script range of u; anything
 * outside the listed scripts gets POS_AFTER_SUB. */
198 #define MATRA_POS_BOTTOM(u) ( \
199 IS_DEVA(u) ? POS_AFTER_SUB : \
200 IS_BENG(u) ? POS_AFTER_SUB : \
201 IS_GURU(u) ? POS_AFTER_POST : \
202 IS_GUJR(u) ? POS_AFTER_POST : \
203 IS_ORYA(u) ? POS_AFTER_SUB : \
204 IS_TAML(u) ? POS_AFTER_POST : \
205 IS_TELU(u) ? POS_BEFORE_SUB : \
206 IS_KNDA(u) ? POS_BEFORE_SUB : \
207 IS_MLYM(u) ? POS_AFTER_POST : \
208 IS_SINH(u) ? POS_AFTER_SUB : \
209 IS_KHMR(u) ? POS_AFTER_POST : \
210 /*default*/ POS_AFTER_SUB \
/* Translates the side of the consonant a matra sits on (POS_PRE_C,
 * POS_POST_C, POS_ABOVE_C or POS_BELOW_C) into the script-specific position
 * given by the matching MATRA_POS_LEFT / RIGHT / TOP / BOTTOM macro for
 * codepoint u. */
214 static inline indic_position_t
215 matra_position (hb_codepoint_t u, indic_position_t side)
219 case POS_PRE_C: return MATRA_POS_LEFT (u);
220 case POS_POST_C: return MATRA_POS_RIGHT (u);
221 case POS_ABOVE_C: return MATRA_POS_TOP (u);
222 case POS_BELOW_C: return MATRA_POS_BOTTOM (u);
230 * This is a hack for now. We should move this data into the main Indic table.
231 * Or completely remove it and just check in the tables.
/* Codepoints that is_ra() recognizes, searched linearly. The trailing
 * comments name the script of each entry and how it forms a reph. */
233 static const hb_codepoint_t ra_chars[] = {
234 0x0930, /* Devanagari */
235 0x09B0, /* Bengali */
236 0x09F0, /* Bengali */
237 0x0A30, /* Gurmukhi */ /* No Reph */
238 0x0AB0, /* Gujarati */
240 0x0BB0, /* Tamil */ /* No Reph */
241 0x0C30, /* Telugu */ /* Reph formed only with ZWJ */
242 0x0CB0, /* Kannada */
243 0x0D30, /* Malayalam */ /* No Reph, Logical Repha */
245 0x0DBB, /* Sinhala */ /* Reph formed only with ZWJ */
247 0x179A, /* Khmer */ /* No Reph, Visual Repha */
/* Initial position for a consonant: POS_BELOW_C when u is in the 128-codepoint
 * range starting at 0x1780 (Khmer), POS_BASE_C for everything else.
 * NOTE(review): the range test is the same expression as IS_KHMR (u). */
250 static inline indic_position_t
251 consonant_position (hb_codepoint_t u)
253 if ((u & ~0x007F) == 0x1780)
254 return POS_BELOW_C; /* In Khmer coeng model, post and below forms should not be reordered. */
255 return POS_BASE_C; /* Will recategorize later based on font lookups. */
/* Compares u against every entry of ra_chars in order.
 * NOTE(review): presumably reports whether any entry matched -- confirm
 * against the return statements. */
259 is_ra (hb_codepoint_t u)
261 for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++)
262 if (u == ra_chars[i])
/* Tests whether the glyph's category is in a set. flags is a bit mask built
 * from FLAG (category) values; the result is true when the bit for
 * info.indic_category() is set in it. A glyph for which is_a_ligature()
 * holds never matches. */
269 is_one_of (const hb_glyph_info_t &info, unsigned int flags)
271 /* If it ligated, all bets are off. */
272 if (is_a_ligature (info)) return false;
273 return !!(FLAG (info.indic_category()) & flags);
/* Category mask covering OT_ZWJ and OT_ZWNJ. */
276 #define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))
/* True when is_one_of() matches info against JOINER_FLAGS. */
278 is_joiner (const hb_glyph_info_t &info)
280 return is_one_of (info, JOINER_FLAGS);
285 * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
286 * cannot happen in a consonant syllable. The plus side however is, we can call the
287 * consonant syllable logic from the vowel syllable function and get it all right! */
/* Category mask for everything handled as a consonant: OT_C and OT_Ra plus
 * OT_V, OT_NBSP and OT_DOTTEDCIRCLE. */
288 #define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))
/* True when is_one_of() matches info against CONSONANT_FLAGS. */
290 is_consonant (const hb_glyph_info_t &info)
292 return is_one_of (info, CONSONANT_FLAGS);
/* Category mask covering OT_H and OT_Coeng. */
295 #define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng))
/* True when is_one_of() matches info against HALANT_OR_COENG_FLAGS. */
297 is_halant_or_coeng (const hb_glyph_info_t &info)
299 return is_one_of (info, HALANT_OR_COENG_FLAGS);
/* Computes and stores the shaping category and position of one glyph.
 *
 * The value returned by get_indic_categories() for info.codepoint is split
 * into a category (low 4 bits) and a position (the bits above them). A
 * series of per-codepoint overrides then adjusts the category, the position
 * is re-assigned according to the final category (consonant_position() for
 * consonant-like categories, matra_position() for OT_M), and the results are
 * written to info.indic_category() and info.indic_position().
 *
 * NOTE(review): the comment on the first override names U+0951..U+0952 while
 * the range check covers 0x0951..0x0954 -- confirm which is intended. */
303 set_indic_properties (hb_glyph_info_t &info)
305 hb_codepoint_t u = info.codepoint;
306 unsigned int type = get_indic_categories (u);
307 indic_category_t cat = (indic_category_t) (type & 0x0F);
308 indic_position_t pos = (indic_position_t) (type >> 4);
316 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe
317 * treats U+0951..U+0952 all as OT_VD.
319 * U+092E,U+0947,U+0952
320 * U+092E,U+0952,U+0947
321 * U+092E,U+0947,U+0951
322 * U+092E,U+0951,U+0947
324 if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954)))
327 if (unlikely (u == 0x17D1))
330 unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D3))) /* Khmer Various signs */
332 /* These are like Top Matras. */
336 if (u == 0x17C6) /* Khmer Bindu doesn't like to be repositioned. */
339 if (unlikely (u == 0x17D2)) cat = OT_Coeng; /* Khmer coeng */
340 else if (unlikely (u == 0x200C)) cat = OT_ZWNJ;
341 else if (unlikely (u == 0x200D)) cat = OT_ZWJ;
342 else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE;
343 else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. More like consonant medial. like 0A75. */
345 if (cat == OT_Repha) {
346 /* There are two kinds of characters marked as Repha:
347 * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
348 * - The ones that are GenCat=Lo are encoded logically, ie. beginning of syllable. (eg. Malayalam)
350 * We recategorize the first kind to look like a Nukta and attached to the base directly.
352 if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
359 * Re-assign position.
362 if ((FLAG (cat) & CONSONANT_FLAGS))
364 pos = consonant_position (u);
368 else if (cat == OT_M)
370 pos = matra_position (u, pos);
372 else if (cat == OT_SM || cat == OT_VD)
377 if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
381 info.indic_category() = cat;
382 info.indic_position() = pos;
387 #endif /* HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH */