[indic] Add syllable recognition state machine
[framework/uifw/harfbuzz.git] / src / hb-ot-shape-complex-indic.cc
1 /*
2  * Copyright © 2011  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26
27 #include "hb-ot-shape-complex-private.hh"
28
29 HB_BEGIN_DECLS
30
31
/* buffer var allocations */

/* Per-glyph byte that stores the glyph's indic_category_t value. */
#define indic_category() var2.u8[0]

/* Per-glyph byte that stores the glyph's indic_matra_category_t value. */
#define indic_position() var2.u8[1]

/* One byte per table entry.
 * NOTE(review): presumably consumed by hb-ot-shape-complex-indic-table.hh,
 * which is included further down -- confirm. */
#define INDIC_TABLE_ELEMENT_TYPE uint8_t
37
/* Categories used in the OpenType spec:
 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx
 *
 * Values are spelled out explicitly; INDIC_COMBINE_CATEGORIES statically
 * asserts that a category fits in four bits (< 16).
 */
enum indic_category_t {
  OT_X    = 0,
  OT_C    = 1,
  OT_Ra   = 2,
  OT_V    = 3,
  OT_N    = 4,
  OT_H    = 5,
  OT_ZWNJ = 6,
  OT_ZWJ  = 7,
  OT_M    = 8,
  OT_SM   = 9,
  OT_VD   = 10,
  OT_A    = 11,
  OT_NBSP = 12
};
56
57 /* Categories used in IndicSyllabicCategory.txt from UCD */
58 /* The assignments are guesswork */
59 enum indic_syllabic_category_t {
60   INDIC_SYLLABIC_CATEGORY_OTHER                 = OT_X,
61
62   INDIC_SYLLABIC_CATEGORY_AVAGRAHA              = OT_X,
63   INDIC_SYLLABIC_CATEGORY_BINDU                 = OT_SM,
64   INDIC_SYLLABIC_CATEGORY_CONSONANT             = OT_C,
65   INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD        = OT_C,
66   INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL       = OT_C,
67   INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C,
68   INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL      = OT_C,
69   INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
70   INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED   = OT_C,
71   INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA       = OT_C,
72   INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER      = OT_X,
73   INDIC_SYLLABIC_CATEGORY_NUKTA                 = OT_N,
74   INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER      = OT_X,
75   INDIC_SYLLABIC_CATEGORY_TONE_LETTER           = OT_X,
76   INDIC_SYLLABIC_CATEGORY_TONE_MARK             = OT_X,
77   INDIC_SYLLABIC_CATEGORY_VIRAMA                = OT_H,
78   INDIC_SYLLABIC_CATEGORY_VISARGA               = OT_SM,
79   INDIC_SYLLABIC_CATEGORY_VOWEL                 = OT_V,
80   INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT       = OT_M,
81   INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT     = OT_V
82 };
83
/* Categories used in IndicMatraCategory.txt from UCD.
 *
 * The four basic placements occupy one bit each; compound placements are
 * the bitwise union of the basic ones.
 */
enum indic_matra_category_t {
  INDIC_MATRA_CATEGORY_NOT_APPLICABLE           = 0,

  INDIC_MATRA_CATEGORY_LEFT                     = 1 << 0,
  INDIC_MATRA_CATEGORY_TOP                      = 1 << 1,
  INDIC_MATRA_CATEGORY_BOTTOM                   = 1 << 2,
  INDIC_MATRA_CATEGORY_RIGHT                    = 1 << 3,

  /* We don't really care much about these since we decompose them
   * in the generic pre-shaping layer. */
  INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT         = INDIC_MATRA_CATEGORY_BOTTOM | INDIC_MATRA_CATEGORY_RIGHT,
  INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT           = INDIC_MATRA_CATEGORY_LEFT | INDIC_MATRA_CATEGORY_RIGHT,
  INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM           = INDIC_MATRA_CATEGORY_TOP | INDIC_MATRA_CATEGORY_BOTTOM,
  INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_TOP | INDIC_MATRA_CATEGORY_BOTTOM | INDIC_MATRA_CATEGORY_RIGHT,
  INDIC_MATRA_CATEGORY_TOP_AND_LEFT             = INDIC_MATRA_CATEGORY_TOP | INDIC_MATRA_CATEGORY_LEFT,
  INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT   = INDIC_MATRA_CATEGORY_TOP | INDIC_MATRA_CATEGORY_LEFT | INDIC_MATRA_CATEGORY_RIGHT,
  INDIC_MATRA_CATEGORY_TOP_AND_RIGHT            = INDIC_MATRA_CATEGORY_TOP | INDIC_MATRA_CATEGORY_RIGHT,

  /* These three carry no placement bits: they alias NOT_APPLICABLE. */
  INDIC_MATRA_CATEGORY_INVISIBLE                = INDIC_MATRA_CATEGORY_NOT_APPLICABLE,
  INDIC_MATRA_CATEGORY_OVERSTRUCK               = INDIC_MATRA_CATEGORY_NOT_APPLICABLE,
  INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT        = INDIC_MATRA_CATEGORY_NOT_APPLICABLE
};
116
/* Pack a syllabic category S and a matra category M into one value:
 * S occupies the low four bits, M the next four.
 *
 * Checked at compile time:
 *   - M may be non-zero only when S is VIRAMA or VOWEL_DEPENDENT;
 *   - both S and M must be below 16 so each fits its nibble.
 *
 * Every use of S and M is parenthesized so that an argument containing a
 * lower-precedence operator (e.g. `A | B`) is packed and checked as a whole.
 */
#define INDIC_COMBINE_CATEGORIES(S,M) \
  (ASSERT_STATIC_EXPR ((M) == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || ((S) == INDIC_SYLLABIC_CATEGORY_VIRAMA || (S) == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)), \
   ASSERT_STATIC_EXPR ((S) < 16 && (M) < 16), \
   ((M) << 4) | (S))
121
122 #include "hb-ot-shape-complex-indic-table.hh"
123
124 static const hb_tag_t indic_basic_features[] =
125 {
126   HB_TAG('n','u','k','t'),
127   HB_TAG('a','k','h','n'),
128   HB_TAG('r','p','h','f'),
129   HB_TAG('r','k','r','f'),
130   HB_TAG('p','r','e','f'),
131   HB_TAG('b','l','w','f'),
132   HB_TAG('h','a','l','f'),
133   HB_TAG('v','a','t','u'),
134   HB_TAG('p','s','t','f'),
135   HB_TAG('c','j','c','t'),
136 };
137
138 static const hb_tag_t indic_other_features[] =
139 {
140   HB_TAG('p','r','e','s'),
141   HB_TAG('a','b','v','s'),
142   HB_TAG('b','l','w','s'),
143   HB_TAG('p','s','t','s'),
144   HB_TAG('h','a','l','n'),
145
146   HB_TAG('d','i','s','t'),
147   HB_TAG('a','b','v','m'),
148   HB_TAG('b','l','w','m'),
149 };
150
151
152 void
153 _hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED)
154 {
155   for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++)
156     planner->map.add_bool_feature (indic_basic_features[i], false);
157
158   for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++)
159     planner->map.add_bool_feature (indic_other_features[i], true);
160 }
161
162
163
164 #include "hb-ot-shape-complex-indic-machine.hh"
165
166
167 void
168 _hb_ot_shape_complex_setup_masks_indic  (hb_ot_shape_context_t *c)
169 {
170   unsigned int count = c->buffer->len;
171
172   for (unsigned int i = 0; i < count; i++)
173   {
174     unsigned int type = get_indic_categories (c->buffer->info[i].codepoint);
175
176     c->buffer->info[i].indic_category() = type & 0x0F;
177     c->buffer->info[i].indic_position() = type >> 4;
178   }
179
180   find_syllables (c);
181
182   hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};
183   unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);
184   for (unsigned int i = 0; i < num_masks; i++)
185     mask_array[i] = c->plan->map.get_1_mask (indic_basic_features[i]);
186 }
187
188
189 HB_END_DECLS