modules/tibetan/tibetan-fc.c

   1 /* Pango
   2  * tibetan-fc.c: Shaper for Tibetan script
   3  *
   4  * Copyright (C) 2005 DIT, Government of Bhutan <http://www.dit.gov.bt>
   5  * Contact person : Pema Geyleg <pema_geyleg@druknet.bt>
   6  *
   7  *  Based on code from khmer shapers developed by Jens Herden
   8  *  <jens@tibetanos.inf > and Javier Sola <javier@tibetanos.info>
   9  *
  10  * Based on code from other shapers
  11  * Copyright (C) 1999-2004 Red Hat Software
  12  * Author: Owen Taylor <otaylor@redhat.com>
  13
  14  * Partially based on Indic shaper
  15  * Copyright (C) 2001, 2002 IBM Corporation
  16  * Author: Eric Mader <mader@jtcsv.com>
  17  *
  18  * The first module for Tibetan shaper was developed by Mr. Karunakar under
  19  * PanLocalization project.
  20  * Mr. Chris Fynn, Mr.Javier Sola, Mr. Namgay Thinley were involved
  21  * while developing this shaper.
  22  *
  23  * This library is free software; you can redistribute it and/or
  24  * modify it under the terms of the GNU Library General Public
  25  * License as published by the Free Software Foundation; either
  26  * version 2 of the License, or (at your option) any later version.
  27  *
  28  * This library is distributed in the hope that it will be useful,
  29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  31  * Library General Public License for more details.
  32  *
  33  * You should have received a copy of the GNU Library General Public
  34  * License along with this library; if not, write to the
  35  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  36  * Boston, MA 02111-1307, USA.
  37  *
  38  * The license on the original Indic shaper code is as follows:
  39  *
  40  *  Permission is hereby granted, free of charge, to any person
  41  * obtaining a copy of this software and associated documentation
  42  * files (the "Software"), to deal in the Software without
  43  * restriction, including without limitation the rights to use, copy,
  44  * modify, merge, publish, distribute, and/or sell copies of the
  45  * Software, and to permit persons to whom the Software is furnished
  46  * to do so, provided that the above copyright notice(s) and this
  47  * permission notice appear in all copies of the Software and that
  48  * both the above copyright notice(s) and this permission notice
  49  * appear in supporting documentation.
  50  *
  51  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  52  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  53  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  54  * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE
  55  * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR
  56  * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
  57  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  58  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  59  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  60  * OF THIS SOFTWARE.
  61  *
  62  * Except as contained in this notice, the name of a copyright holder
  63  * shall not be used in advertising or otherwise to promote the sale,
  64  * use or other dealings in this Software without prior written
  65  * authorization of the copyright holder.
  66  */
  67 #include "config.h"
  68 #include <string.h>
  69
  70 #include "pango-engine.h"
  71 #include "pango-ot.h"
  72 #include "pango-utils.h"
  73 #include "pangofc-font.h"
  74
  75
  76 #define SCRIPT_ENGINE_NAME "TibetanScriptEngineFc"
  77 #define RENDER_TYPE PANGO_RENDER_TYPE_FC
  78
   79
      /* The engine defines no instance or class data of its own: both types are
       * plain aliases of the PangoEngineShape base types.
       */
   80 typedef PangoEngineShape      TibetanEngineFc;
   81 typedef PangoEngineShapeClass TibetanEngineFcClass ;
  82
   83
      /* Scripts handled by this engine.  There is a single entry: the Tibetan
       * script with the language pattern "*".
       */
   84 static PangoEngineScriptInfo tibetan_scripts[] =
   85 {
   86   { PANGO_SCRIPT_TIBETAN, "*" }
   87 };
   88
      /* Engine descriptors reported by the module's "list" entry point: one
       * shape engine, for the FC render type, covering tibetan_scripts.
       */
   89 static PangoEngineInfo script_engines[] =
   90 {
   91   {
   92     SCRIPT_ENGINE_NAME,
   93     PANGO_ENGINE_TYPE_SHAPE,
   94     RENDER_TYPE,
   95     tibetan_scripts, G_N_ELEMENTS (tibetan_scripts)
   96   }
   97 };
  98
  99
 100 /*
 101  * Vocabulary
  102  *     Base ->         A consonant in its full (not subscript) form. It is the
  103  *                   center of the syllable, it can be surrounded by subjoined consonants, vowels,
  104  *                   signs... but there is only one base in a stack, it has to be coded as
  105  *                   the first character of the syllable. Included here are also groups of base + subjoined
  106  *                   which are represented by one single code point in unicode (e.g. 0F43). Also other characters
  107  *                   that might take subjoined consonants or other combining characters.
  108  *     Subjoined ->    Subjoined consonants and groups of subjoined consonants which have a single code-point
  109  *                   to represent the group (even if each subjoined consonant is represented independently
  110  *                   by another code-point).
  111  *     Tsa Phru -->    Tsa Phru character, Bhutanese people will always place it right after the base, but sometimes, due to
  112  *                   "normalization", it is placed after all the subjoined consonants, and it is also permitted there.
  113  *     A Chung -->     Vowel lengthening mark (0F71). It is placed after the base and any subjoined consonants but before any vowels.
  114  *     Precomposed Sanskrit vowels --> They are combinations of subjoined consonants + vowels that have been assigned
  115  *                   a given code-point (in spite of each single part of them also having a code-point).
  116  *                   They are avoided, and users are encouraged to use the combination of code-points that
  117  *                   represents the same sound instead of using these combined characters. This is included here
  118  *                   for compatibility with possible texts that use them (they are not in the Dzongkha keyboard).
  119  *     Halanta ->      The Halanta or Virama character 0F84 indicates that a consonant should not use its inherent vowel,
  120  *                   in spite of not having other vowels present. It is usually placed immediately after a base consonant,
  121  *                   but in some special cases it can also be placed after a subjoined consonant, so this is also
  122  *                   permitted in this algorithm. (Halanta is always displayed in Tibetan, not used as a connecting char.)
 123  *
  124  *     Subjoined vowels -> Dependent vowels (matras) placed below the base and below all subjoined consonants. There
  125  *                   might be as many as three subjoined vowels in a given stack (only one in general text, but up
  126  *                   to three for abbreviations, they have to be permitted).
  127  *     Superscript vowels -> There are three superscript vowels, and they can be repeated or combined (up to three
  128  *                   times). They can combine with subjoined vowels, and are always coded after these.
  129  *     Anusvara -->    Nasalisation sign. Traditionally placed in absence of vowels, but also after vowels. In some
  130  *                   special cases it can be placed before a vowel, so this is also permitted.
 131  *     Candrabindu ->  Forms of the Anusvara with different glyphs (and different in identity) which can be placed
 132  *                   without vowel or after the vowel, but never before. Cannot combine with Anusvara.
  133  *     Stress marks -> Marks placed above or below a syllable, affecting the whole syllable. They are combining
  134  *                   marks, so they have to be attached to a specific stack. They are used to emphasise a syllable.
 135  *
 136  *     Digits ->       Digits are not considered as non-combining characters because there are a few characters which
 137  *                   combine with them, so they have to be considered independently.
 138  *     Digit combining marks -> dependent marks that combine with digits.
 139  *
 140  *     TODO
  141  *     There are a number of characters in the CJK block that are used in Tibetan script; two of these symbols
  142  *     are used as bases for combining glyphs, and have not been encoded in Tibetan. As these characters are outside
  143  *     of the Tibetan block, they have not been treated in this program.
 144 */
 145
  146
      /* Character class values.  A class value is used directly as the column
       * index into tibetanStateTable, so the order and numbering here must match
       * the column order of that table; CC_COUNT is the width of a table row.
       */
  147 enum TibetanCharClassValues
  148 {
  149         CC_RESERVED             =  0, /* Non Combining Characters*/
  150         CC_BASE                 =  1, /* Base Consonants, Base Consonants with Subjoined attached in code point, Sanskrit base marks*/
  151         CC_SUBJOINED            =  2, /* Subjoined Consonants, combination of more than one Subjoined Consonant in the code point*/
  152         CC_TSA_PHRU             =  3, /* Tsa-Phru character 0F39*/
  153         CC_A_CHUNG              =  4, /* Vowel Lengthening a-chung mark 0F71*/
  154         CC_COMP_SANSKRIT        =  5, /* Precomposed Sanskrit vowels including Subjoined characters and vowels*/
  155         CC_HALANTA              =  6, /* Halanta Character 0F84*/
  156         CC_BELOW_VOWEL          =  7, /* Subjoined vowels*/
  157         CC_ABOVE_VOWEL          =  8, /* Superscript vowels*/
  158         CC_ANUSVARA             =  9, /* Tibetan sign Rjes Su Nga Ro 0F7E*/
  159         CC_CANDRABINDU          = 10, /* Tibetan sign Sna Ldan and Nyi Zla Naa Da 0F82, 0F83*/
  160         CC_VISARGA              = 11, /* Tibetan sign Rnam Bcad (0F7F)*/
  161         CC_ABOVE_S_MARK         = 12, /* Stress Marks placed above the text*/
  162         CC_BELOW_S_MARK         = 13, /* Stress Marks placed below the text*/
  163         CC_DIGIT                = 14, /* Dzongkha Digits*/
  164         CC_PRE_DIGIT_MARK       = 15, /* Mark placed before the digit*/
  165         CC_POST_BELOW_DIGIT_M   = 16, /* Mark placed below or after the digit*/
  166         CC_COUNT                = 17  /* This is the number of character classes*/
  167 };
 168
  169
      /* Flags ORed on top of a class value.  The class value itself lives in the
       * low 16 bits (CF_CLASS_MASK); the position flags occupy the next four bits
       * (CF_POS_MASK) and the remaining flags sit above those.
       */
  170 enum TibetanCharClassFlags
  171 {
  172         CF_CLASS_MASK    = 0x0000FFFF,
  173
  174         CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable*/
  175         CF_DIGIT         = 0x01000000,  /* flag to speed up comparison*/
  176         CF_PREDIGIT      = 0x02000000,  /* flag to detect pre-digit marks for reordering*/
  177
  178         /* position flags*/
  179         CF_POS_BEFORE    = 0x00080000,
  180         CF_POS_BELOW     = 0x00040000,
  181         CF_POS_ABOVE     = 0x00020000,
  182         CF_POS_AFTER     = 0x00010000,
  183         CF_POS_MASK      = 0x000f0000
  184 };
 185
 186
  187 /* Characters that get referred to by name */
  188 enum TibetanChar
  189 {
  190   C_DOTTED_CIRCLE = 0x25CC,  /* placeholder base emitted in front of a syllable that has none */
  191   C_PRE_NUMBER_MARK = 0x0F3F /* the only code point classified as _pd in tibetanCharClasses */
  192 };
 193
 194
  195 enum
  196 {
  197     /* Simple classes: short names for "class value | flags", used to fill tibetanCharClasses.
  198      * The class value part drives the state table (in this file) that controls the length of a syllable;
  199      * the CF_POS_* flag tells where a character is placed in reference to the base character, and
  200      * CF_DOTTED_CIRCLE marks characters that, when displayed independently, get a dotted circle to
  201      * indicate an error in syllable construction.
  202      */
  203     _xx = CC_RESERVED,
  204     _ba = CC_BASE,
  205     _sj = CC_SUBJOINED | CF_DOTTED_CIRCLE | CF_POS_BELOW,
  206     _tp = CC_TSA_PHRU  | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
  207     _ac = CC_A_CHUNG |  CF_DOTTED_CIRCLE | CF_POS_BELOW,
  208     _cs = CC_COMP_SANSKRIT | CF_DOTTED_CIRCLE | CF_POS_BELOW,
  209     _ha = CC_HALANTA | CF_DOTTED_CIRCLE | CF_POS_BELOW,
  210     _bv = CC_BELOW_VOWEL | CF_DOTTED_CIRCLE | CF_POS_BELOW,
  211     _av = CC_ABOVE_VOWEL | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
  212     _an = CC_ANUSVARA | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
  213     _cb = CC_CANDRABINDU | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
  214     _vs = CC_VISARGA | CF_DOTTED_CIRCLE| CF_POS_AFTER,
  215     _as = CC_ABOVE_S_MARK | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
  216     _bs = CC_BELOW_S_MARK | CF_DOTTED_CIRCLE | CF_POS_BELOW,
  217     _di = CC_DIGIT | CF_DIGIT,
  218     _pd = CC_PRE_DIGIT_MARK | CF_DOTTED_CIRCLE | CF_PREDIGIT | CF_POS_BEFORE ,
  219     _bd = CC_POST_BELOW_DIGIT_M | CF_DOTTED_CIRCLE | CF_POS_AFTER
  220 };
 220
 221
  222 /* Character class: a character class value
  223  * ORed with character class flags.
  224  */
  225 typedef glong TibetanCharClass;
      /* Legend for the shorthand names used in tibetanCharClasses: */
  226 /*_xx Non Combining characters*/
  227 /*_ba Base Consonants*/
  228 /*_sj Subjoined consonants*/
  229 /*_tp Tsa - phru*/
  230 /*_ac A-chung, Vowel Lengthening mark*/
  231 /*_cs Precomposed Sanskrit vowel + subjoined consonants*/
  232 /*_ha Halanta/Virama*/
  233 /*_bv Below vowel*/
  234 /*_av Above vowel*/
  235 /*_an Anusvara*/
  236 /*_cb Candrabindu*/
  237 /*_vs Visarga/Post mark*/
  238 /*_as Upper Stress marks*/
  239 /*_bs Lower Stress marks*/
  240 /*_di Digit*/
  241 /*_pd Number pre combining, Needs reordering*/
  242 /*_bd Other number combining marks*/
 243
  244
      /* Class (value | flags) of every code point from firstChar to lastChar,
       * indexed by (code point - firstChar).  Each row holds 16 consecutive code
       * points; the comment at the end of a row gives the range it covers.
       */
  245 static const TibetanCharClass tibetanCharClasses[] =
  246 {
  247   /* 0    1    2    3    4    5    6    7    8    9   a     b   c    d     e   f*/
  248     _xx, _ba, _xx, _xx, _ba, _ba, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0F00 - 0F0F 0*/
  249     _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _bd, _bd, _xx, _xx, _xx, _xx, _xx, _xx, /* 0F10 - 0F1F 1*/
  250     _di, _di, _di, _di, _di, _di, _di, _di, _di, _di, _xx, _xx, _xx, _xx, _xx, _xx, /* 0F20 - 0F2F 2*/
  251     _xx, _xx, _xx, _xx, _xx, _bs, _xx, _bs, _xx, _tp, _xx, _xx, _xx, _xx, _bd, _pd, /* 0F30 - 0F3F 3*/
  252     _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _ba, _ba, _ba, _ba, _ba, _ba, _ba, /* 0F40 - 0F4F 4*/
  253     _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, /* 0F50 - 0F5F 5*/
  254     _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, _xx, /* 0F60 - 0F6F 6*/
  255     _xx, _ac, _av, _cs, _bv, _bv, _cs, _cs, _cs, _cs, _av, _av, _av, _av, _an, _vs, /* 0F70 - 0F7F 7*/
  256     _av, _cs, _cb, _cb, _ha, _xx, _as, _as, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, /* 0F80 - 0F8F 8*/
  257     _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, _sj, _sj, _sj, _sj, _sj, /* 0F90 - 0F9F 9*/
  258     _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, /* 0FA0 - 0FAF a*/
  259     _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, /* 0FB0 - 0FBF b*/
  260     _xx, _xx, _xx, _xx, _xx, _xx, _bs, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0FC0 - 0FCF c*/
  261     _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0FD0 - 0FDF  d*/
  262     _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0FE0 - 0FEF e*/
  263     _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0FF0 - 0FFF f*/
  264 };
 265
  266 /* These defines must reflect the range of tibetanCharClasses: get_char_class()
      * uses them as the bounds check before indexing that table. */
  267 /* First Tibetan character covered by the table */
  268 #define firstChar 0x0F00
  269 /* Last Tibetan character covered by the table */
  270 #define lastChar 0x0FFF
 271
  272 /* The state table is used to calculate the end (the length) of a well
  273  * formed Tibetan stack.
  274  *
  275  * The columns of each row are ordered exactly like the values of
  276  * enum TibetanCharClassValues, so a class value is used directly as the column index.
  277  *
  278  * Each row corresponds to a state, which does not necessarily need to be a type
  279  * of component... for example, state 2 is a base, which is always the first character
  280  * in the stack, but the state could be produced by a consonant of any type when
  281  * it is the first character that is analysed (in ground state).
      *
      * An entry of -1 ends the stack: the character being examined is not part of it.
      * The ground state (row 0) has no -1 entry, so the first character examined is
      * always accepted.
  282  */
  283
  284 static const gint8 tibetanStateTable[][CC_COUNT] =
  285 {
  286     /*Dzongkha state table*/
  287     /*xx  ba  sj  tp  ac  cs  ha  bv  av  an  cb  vs  as  bs  di  pd  bd*/
  288     { 1,  2,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, 20, 21, 21,}, /*  0 - ground state*/
  289     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, /*  1 - exit state (or sign to the right of the syllable)*/
  290     {-1, -1,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /*  2 - Base consonant*/
  291     {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /*  3 - Tsa phru after base*/
  292     {-1, -1,  4,  6,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /*  4 - Subjoined consonant after base*/
  293     {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /*  5 - Subjoined consonant after tsa phru*/
  294     {-1, -1, -1, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /*  6 - Tsa phru after subjoined consonant*/
  295     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, /*  7 - Pre Composed Sanskrit*/
  296     {-1, -1, -1, -1, -1, -1, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /*  8 - A-chung*/
  297     {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, -1, 19, 19, -1, -1, -1,}, /*  9 - Halanta*/
  298     {-1, -1, -1, -1, -1, -1, -1, 11, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /* 10 - below vowel 1*/
  299     {-1, -1, -1, -1, -1, -1, -1, 12, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /* 11 - below vowel 2*/
  300     {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, /* 12 - below vowel 3*/
  301     {-1, -1, -1, -1, -1, -1, -1, -1, 14, 17, 17, 18, 19, 19, -1, -1, -1,}, /* 13 - Anusvara before vowel*/
  302     {-1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 17, 18, 19, 19, -1, -1, -1,}, /* 14 - above vowel 1*/
  303     {-1, -1, -1, -1, -1, -1, -1, -1, 16, 17, 17, 18, 19, 19, -1, -1, -1,}, /* 15 - above vowel 2*/
  304     {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 17, 18, 19, 19, -1, -1, -1,}, /* 16 - above vowel 3*/
  305     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 19, 19, -1, -1, -1,}, /* 17 - Anusvara or Candrabindu after vowel*/
  306     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, /* 18 - Visarga*/
  307     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, /* 19 - stress mark*/
  308     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, 21,}, /* 20 - digit*/
  309     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, /* 21 - digit mark*/
  310 };
 311
  312
      /* One bit per OpenType feature tag the shaper distinguishes.  The names are
       * the feature tags themselves.  The bits are combined into the per-glyph
       * masks of enum properties, and some of them are used as the property bit
       * of an entry in gsub_features / gpos_features.
       */
  313 enum property_flags
  314 {
  315   abvf = 0x0001,
  316   pref = 0x0002,
  317   pstf = 0x0004,
  318   blwf = 0x0008,
  319
  320   pres = 0x0010,
  321   blws = 0x0020,
  322   abvs = 0x0040,
  323   psts = 0x0080,
  324   clig = 0x0100,
  325
  326   dist = 0x0200,
  327   blwm = 0x0400,
  328   abvm = 0x0800,
  329 };
 330
  331
      /* Property masks attached to glyphs when they are added to the OT buffer.
       * In every line the active expression is the exact complement (within the
       * twelve property_flags bits) of the commented-out expression before it.
       * NOTE(review): the commented-out list presumably names the features that
       * should apply to the glyph, i.e. a set bit excludes that feature -- confirm
       * against the pango-ot ruleset implementation.
       */
  332 enum properties
  333 {
  334   blwf_p    = /*(blwf | blws | clig | dist | blwm)*/ (abvf | pref | pstf | pres | abvs | psts | abvm),
  335   pstf_p    = /*(blwf | blws | pref | pres | pstf | psts | clig | dist | blwm)*/ (abvf | abvs | abvm),
  336   abvf_p    = /*(abvf | abvs | clig | dist | abvm)*/ (pref | pstf | blwf | pres | blws | psts | blwm),
  337   pref_p    = /*(pref | pres | clig | dist)*/ (abvf | pstf | blwf | blws | abvs | psts | blwm | abvm),
  338   default_p = /*(pres | blws | clig | dist | abvm | blwm)*/ (pref | blwf |abvf | pstf | abvs | psts)
  339 };
 340
 341
 342 /* Below we define how a character in the input string is either in the tibetanCharClasses table
 343  * (in which case we get its type back), or an unknown object in which case we get _xx (CC_RESERVED) back
 344  */
 345 static TibetanCharClass
 346 get_char_class (gunichar ch)
 347 {
 348
 349   if (ch < firstChar || ch > lastChar)
 350     return CC_RESERVED;
 351
 352   return tibetanCharClasses[ch - firstChar];
 353 }
 354
 355
 356 /* Given an input string of characters and a location in which to start looking
 357  * calculate, using the state table, which one is the last character of the syllable
 358  * that starts in the starting position.
 359  */
 360 static glong
 361 find_syllable (const gunichar *chars,
 362                glong           start,
 363                glong           char_count)
 364 {
 365   glong cursor = start;
 366   gint8 state = 0;
 367   TibetanCharClass charClass;
 368
 369   while (cursor < char_count)
 370     {
 371       charClass = get_char_class (chars[cursor]) & CF_CLASS_MASK;
 372       state = tibetanStateTable[state][charClass];
 373
 374       if (state < 0)
 375         break;
 376
 377       cursor += 1;
 378     }
 379
 380   return cursor;
 381 }
  382
      /* GSUB (substitution) features requested for the ruleset.  Each entry pairs
       * a feature tag with either one of the property_flags bits or
       * PANGO_OT_ALL_GLYPHS.
       */
  383 static const PangoOTFeatureMap gsub_features[] =
  384 {
  385   {"ccmp", PANGO_OT_ALL_GLYPHS},
  386   {"locl", PANGO_OT_ALL_GLYPHS},
  387   {"blws", blws},
  388   {"abvs", abvs},
  389   {"psts", psts},
  390   {"clig", clig},
  391   {"calt", PANGO_OT_ALL_GLYPHS}
  392 };
  393
      /* GPOS (positioning) features requested for the ruleset, in the same
       * { feature tag, property bit or PANGO_OT_ALL_GLYPHS } form as gsub_features.
       */
  394 static const PangoOTFeatureMap gpos_features[] =
  395 {
  396   {"dist", dist},
  397   {"blwm", blwm},
  398   {"abvm", abvm},
  399   {"kern", PANGO_OT_ALL_GLYPHS},
  400   {"mark", PANGO_OT_ALL_GLYPHS},
  401   {"mkmk", PANGO_OT_ALL_GLYPHS}
  402 };
 403
 404 static PangoGlyph
 405 get_index (PangoFcFont *fc_font, gunichar wc)
 406 {
 407   PangoGlyph index = pango_fc_font_get_glyph (fc_font, wc);
 408   if (!index)
 409     index = PANGO_GET_UNKNOWN_GLYPH ( wc);
 410   return index;
 411 }
 412
 413
 414 static void
 415 tibetan_engine_shape (PangoEngineShape *engine G_GNUC_UNUSED,
 416                     PangoFont        *font,
 417                     const char       *text,
 418                     int               length,
 419                     const PangoAnalysis *analysis,
 420                     PangoGlyphString *glyphs)
 421 {
 422   PangoFcFont *fc_font;
 423   FT_Face face;
 424   PangoOTRulesetDescription desc;
 425   const PangoOTRuleset *ruleset;
 426   PangoOTBuffer *buffer;
 427   glong n_chars;
 428   gunichar *wcs;
 429   const char *p;
 430   int i;
 431   glong syllable;
 432   TibetanCharClass charClass;
 433   glong cursor = 0;
 434
 435   g_return_if_fail (font != NULL);
 436   g_return_if_fail (text != NULL);
 437   g_return_if_fail (length >= 0);
 438   g_return_if_fail (analysis != NULL);
 439
 440   fc_font = PANGO_FC_FONT (font);
 441   face = pango_fc_font_lock_face (fc_font);
 442   if (!face)
 443     return;
 444
 445   buffer = pango_ot_buffer_new (fc_font);
 446   pango_ot_buffer_set_rtl (buffer, analysis->level % 2 != 0);
 447
 448   wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);
 449
 450   p = text;
 451   /* This loop only exits when we reach the end of a run, which may contain
 452    * several syllables.
 453    */
 454   while (cursor < n_chars)
 455     {
 456       syllable = find_syllable (wcs, cursor, n_chars);
 457
 458       /* shall we add a dotted circle?
 459       * If in the position in which the base should be (first char in the string) there is
 460       * a character that has the Dotted circle flag (a character that cannot be a base)
 461       * then write a dotted circle
 462       */
 463       if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
 464         {
 465           pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
 466         }
 467
 468       /* If it encounters a digit followed by number pre combining mark, then reorder the two characters
 469       * coeng Ro if they are present
 470       */
 471       for (i = cursor; i < syllable; i += 1)
 472         {
 473           charClass = get_char_class (wcs[i]);
 474
 475           if ((charClass & CF_DIGIT )
 476               && ( get_char_class (wcs[i+1]) & CF_PREDIGIT))
 477            {
 478                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_PRE_NUMBER_MARK), pref_p, p - text);
 479                          p = g_utf8_next_char (p);
 480                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
 481                                i += 1;
 482          } else {
 483           switch (charClass & CF_POS_MASK)
 484             {
 485               case CF_POS_ABOVE :
 486                              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
 487                              break;
 488
 489               case CF_POS_AFTER :
 490                              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
 491                              break;
 492
 493               case CF_POS_BELOW :
 494                              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
 495                              break;
 496
 497               default:
 498                    /* default - any other characters  */
 499                   pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
 500                   break;
 501             } /* switch */
 502            }
 503
 504           p = g_utf8_next_char (p);
 505         } /* for */
 506
 507       cursor = syllable; /* move the pointer to the start of next syllable */
 508     } /* while */
 509
 510   desc.script = analysis->script;
 511   desc.language = analysis->language;
 512
 513   desc.n_static_gsub_features = G_N_ELEMENTS (gsub_features);
 514   desc.static_gsub_features = gsub_features;
 515   desc.n_static_gpos_features = G_N_ELEMENTS (gpos_features);
 516   desc.static_gpos_features = gpos_features;
 517
 518   /* TODO populate other_features from analysis->extra_attrs */
 519   desc.n_other_features = 0;
 520   desc.other_features = NULL;
 521
 522   ruleset = pango_ot_ruleset_get_for_description (pango_ot_info_get (face), &desc);
 523
 524   pango_ot_ruleset_substitute (ruleset, buffer);
 525   pango_ot_ruleset_position (ruleset, buffer);
 526   pango_ot_buffer_output (buffer, glyphs);
 527
 528   g_free (wcs);
 529   pango_ot_buffer_destroy (buffer);
 530
 531   pango_fc_font_unlock_face (fc_font);
 532 }
 533
 534
 535 static void
 536 tibetan_engine_fc_class_init (PangoEngineShapeClass *class)
 537 {
 538   class->script_shape = tibetan_engine_shape;
 539 }
  540
      /* Type boilerplate for the engine.  The code below relies on this macro to
       * provide tibetan_engine_fc_register_type() and tibetan_engine_fc_type.
       * The last argument (instance init) is NULL: there is no per-instance setup.
       */
  541 PANGO_ENGINE_SHAPE_DEFINE_TYPE (TibetanEngineFc, tibetan_engine_fc,
  542                                 tibetan_engine_fc_class_init, NULL)
 543
  544
      /* Module entry point "init": registers the engine type with the given
       * type module.
       */
  545 void
  546 PANGO_MODULE_ENTRY(init) (GTypeModule *module)
  547 {
  548   tibetan_engine_fc_register_type (module);
  549 }
 550
  551
      /* Module entry point "exit": intentionally empty, this module has nothing
       * to release.
       */
  552 void
  553 PANGO_MODULE_ENTRY(exit) (void)
  554 {
  555 }
 556
 557
 558 void
 559 PANGO_MODULE_ENTRY(list) (PangoEngineInfo **engines,
 560                           int              *n_engines)
 561 {
 562   *engines = script_engines;
 563   *n_engines = G_N_ELEMENTS (script_engines);
 564 }
 565
 566
 567 PangoEngine *
 568 PANGO_MODULE_ENTRY(create) (const char *id)
 569 {
 570   if (!strcmp (id, SCRIPT_ENGINE_NAME))
 571     return g_object_new (tibetan_engine_fc_type, NULL);
 572   else
 573     return NULL;
 574 }