4 * Copyright (C) 2001, 2002 IBM Corporation. All Rights Reserved.
5 * Author: Eric Mader <mader@jtcsv.com>
7 * Permission is hereby granted, free of charge, to any person
8 * obtaining a copy of this software and associated documentation
9 * files (the "Software"), to deal in the Software without
10 * restriction, including without limitation the rights to use, copy,
11 * modify, merge, publish, distribute, and/or sell copies of the
12 * Software, and to permit persons to whom the Software is furnished
13 * to do so, provided that the above copyright notice(s) and this
14 * permission notice appear in all copies of the Software and that
15 * both the above copyright notice(s) and this permission notice
16 * appear in supporting documentation.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR
23 * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
24 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
25 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
26 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
29 * Except as contained in this notice, the name of a copyright holder
30 * shall not be used in advertising or otherwise to promote the sale,
31 * use or other dealings in this Software without prior written
32 * authorization of the copyright holder.
38 #include "mprefixups.h"
40 * FIXME: should the IndicOutput stuff be moved
41 * to a separate .h and .c file just to keep the
42 * clutter down here? (it's not really useful
43 * anyplace else, is it?)
/*
 * Members of struct _Output (the state shared by the write and matra helpers
 * used by indic_ot_reorder()) followed by its typedef.
 * NOTE(review): the struct header and most members are elided from this
 * listing; only the members shown below are documented here.
 */
/* Table indexed by input character index; writeChar() copies
 * fOriginalOffsets[charIndex] into fCharIndices for each character written. */
49 const glong *fOriginalOffsets;
/* Slot filled by saveMatra() for IS_AL_LAKUNA classes and emitted by writeAlLakuna(). */
60 gunichar fAlLakuna; /* to handle Al-Lakuna in sinhala split matras */
/* Set by initMatra() from its wordStart argument; tested by writeMpre(). */
63 gboolean fMatraWordStart;
/* Fixup list handed to initOutput(); may be NULL (noteBaseConsonant() checks before use). */
66 MPreFixups *fMPreFixups;
/* Short name for struct _Output used by the helpers in this file. */
69 typedef struct _Output Output;
/*
 * initOutput: put an Output into its starting state.
 *
 * output          - state to initialise
 * originalOffsets - table later indexed by input character index in writeChar()
 * outChars        - destination for output characters; writeChar() checks this
 *                   for NULL before storing anything
 * charIndices     - destination for the original offset of each output character
 * charTags        - destination for the tag value of each output character
 * mpreFixups      - fixup list to record into, or NULL (noteBaseConsonant()
 *                   skips the call when it is NULL)
 */
71 static void initOutput(Output *output, const glong *originalOffsets, gunichar *outChars, glong *charIndices, gulong *charTags, MPreFixups *mpreFixups)
73 output->fOriginalOffsets = originalOffsets;
75 output->fOutChars = outChars;
76 output->fCharIndices = charIndices;
77 output->fCharTags = charTags;
/* Nothing has been written yet and no matra tags are pending. */
79 output->fOutIndex = 0;
80 output->fMatraTags = 0;
/* Clear every saved matra slot; the write* helpers treat 0 as "slot empty". */
82 output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0;
/* -1 means no pre-base matra has been written; noteBaseConsonant() tests for >= 0. */
84 output->fMPreOutIndex = -1;
85 output->fMPreFixups = mpreFixups;
/*
 * saveMatra: remember `matra` in the slot of `output` selected by matraClass.
 *
 * The class tests run in the order pre, below, above, post, length mark,
 * Al-Lakuna, and the first one that matches decides the slot.  A later call
 * that selects the same slot overwrites the earlier value.  In the visible
 * code no slot is touched when none of the tests match.
 */
88 static void saveMatra(Output *output, gunichar matra, IndicOTCharClass matraClass)
90 /* FIXME: check if already set, or if not a matra... */
91 if (IS_M_PRE(matraClass)) {
92 output->fMpre = matra;
93 } else if (IS_M_BELOW(matraClass)) {
94 output->fMbelow = matra;
95 } else if (IS_M_ABOVE(matraClass)) {
96 output->fMabove = matra;
97 } else if (IS_M_POST(matraClass)) {
98 output->fMpost = matra;
99 } else if (IS_LENGTH_MARK(matraClass)) {
100 output->fLengthMark = matra;
101 } else if (IS_AL_LAKUNA(matraClass)) {
102 output->fAlLakuna = matra;
/*
 * initMatra: reset the per-syllable matra state before matras are noted.
 *
 * matraIndex - character index passed to writeChar() for every matra piece
 * matraTags  - tag value passed to writeChar() for every matra piece
 * wordStart  - stored in fMatraWordStart and tested by writeMpre()
 */
106 static void initMatra(Output *output, guint32 matraIndex, gulong matraTags, gboolean wordStart)
/* Empty all matra slots and forget any pre-base matra position from the previous syllable. */
108 output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0;
109 output->fMPreOutIndex = -1;
110 output->fMatraIndex = matraIndex;
111 output->fMatraTags = matraTags;
112 output->fMatraWordStart = wordStart;
/*
 * noteMatra: classify `matra` and, if it is a matra, save it (or its pieces)
 * into the slots of `output` via saveMatra().
 *
 * NOTE(review): the else branch and the return statements are elided from
 * this listing.  The caller uses the result as a loop condition, so it
 * presumably reports whether `matra` was a matra - confirm against the full
 * source.
 */
115 static gboolean noteMatra(Output *output, const IndicOTClassTable *classTable, gunichar matra)
117 IndicOTCharClass matraClass = indic_ot_get_char_class(classTable, matra);
119 if (IS_MATRA(matraClass)) {
120 if (IS_SPLIT_MATRA(matraClass)) {
121 const IndicOTSplitMatra *splitMatra = indic_ot_get_split_matra(classTable, matraClass);
/* A split matra has at most three pieces; a 0 entry ends the list early.
 * Each piece is saved under its own class, not the class of the whole matra. */
124 for (i = 0; i < 3 && (*splitMatra)[i] != 0; i += 1) {
125 gunichar piece = (*splitMatra)[i];
126 IndicOTCharClass pieceClass = indic_ot_get_char_class(classTable, piece);
128 saveMatra(output, piece, pieceClass);
/* Presumably the non-split case (the `else` line is elided): save the matra as-is. */
131 saveMatra(output, matra, matraClass);
/*
 * noteBaseConsonant: if a fixup list is attached and a pre-base matra has
 * been written for this syllable (fMPreOutIndex >= 0), pass the current
 * output index and the matra's output index to indic_mprefixups_add().
 * The caller invokes this immediately before writing the base consonant, so
 * fOutIndex is the position the base consonant will occupy.
 */
139 static void noteBaseConsonant(Output *output)
141 if (output->fMPreFixups && output->fMPreOutIndex >= 0) {
142 indic_mprefixups_add(output->fMPreFixups, output->fOutIndex, output->fMPreOutIndex);
/*
 * writeChar: append one character to the output.
 *
 * ch        - character to store
 * charIndex - input character index; translated through fOriginalOffsets
 *             before being stored in fCharIndices
 * charTags  - tag value stored alongside the character
 *
 * The three stores happen only when fOutChars is non-NULL.
 * NOTE(review): the closing brace of that check is elided here; the gap in
 * the original line numbers suggests the increment of fOutIndex lies outside
 * it, so a NULL buffer would still count characters - confirm.
 */
146 static void writeChar(Output *output, gunichar ch, guint32 charIndex, gulong charTags)
148 if (output->fOutChars != NULL) {
149 output->fOutChars[output->fOutIndex] = ch;
150 output->fCharIndices[output->fOutIndex] = output->fOriginalOffsets[charIndex];
151 output->fCharTags[output->fOutIndex] = charTags;
154 output->fOutIndex += 1;
/*
 * writeMpre: emit the saved pre-base matra, if there is one, and remember
 * where in the output it was written.
 */
157 static void writeMpre(Output *output)
159 if (output->fMpre != 0) {
160 gulong tags = output->fMatraTags;
/* NOTE(review): the statement controlled by this test is elided from the
 * listing; presumably it adjusts `tags` for a word-initial matra - confirm. */
161 if (output->fMatraWordStart)
/* Capture the index before writing so that it identifies the matra's own slot. */
164 output->fMPreOutIndex = output->fOutIndex;
165 writeChar(output, output->fMpre, output->fMatraIndex, tags);
/* writeMbelow: emit the saved below-base matra, if the slot is non-zero,
 * using the matra index and tags recorded by initMatra(). */
169 static void writeMbelow(Output *output)
171 if (output->fMbelow != 0) {
172 writeChar(output, output->fMbelow, output->fMatraIndex, output->fMatraTags);
/* writeMabove: emit the saved above-base matra, if the slot is non-zero,
 * using the matra index and tags recorded by initMatra(). */
176 static void writeMabove(Output *output)
178 if (output->fMabove != 0) {
179 writeChar(output, output->fMabove, output->fMatraIndex, output->fMatraTags);
/* writeMpost: emit the saved post-base matra, if the slot is non-zero,
 * using the matra index and tags recorded by initMatra(). */
183 static void writeMpost(Output *output)
185 if (output->fMpost != 0) {
186 writeChar(output, output->fMpost, output->fMatraIndex, output->fMatraTags);
/* writeLengthMark: emit the saved length mark, if the slot is non-zero,
 * using the matra index and tags recorded by initMatra(). */
190 static void writeLengthMark(Output *output)
192 if (output->fLengthMark != 0) {
193 writeChar(output, output->fLengthMark, output->fMatraIndex, output->fMatraTags);
/* writeAlLakuna: emit the saved Al-Lakuna piece, if the slot is non-zero,
 * using the matra index and tags recorded by initMatra(). */
197 static void writeAlLakuna(Output *output)
199 if (output->fAlLakuna != 0) {
200 writeChar(output, output->fAlLakuna, output->fMatraIndex, output->fMatraTags);
/* getOutputIndex: return the current output index. It starts at 0 in
 * initOutput() and is advanced by one in writeChar(). */
204 static glong getOutputIndex(Output *output)
206 return output->fOutIndex;
/*
 * indic_ot_reorder: walk `chars` one syllable at a time and emit the
 * characters in reordered form, together with a source offset and a tag
 * value for every character written.
 *
 * chars         - input characters
 * utf8_offsets  - table indexed by input character index; its entries are
 *                 what end up in char_indices
 * char_count    - number of entries in chars
 * class_table   - script class table (character classes and scriptFlags)
 * out_chars     - output characters; writeChar() checks it for NULL before
 *                 storing anything
 * char_indices  - output: utf8_offsets entry for each output character
 * char_tags     - output: tag value for each output character
 * outMPreFixups - when non-NULL and the script sets SF_MPRE_FIXUP, a fixup
 *                 list is created with indic_mprefixups_new() and handed back
 *                 through this pointer.  NOTE(review): ownership of that list
 *                 is not visible here - presumably the caller releases it.
 *
 * Returns the output index after the last write (getOutputIndex()).
 *
 * Every writeChar() call below passes `prev` as the character index, so all
 * output characters of a syllable map to the offset of the syllable's first
 * input character.
 *
 * NOTE(review): this listing is elided (declarations, braces, `break`s and
 * some statements are missing); the comments describe visible lines only.
 */
212 glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong char_count, const IndicOTClassTable *class_table, gunichar *out_chars, glong *char_indices, gulong *char_tags, MPreFixups **outMPreFixups)
214 MPreFixups *mpreFixups = NULL;
217 gboolean last_in_word = FALSE;
/* A fixup list is only needed when the caller wants one and the script asks for it. */
219 if (outMPreFixups && (class_table->scriptFlags & SF_MPRE_FIXUP)) {
220 mpreFixups = indic_mprefixups_new (char_count);
223 initOutput(&output, utf8_offsets, out_chars, char_indices, char_tags, mpreFixups);
/* Main loop: [prev, syllable) is the syllable currently being processed. */
225 while (prev < char_count) {
226 glong syllable = indic_ot_find_syllable(class_table, chars, prev, char_count);
227 glong matra, vmabove, vmpost = syllable;
/* Scan backwards from the end of the syllable over trailing vowel modifiers. */
229 while (vmpost > prev && indic_ot_is_vm_post(class_table, chars[vmpost - 1])) {
234 while (vmabove > prev && indic_ot_is_vm_above(class_table, chars[vmabove - 1])) {
239 initMatra(&output, prev, blwf_p, !last_in_word);
/* NOTE(review): the initialisation of `matra` is elided from this listing;
 * confirm it can never be below prev, otherwise chars[matra] reads outside
 * the syllable. */
240 while (noteMatra(&output, class_table, chars[matra]) &&
/* Dispatch on the class of the syllable's first character. */
245 switch (indic_ot_get_char_class(class_table, chars[prev]) & CF_CLASS_MASK) {
247 last_in_word = FALSE;
249 case CC_INDEPENDENT_VOWEL:
250 case CC_ZERO_WIDTH_MARK:
251 for (i = prev; i < syllable; i += 1) {
252 writeChar(&output, chars[i], /*i*/ prev, blwf_p);
257 case CC_MODIFYING_MARK_ABOVE:
258 case CC_MODIFYING_MARK_POST:
261 /* patch for rendering fix for Malayalam SAMVRUTHOKARA by suresh */
/* The prev > 0 guard matters at the very start of the input: there is no
 * preceding character there, and chars[prev - 1] would read before the
 * beginning of the buffer. */
262 if (prev > 0 && chars[prev - 1] == 0x0D41) {
263 writeChar(&output, chars[prev], prev, blwf_p);
/* A mark with nothing to attach to is shown on a dotted circle. */
269 writeChar(&output, C_DOTTED_CIRCLE, prev, blwf_p);
270 writeChar(&output, chars[prev], prev, blwf_p);
273 case CC_DEPENDENT_VOWEL:
275 writeChar(&output, C_DOTTED_CIRCLE, prev, blwf_p);
276 writeMbelow(&output);
277 writeMabove(&output);
279 writeLengthMark(&output);
280 writeAlLakuna(&output);
284 case CC_CONSONANT_WITH_NUKTA:
286 guint32 length = vmabove - prev;
287 glong lastConsonant = vmabove - 1;
288 glong baseLimit = prev;
289 glong baseConsonant, postBase, postBaseLimit;
290 gboolean seenVattu, seenBelowBaseForm, supressVattu;
293 /* Check for REPH at front of syllable */
294 if (length > 2 && indic_ot_is_reph(class_table, chars[prev]) && indic_ot_is_virama(class_table, chars[prev + 1])) {
297 /* Check for eyelash RA, if the script supports it */
298 if ((class_table->scriptFlags & SF_EYELASH_RA) != 0 &&
299 chars[baseLimit] == C_SIGN_ZWJ) {
/* Back up from the end of the cluster to the last real consonant. */
308 while (lastConsonant > baseLimit && !indic_ot_is_consonant(class_table, chars[lastConsonant])) {
312 baseConsonant = lastConsonant;
313 postBase = lastConsonant + 1;
315 postBaseLimit = class_table->scriptFlags & SF_POST_BASE_LIMIT_MASK;
317 seenBelowBaseForm = false;
/* Search backwards for the base consonant. */
320 while (baseConsonant > baseLimit) {
321 IndicOTCharClass charClass = indic_ot_get_char_class(class_table, chars[baseConsonant]);
323 if (IS_CONSONANT(charClass)) {
324 if (postBaseLimit == 0 || seenVattu ||
325 (baseConsonant > baseLimit && !indic_ot_is_virama(class_table, chars[baseConsonant - 1])) ||
326 !HAS_POST_OR_BELOW_BASE_FORM(charClass)) {
330 seenVattu = IS_VATTU(charClass);
332 if (HAS_POST_BASE_FORM(charClass)) {
333 if (seenBelowBaseForm) {
337 postBase = baseConsonant;
338 } else if (HAS_BELOW_BASE_FORM(charClass)) {
339 seenBelowBaseForm = true;
351 /* Write eyelash RA */
352 /* NOTE: baseLimit == prev + 3 iff eyelash RA present... */
353 if (baseLimit == prev + 3) {
354 writeChar(&output, chars[prev], prev, half_p);
355 writeChar(&output, chars[prev + 1], prev /*+ 1*/, half_p);
356 writeChar(&output, chars[prev + 2], prev /*+ 2*/, half_p);
359 /* write any pre-base consonants */
362 for (i = baseLimit; i < baseConsonant; i += 1) {
363 gunichar ch = chars[i];
364 /* Applying blwf to the first consonant doesn't make sense
365 * since the below-form follows the consonant that it is
367 gulong tag = (i == baseLimit) ? half_p : blwf_p;
368 IndicOTCharClass charClass = indic_ot_get_char_class(class_table, ch);
370 if (IS_CONSONANT(charClass)) {
371 if (IS_VATTU(charClass) && supressVattu) {
374 else if ((i + 2 < baseConsonant) && (chars[i + 2] == C_SIGN_ZWNJ)) {
378 supressVattu = IS_VATTU(charClass);
379 } else if (IS_VIRAMA(charClass) && chars[i + 1] == C_SIGN_ZWNJ)
384 writeChar(&output, ch, /*i*/ prev, tag);
387 bcSpan = baseConsonant + 1;
389 if (bcSpan < vmabove && indic_ot_is_nukta(class_table, chars[bcSpan])) {
393 if (baseConsonant == lastConsonant && bcSpan < vmabove && indic_ot_is_virama(class_table, chars[bcSpan])) {
396 if (bcSpan < vmabove && chars[bcSpan] == C_SIGN_ZWNJ) {
401 /* note the base consonant for post-GSUB fixups */
402 noteBaseConsonant(&output);
404 /* write base consonant */
405 for (i = baseConsonant; i < bcSpan; i += 1) {
406 writeChar(&output, chars[i], /*i*/ prev, nukt_p);
409 if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) {
410 gboolean is_for_0C48 = FALSE;
411 if (output.fOutChars != NULL) { /*for 0x0C48 of Telugu*/
413 for (t = prev; t < syllable; t++) {
414 if (chars[t] == 0x0C48) {
415 writeMabove(&output);
416 writeMbelow(&output);
426 writeMbelow(&output);
427 writeMabove(&output);
432 /* write below-base consonants */
433 if (baseConsonant != lastConsonant) {
434 for (i = bcSpan + 1; i < postBase; i += 1) {
435 writeChar(&output, chars[i], /*i*/ prev, blwf_p);
438 if (postBase > lastConsonant) {
439 /* write halant that was after base consonant */
440 writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p);
444 /* write Mbelow, Mabove */
445 if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
446 writeMbelow(&output);
447 writeMabove(&output);
/* Scripts with SF_REPH_AFTER_BELOW get the reph and above-marks here... */
450 if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) != 0) {
451 if (baseLimit == prev + 2) {
452 writeChar(&output, chars[prev], prev, rphf_p);
453 writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p);
457 for (i = vmabove; i < vmpost; i += 1) {
458 writeChar(&output, chars[i], /*i*/ prev, blwf_p);
462 /* write post-base consonants */
463 if (baseConsonant != lastConsonant) {
464 if (postBase <= lastConsonant) {
465 for (i = postBase; i <= lastConsonant; i += 1) {
466 writeChar(&output, chars[i], /*i*/ prev, pstf_p);
469 /* write halant that was after base consonant */
470 writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p);
473 /* write the trailing halant, if there is one */
474 if (lastConsonant < matra && indic_ot_is_virama(class_table, chars[matra])) {
475 writeChar(&output, chars[matra], /*matra*/ prev, nukt_p);
480 if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
484 writeLengthMark(&output);
485 writeAlLakuna(&output);
/* ...all other scripts get the reph and above-marks after the post-base forms. */
488 if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
489 if (baseLimit == prev + 2) {
490 writeChar(&output, chars[prev], prev, rphf_p);
491 writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p);
495 for (i = vmabove; i < vmpost; i += 1) {
496 writeChar(&output, chars[i], /*i*/ prev, blwf_p);
/* Finally the post-position vowel modifiers at the end of the syllable. */
501 for (i = vmpost; i < syllable; i += 1) {
502 writeChar(&output, chars[i], /*i*/ prev, blwf_p);
/* Hand the fixup list (possibly NULL) back to the caller. */
517 *outMPreFixups = mpreFixups;
520 return getOutputIndex(&output);