2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
4 * This is part of HarfBuzz, an OpenType Layout engine library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
/* U+FFFD. Returned by prevChar() and nextChar(); presumably the "no usable neighbour" sentinel -- TODO confirm. */
30 static const HB_UChar16 ReplacementCharacter = 0xfffd;
34 unsigned char justification; /* justification class; set by getArabicProperties()/getNkoProperties(), later copied into the glyph attributes */
35 } HB_ArabicProperties;
42 /* intermediate state */
47 // these groups correspond to the groups defined in the Unicode standard.
48 // Some of these groups are equal with regards to both joining and line breaking behaviour,
49 // and thus have the same enum value
51 // I'm not sure the mapping of syriac to arabic enums is correct with regards to justification, but as
52 // I couldn't find any better document I'll hope for the best.
91 YehWithTail = HamzaOnHehGoal, /* alias: same enum value as HamzaOnHehGoal */
92 YehBarre = HamzaOnHehGoal, /* alias: same enum value as HamzaOnHehGoal */
121 Sadhe = HamzaOnHehGoal, /* alias: same enum value as HamzaOnHehGoal */
124 /* Compiler bug? Otherwise ArabicGroupsEnd would be equal to Dal + 1. */
125 Dummy = HamzaOnHehGoal, /* workaround member for the issue described just above */
/*
 * ArabicGroup of every code unit in U+0600..U+074F, stored as unsigned char.
 * The index is (code unit - 0x600); arabicGroup() performs the lookup and
 * casts the entry back to ArabicGroup.
 */
129 static const unsigned char arabic_group[0x150] = {
130 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
131 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
132 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
133 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
135 Transparent, Transparent, Transparent, Transparent,
136 Transparent, Transparent, ArabicNone, ArabicNone,
137 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
138 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
140 ArabicNone, ArabicNone, Alef, Alef,
141 Waw, Alef, Yeh, Alef,
142 Beh, TehMarbuta, Beh, Beh,
147 Tah, Ain, Ain, ArabicNone,
148 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
151 Kashida, Feh, Qaf, Kaf,
152 Lam, Meem, Noon, Heh,
153 Waw, Yeh, Yeh, Transparent,
154 Transparent, Transparent, Transparent, Transparent,
156 Transparent, Transparent, Transparent, Transparent,
157 Transparent, Transparent, Transparent, Transparent,
158 Transparent, ArabicNone, ArabicNone, ArabicNone,
159 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
161 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
162 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
163 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
164 ArabicNone, ArabicNone, Beh, Qaf,
166 Transparent, Alef, Alef, Alef,
167 ArabicNone, Alef, Waw, Waw,
179 Reh, Reh, Seen, Seen,
184 Qaf, Gaf, SwashKaf, Gaf,
189 Lam, Noon, Noon, Noon,
190 Noon, Noon, KnottedHeh, Hah,
193 TehMarbuta, HehGoal, HamzaOnHehGoal, HamzaOnHehGoal,
196 Yeh, YehWithTail, Yeh, Waw,
198 Yeh, Yeh, YehBarre, YehBarre,
199 ArabicNone, TehMarbuta, Transparent, Transparent,
200 Transparent, Transparent, Transparent, Transparent,
201 Transparent, ArabicNone, ArabicNone, Transparent,
203 Transparent, Transparent, Transparent, Transparent,
204 Transparent, ArabicNone, ArabicNone, Transparent,
205 Transparent, ArabicNone, Transparent, Transparent,
206 Transparent, Transparent, Dal, Reh,
208 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
209 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
210 ArabicNone, ArabicNone, Seen, Sad,
211 Ain, ArabicNone, ArabicNone, KnottedHeh,
214 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
215 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
216 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
217 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
219 Alaph, Transparent, Beth, Gamal,
220 Gamal, Dalath, Dalath, He,
221 SyriacWaw, Zain, Heth, Teth,
222 Teth, Yudh, YudhHe, Kaph,
224 Lamadh, Mim, Nun, Semakh,
225 FinalSemakh, SyriacE, Pe, ReversedPe,
226 Sadhe, Qaph, Dalath, Shin,
227 Taw, Beth, Gamal, Dalath,
229 Transparent, Transparent, Transparent, Transparent,
230 Transparent, Transparent, Transparent, Transparent,
231 Transparent, Transparent, Transparent, Transparent,
232 Transparent, Transparent, Transparent, Transparent,
234 Transparent, Transparent, Transparent, Transparent,
235 Transparent, Transparent, Transparent, Transparent,
236 Transparent, Transparent, Transparent, ArabicNone,
237 ArabicNone, Zain, Kaph, Fe,
/*
 * Classifies a UTF-16 code unit into an ArabicGroup.
 *
 * Code units in U+0600..U+074F are looked up in arabic_group[]. U+200D and
 * characters whose category is HB_Separator_Space are handled by dedicated
 * branches.
 */
240 static ArabicGroup arabicGroup(unsigned short uc)
242 if (uc >= 0x0600 && uc < 0x750)
243 return (ArabicGroup) arabic_group[uc-0x600];
244 else if (uc == 0x200d) /* U+200D ZERO WIDTH JOINER */
246 else if (HB_GetUnicodeCharCategory(uc) == HB_Separator_Space)
254 Arabic shaping obeys a number of rules according to the joining classes (see Unicode book, section on
257 Each Unicode character has a joining class (right, dual (left & right), center (join-causing) or transparent).
258 Transparent joining is not encoded in HB_UChar16::joining(), but applies to all combining marks and format marks.
260 Right join-causing: dual + center
261 Left join-causing: dual + right + center
263 Rules are as follows (for a string already in visual order, as we have it here):
265 R1 Transparent characters do not affect joining behaviour.
266 R2 A right joining character, that has a right join-causing char on the right will get form XRight
267 (R3 A left joining character, that has a left join-causing char on the left will get form XLeft)
268 Note: the above rule is meaningless, as there are no pure left joining characters defined in Unicode
269 R4 A dual joining character, that has a left join-causing char on the left and a right join-causing char on
270 the right will get form XMedial
271 R5 A dual joining character, that has a right join-causing char on the right, and no left join-causing char on the left
273 R6 A dual joining character, that has a left join-causing char on the left, and no right join-causing char on the right
275 R7 Otherwise the character will get form XIsolated
277 Additionally we have to do the minimal ligature support for lam-alef ligatures:
279 L1 Transparent characters do not affect ligature behaviour.
280 L2 Any sequence of Alef(XRight) + Lam(XMedial) will form the ligature Alef.Lam(XLeft)
281 L3 Any sequence of Alef(XRight) + Lam(XLeft) will form the ligature Alef.Lam(XIsolated)
283 The state table below handles rules R1-R7.
/*
 * Maps an ArabicGroup (the array index) to the Joining class that is used as
 * the column index into joining_table.
 */
294 static const Joining joining_for_group[ArabicGroupsEnd] = {
296 JNone, /* ArabicNone */
297 JNone, /* ArabicSpace */
299 JTransparent, /* Transparent */
301 JCausing, /* Center */
302 JCausing, /* Kashida */
316 JRight /* HamzaOnHehGoal */
/*
 * Joining state machine for rules R1-R7.
 *
 * The row is the shape state currently carried by the caller, the column is
 * the Joining class of the character being examined. In each pair, form1 is
 * the shape the caller stores for the pending position (properties[lastPos])
 * and form2 is the state carried forward; see getArabicProperties() and
 * getNkoProperties().
 */
325 static const JoiningPair joining_table[5][4] =
326 /* None, Causing, Dual, Right */
328 { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XInitial }, { XIsolated, XIsolated } }, /* XIsolated */
329 { { XFinal, XIsolated }, { XFinal, XCausing }, { XFinal, XInitial }, { XFinal, XIsolated } }, /* XFinal */
330 { { XIsolated, XIsolated }, { XInitial, XCausing }, { XInitial, XMedial }, { XInitial, XFinal } }, /* XInitial */
331 { { XFinal, XIsolated }, { XMedial, XCausing }, { XMedial, XMedial }, { XMedial, XFinal } }, /* XMedial */
332 { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XMedial }, { XIsolated, XFinal } }, /* XCausing */
337 According to http://www.microsoft.com/middleeast/Arabicdev/IE6/KBase.asp
339 1. Find the priority of the connecting opportunities in each word
340 2. Add expansion at the highest priority connection opportunity
341 3. If more than one connection opportunity have the same highest value,
342 use the opportunity closest to the end of the word.
344 Following is a chart that provides the priority for connection
345 opportunities and where expansion occurs. The character group names
346 are those in table 6.6 of the UNICODE 2.0 book.
349 Priority Glyph Condition Kashida Location
351 Arabic_Kashida User inserted Kashida The user entered a Kashida in a position. After the user
352 (Shift+j or Shift+[E with hat]) Thus, it is the highest priority to insert an inserted kashida
355 Arabic_Seen Seen, Sad Connecting to the next character. After the character.
356 (Initial or medial form).
358 Arabic_HaaDal Teh Marbutah, Haa, Dal Connecting to previous character. Before the final form
361 Arabic_Alef Alef, Tah, Lam, Connecting to previous character. Before the final form
362 Kaf and Gaf of these characters.
364 Arabic_BaRa Reh, Yeh Connected to medial Beh Before preceding medial Baa
366 Arabic_Waw Waw, Ain, Qaf, Feh Connecting to previous character. Before the final form of
369 Arabic_Normal Other connecting Connecting to previous character. Before the final form
370 characters of these characters.
374 This seems to imply that we have at most one kashida point per arabic word.
/*
 * Computes the joining shape and the justification class of each character.
 *
 * chars:      UTF-16 code units to analyse; chars[0] is read unconditionally,
 *             so len must be at least 1.
 * len:        number of entries in chars and in properties.
 * properties: output array; .shape and .justification are written.
 *
 * Shapes are resolved with joining_table: each newly examined character fixes
 * the shape of the pending position lastPos (form1) and yields the state to
 * carry forward (form2).
 */
378 static void getArabicProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties)
380 /* qDebug("arabicSyriacOpenTypeShape: properties:"); */
382 int lastGroup = ArabicNone;
385 ArabicGroup group = arabicGroup(chars[0]); /* seed the state machine with the first character */
386 Joining j = joining_for_group[group];
387 ArabicShape shape = joining_table[XIsolated][j].form2;
388 properties[0].justification = HB_NoJustification;
390 for (i = 1; i < len; ++i) {
391 /* #### fix handling for spaces and punctuation */
392 properties[i].justification = HB_NoJustification;
394 group = arabicGroup(chars[i]);
395 j = joining_for_group[group];
397 if (j == JTransparent) {
398 properties[i].shape = XIsolated; /* transparent characters get XIsolated themselves */
402 properties[lastPos].shape = joining_table[shape][j].form1; /* fix the shape of the pending position */
403 shape = joining_table[shape][j].form2; /* state carried to the next character */
/* Kashida opportunities: tag a justification class based on the shape just resolved at lastPos. */
407 if (properties[lastPos].shape == XInitial || properties[lastPos].shape == XMedial)
408 properties[i-1].justification = HB_Arabic_Seen;
411 if (properties[lastPos].shape == XFinal)
412 properties[lastPos-1].justification = HB_Arabic_HaaDal;
415 if (properties[lastPos].shape == XFinal)
416 properties[lastPos-1].justification = HB_Arabic_Alef;
419 if (properties[lastPos].shape == XFinal)
420 properties[lastPos-1].justification = HB_Arabic_Waw;
423 if (properties[lastPos].shape == XFinal)
424 properties[lastPos-1].justification = HB_Arabic_Normal;
433 lastGroup = ArabicNone;
438 /* ### Center should probably be treated as transparent when it comes to justification. */
442 properties[i].justification = HB_Arabic_Space;
445 properties[i].justification = HB_Arabic_Kashida;
463 if (properties[lastPos].shape == XMedial && arabicGroup(chars[lastPos]) == Beh)
464 properties[lastPos-1].justification = HB_Arabic_BaRa;
477 case ArabicGroupsEnd:
483 properties[lastPos].shape = joining_table[shape][JNone].form1; /* resolve the last pending shape using the JNone column */
487 for (int i = 0; i < len; ++i)
488 qDebug("arabic properties(%d): uc=%x shape=%d, justification=%d", i, chars[i], properties[i].shape, properties[i].justification);
/* Returns the Joining class of code unit uc; getNkoProperties() uses the result as the joining_table column. */
492 static Joining getNkoJoining(unsigned short uc)
/*
 * N'Ko counterpart of getArabicProperties(): resolves the joining shape of
 * each character with joining_table, using getNkoJoining() for classification.
 *
 * chars:      UTF-16 code units to analyse; chars[0] is read unconditionally,
 *             so len must be at least 1.
 * len:        number of entries in chars and in properties.
 * properties: output array; .shape and .justification are written.
 */
507 static void getNkoProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties)
512 Joining j = getNkoJoining(chars[0]); /* seed the state machine with the first character */
513 ArabicShape shape = joining_table[XIsolated][j].form2;
514 properties[0].justification = HB_NoJustification;
516 for (i = 1; i < len; ++i) {
/* NOTE(review): ArabicSpace/ArabicNone are ArabicGroup constants, whereas getArabicProperties() stores
   HB_Arabic_Space/HB_NoJustification in this field -- confirm the numeric values coincide. */
517 properties[i].justification = (HB_GetUnicodeCharCategory(chars[i]) == HB_Separator_Space) ?
518 ArabicSpace : ArabicNone;
520 j = getNkoJoining(chars[i]);
522 if (j == JTransparent) {
523 properties[i].shape = XIsolated; /* transparent characters get XIsolated themselves */
527 properties[lastPos].shape = joining_table[shape][j].form1; /* fix the shape of the pending position */
528 shape = joining_table[shape][j].form2; /* state carried to the next character */
533 properties[lastPos].shape = joining_table[shape][JNone].form1; /* resolve the last pending shape using the JNone column */
537 for (int i = 0; i < len; ++i)
538 qDebug("nko properties(%d): uc=%x shape=%d, justification=%d", i, chars[i], properties[i].shape, properties[i].justification);
543 // The unicode to unicode shaping codec.
544 // does only presentation forms B at the moment, but that should be enough for
/*
 * Presentation-form lookup for U+0600..U+06FF, indexed by the low byte of the
 * code unit. getShape() adds the shape number to column [0]; the exception is
 * U+0649, which it resolves through alefMaksura[] instead.
 */
547 static const hb_uint16 arabicUnicodeMapping[256][2] = {
548 /* base of shaped forms, and number-1 of them (0 for non shaping,
549 1 for right binding and 3 for dual binding) */
551 /* These are just the glyphs available in Unicode,
552 some characters are in R class, but have no glyphs in Unicode. */
554 { 0x0600, 0 }, /* 0x0600 */
555 { 0x0601, 0 }, /* 0x0601 */
556 { 0x0602, 0 }, /* 0x0602 */
557 { 0x0603, 0 }, /* 0x0603 */
558 { 0x0604, 0 }, /* 0x0604 */
559 { 0x0605, 0 }, /* 0x0605 */
560 { 0x0606, 0 }, /* 0x0606 */
561 { 0x0607, 0 }, /* 0x0607 */
562 { 0x0608, 0 }, /* 0x0608 */
563 { 0x0609, 0 }, /* 0x0609 */
564 { 0x060A, 0 }, /* 0x060A */
565 { 0x060B, 0 }, /* 0x060B */
566 { 0x060C, 0 }, /* 0x060C */
567 { 0x060D, 0 }, /* 0x060D */
568 { 0x060E, 0 }, /* 0x060E */
569 { 0x060F, 0 }, /* 0x060F */
571 { 0x0610, 0 }, /* 0x0610 */
572 { 0x0611, 0 }, /* 0x0611 */
573 { 0x0612, 0 }, /* 0x0612 */
574 { 0x0613, 0 }, /* 0x0613 */
575 { 0x0614, 0 }, /* 0x0614 */
576 { 0x0615, 0 }, /* 0x0615 */
577 { 0x0616, 0 }, /* 0x0616 */
578 { 0x0617, 0 }, /* 0x0617 */
579 { 0x0618, 0 }, /* 0x0618 */
580 { 0x0619, 0 }, /* 0x0619 */
581 { 0x061A, 0 }, /* 0x061A */
582 { 0x061B, 0 }, /* 0x061B */
583 { 0x061C, 0 }, /* 0x061C */
584 { 0x061D, 0 }, /* 0x061D */
585 { 0x061E, 0 }, /* 0x061E */
586 { 0x061F, 0 }, /* 0x061F */
588 { 0x0620, 0 }, /* 0x0620 */
589 { 0xFE80, 0 }, /* 0x0621 HAMZA */
590 { 0xFE81, 1 }, /* 0x0622 R ALEF WITH MADDA ABOVE */
591 { 0xFE83, 1 }, /* 0x0623 R ALEF WITH HAMZA ABOVE */
592 { 0xFE85, 1 }, /* 0x0624 R WAW WITH HAMZA ABOVE */
593 { 0xFE87, 1 }, /* 0x0625 R ALEF WITH HAMZA BELOW */
594 { 0xFE89, 3 }, /* 0x0626 D YEH WITH HAMZA ABOVE */
595 { 0xFE8D, 1 }, /* 0x0627 R ALEF */
596 { 0xFE8F, 3 }, /* 0x0628 D BEH */
597 { 0xFE93, 1 }, /* 0x0629 R TEH MARBUTA */
598 { 0xFE95, 3 }, /* 0x062A D TEH */
599 { 0xFE99, 3 }, /* 0x062B D THEH */
600 { 0xFE9D, 3 }, /* 0x062C D JEEM */
601 { 0xFEA1, 3 }, /* 0x062D D HAH */
602 { 0xFEA5, 3 }, /* 0x062E D KHAH */
603 { 0xFEA9, 1 }, /* 0x062F R DAL */
605 { 0xFEAB, 1 }, /* 0x0630 R THAL */
606 { 0xFEAD, 1 }, /* 0x0631 R REH */
607 { 0xFEAF, 1 }, /* 0x0632 R ZAIN */
608 { 0xFEB1, 3 }, /* 0x0633 D SEEN */
609 { 0xFEB5, 3 }, /* 0x0634 D SHEEN */
610 { 0xFEB9, 3 }, /* 0x0635 D SAD */
611 { 0xFEBD, 3 }, /* 0x0636 D DAD */
612 { 0xFEC1, 3 }, /* 0x0637 D TAH */
613 { 0xFEC5, 3 }, /* 0x0638 D ZAH */
614 { 0xFEC9, 3 }, /* 0x0639 D AIN */
615 { 0xFECD, 3 }, /* 0x063A D GHAIN */
616 { 0x063B, 0 }, /* 0x063B */
617 { 0x063C, 0 }, /* 0x063C */
618 { 0x063D, 0 }, /* 0x063D */
619 { 0x063E, 0 }, /* 0x063E */
620 { 0x063F, 0 }, /* 0x063F */
622 { 0x0640, 0 }, /* 0x0640 C TATWEEL // ### Join Causing, only one glyph */
623 { 0xFED1, 3 }, /* 0x0641 D FEH */
624 { 0xFED5, 3 }, /* 0x0642 D QAF */
625 { 0xFED9, 3 }, /* 0x0643 D KAF */
626 { 0xFEDD, 3 }, /* 0x0644 D LAM */
627 { 0xFEE1, 3 }, /* 0x0645 D MEEM */
628 { 0xFEE5, 3 }, /* 0x0646 D NOON */
629 { 0xFEE9, 3 }, /* 0x0647 D HEH */
630 { 0xFEED, 1 }, /* 0x0648 R WAW */
631 { 0x0649, 3 }, /* 0x0649 ALEF MAKSURA // ### Dual, glyphs not consecutive, handle in code. */
632 { 0xFEF1, 3 }, /* 0x064A D YEH */
633 { 0x064B, 0 }, /* 0x064B */
634 { 0x064C, 0 }, /* 0x064C */
635 { 0x064D, 0 }, /* 0x064D */
636 { 0x064E, 0 }, /* 0x064E */
637 { 0x064F, 0 }, /* 0x064F */
639 { 0x0650, 0 }, /* 0x0650 */
640 { 0x0651, 0 }, /* 0x0651 */
641 { 0x0652, 0 }, /* 0x0652 */
642 { 0x0653, 0 }, /* 0x0653 */
643 { 0x0654, 0 }, /* 0x0654 */
644 { 0x0655, 0 }, /* 0x0655 */
645 { 0x0656, 0 }, /* 0x0656 */
646 { 0x0657, 0 }, /* 0x0657 */
647 { 0x0658, 0 }, /* 0x0658 */
648 { 0x0659, 0 }, /* 0x0659 */
649 { 0x065A, 0 }, /* 0x065A */
650 { 0x065B, 0 }, /* 0x065B */
651 { 0x065C, 0 }, /* 0x065C */
652 { 0x065D, 0 }, /* 0x065D */
653 { 0x065E, 0 }, /* 0x065E */
654 { 0x065F, 0 }, /* 0x065F */
656 { 0x0660, 0 }, /* 0x0660 */
657 { 0x0661, 0 }, /* 0x0661 */
658 { 0x0662, 0 }, /* 0x0662 */
659 { 0x0663, 0 }, /* 0x0663 */
660 { 0x0664, 0 }, /* 0x0664 */
661 { 0x0665, 0 }, /* 0x0665 */
662 { 0x0666, 0 }, /* 0x0666 */
663 { 0x0667, 0 }, /* 0x0667 */
664 { 0x0668, 0 }, /* 0x0668 */
665 { 0x0669, 0 }, /* 0x0669 */
666 { 0x066A, 0 }, /* 0x066A */
667 { 0x066B, 0 }, /* 0x066B */
668 { 0x066C, 0 }, /* 0x066C */
669 { 0x066D, 0 }, /* 0x066D */
670 { 0x066E, 0 }, /* 0x066E */
671 { 0x066F, 0 }, /* 0x066F */
673 { 0x0670, 0 }, /* 0x0670 */
674 { 0xFB50, 1 }, /* 0x0671 R ALEF WASLA */
675 { 0x0672, 0 }, /* 0x0672 */
676 { 0x0673, 0 }, /* 0x0673 */
677 { 0x0674, 0 }, /* 0x0674 */
678 { 0x0675, 0 }, /* 0x0675 */
679 { 0x0676, 0 }, /* 0x0676 */
680 { 0x0677, 0 }, /* 0x0677 */
681 { 0x0678, 0 }, /* 0x0678 */
682 { 0xFB66, 3 }, /* 0x0679 D TTEH */
683 { 0xFB5E, 3 }, /* 0x067A D TTEHEH */
684 { 0xFB52, 3 }, /* 0x067B D BEEH */
685 { 0x067C, 0 }, /* 0x067C */
686 { 0x067D, 0 }, /* 0x067D */
687 { 0xFB56, 3 }, /* 0x067E D PEH */
688 { 0xFB62, 3 }, /* 0x067F D TEHEH */
690 { 0xFB5A, 3 }, /* 0x0680 D BEHEH */
691 { 0x0681, 0 }, /* 0x0681 */
692 { 0x0682, 0 }, /* 0x0682 */
693 { 0xFB76, 3 }, /* 0x0683 D NYEH */
694 { 0xFB72, 3 }, /* 0x0684 D DYEH */
695 { 0x0685, 0 }, /* 0x0685 */
696 { 0xFB7A, 3 }, /* 0x0686 D TCHEH */
697 { 0xFB7E, 3 }, /* 0x0687 D TCHEHEH */
698 { 0xFB88, 1 }, /* 0x0688 R DDAL */
699 { 0x0689, 0 }, /* 0x0689 */
700 { 0x068A, 0 }, /* 0x068A */
701 { 0x068B, 0 }, /* 0x068B */
702 { 0xFB84, 1 }, /* 0x068C R DAHAL */
703 { 0xFB82, 1 }, /* 0x068D R DDAHAL */
704 { 0xFB86, 1 }, /* 0x068E R DUL */
705 { 0x068F, 0 }, /* 0x068F */
707 { 0x0690, 0 }, /* 0x0690 */
708 { 0xFB8C, 1 }, /* 0x0691 R RREH */
709 { 0x0692, 0 }, /* 0x0692 */
710 { 0x0693, 0 }, /* 0x0693 */
711 { 0x0694, 0 }, /* 0x0694 */
712 { 0x0695, 0 }, /* 0x0695 */
713 { 0x0696, 0 }, /* 0x0696 */
714 { 0x0697, 0 }, /* 0x0697 */
715 { 0xFB8A, 1 }, /* 0x0698 R JEH */
716 { 0x0699, 0 }, /* 0x0699 */
717 { 0x069A, 0 }, /* 0x069A */
718 { 0x069B, 0 }, /* 0x069B */
719 { 0x069C, 0 }, /* 0x069C */
720 { 0x069D, 0 }, /* 0x069D */
721 { 0x069E, 0 }, /* 0x069E */
722 { 0x069F, 0 }, /* 0x069F */
724 { 0x06A0, 0 }, /* 0x06A0 */
725 { 0x06A1, 0 }, /* 0x06A1 */
726 { 0x06A2, 0 }, /* 0x06A2 */
727 { 0x06A3, 0 }, /* 0x06A3 */
728 { 0xFB6A, 3 }, /* 0x06A4 D VEH */
729 { 0x06A5, 0 }, /* 0x06A5 */
730 { 0xFB6E, 3 }, /* 0x06A6 D PEHEH */
731 { 0x06A7, 0 }, /* 0x06A7 */
732 { 0x06A8, 0 }, /* 0x06A8 */
733 { 0xFB8E, 3 }, /* 0x06A9 D KEHEH */
734 { 0x06AA, 0 }, /* 0x06AA */
735 { 0x06AB, 0 }, /* 0x06AB */
736 { 0x06AC, 0 }, /* 0x06AC */
737 { 0xFBD3, 3 }, /* 0x06AD D NG */
738 { 0x06AE, 0 }, /* 0x06AE */
739 { 0xFB92, 3 }, /* 0x06AF D GAF */
741 { 0x06B0, 0 }, /* 0x06B0 */
742 { 0xFB9A, 3 }, /* 0x06B1 D NGOEH */
743 { 0x06B2, 0 }, /* 0x06B2 */
744 { 0xFB96, 3 }, /* 0x06B3 D GUEH */
745 { 0x06B4, 0 }, /* 0x06B4 */
746 { 0x06B5, 0 }, /* 0x06B5 */
747 { 0x06B6, 0 }, /* 0x06B6 */
748 { 0x06B7, 0 }, /* 0x06B7 */
749 { 0x06B8, 0 }, /* 0x06B8 */
750 { 0x06B9, 0 }, /* 0x06B9 */
751 { 0xFB9E, 1 }, /* 0x06BA R NOON GHUNNA */
752 { 0xFBA0, 3 }, /* 0x06BB D RNOON */
753 { 0x06BC, 0 }, /* 0x06BC */
754 { 0x06BD, 0 }, /* 0x06BD */
755 { 0xFBAA, 3 }, /* 0x06BE D HEH DOACHASHMEE */
756 { 0x06BF, 0 }, /* 0x06BF */
758 { 0xFBA4, 1 }, /* 0x06C0 R HEH WITH YEH ABOVE */
759 { 0xFBA6, 3 }, /* 0x06C1 D HEH GOAL */
760 { 0x06C2, 0 }, /* 0x06C2 */
761 { 0x06C3, 0 }, /* 0x06C3 */
762 { 0x06C4, 0 }, /* 0x06C4 */
763 { 0xFBE0, 1 }, /* 0x06C5 R KIRGHIZ OE */
764 { 0xFBD9, 1 }, /* 0x06C6 R OE */
765 { 0xFBD7, 1 }, /* 0x06C7 R U */
766 { 0xFBDB, 1 }, /* 0x06C8 R YU */
767 { 0xFBE2, 1 }, /* 0x06C9 R KIRGHIZ YU */
768 { 0x06CA, 0 }, /* 0x06CA */
769 { 0xFBDE, 1 }, /* 0x06CB R VE */
770 { 0xFBFC, 3 }, /* 0x06CC D FARSI YEH */
771 { 0x06CD, 0 }, /* 0x06CD */
772 { 0x06CE, 0 }, /* 0x06CE */
773 { 0x06CF, 0 }, /* 0x06CF */
775 { 0xFBE4, 3 }, /* 0x06D0 D E */
776 { 0x06D1, 0 }, /* 0x06D1 */
777 { 0xFBAE, 1 }, /* 0x06D2 R YEH BARREE */
778 { 0xFBB0, 1 }, /* 0x06D3 R YEH BARREE WITH HAMZA ABOVE */
779 { 0x06D4, 0 }, /* 0x06D4 */
780 { 0x06D5, 0 }, /* 0x06D5 */
781 { 0x06D6, 0 }, /* 0x06D6 */
782 { 0x06D7, 0 }, /* 0x06D7 */
783 { 0x06D8, 0 }, /* 0x06D8 */
784 { 0x06D9, 0 }, /* 0x06D9 */
785 { 0x06DA, 0 }, /* 0x06DA */
786 { 0x06DB, 0 }, /* 0x06DB */
787 { 0x06DC, 0 }, /* 0x06DC */
788 { 0x06DD, 0 }, /* 0x06DD */
789 { 0x06DE, 0 }, /* 0x06DE */
790 { 0x06DF, 0 }, /* 0x06DF */
792 { 0x06E0, 0 }, /* 0x06E0 */
793 { 0x06E1, 0 }, /* 0x06E1 */
794 { 0x06E2, 0 }, /* 0x06E2 */
795 { 0x06E3, 0 }, /* 0x06E3 */
796 { 0x06E4, 0 }, /* 0x06E4 */
797 { 0x06E5, 0 }, /* 0x06E5 */
798 { 0x06E6, 0 }, /* 0x06E6 */
799 { 0x06E7, 0 }, /* 0x06E7 */
800 { 0x06E8, 0 }, /* 0x06E8 */
801 { 0x06E9, 0 }, /* 0x06E9 */
802 { 0x06EA, 0 }, /* 0x06EA */
803 { 0x06EB, 0 }, /* 0x06EB */
804 { 0x06EC, 0 }, /* 0x06EC */
805 { 0x06ED, 0 }, /* 0x06ED */
806 { 0x06EE, 0 }, /* 0x06EE */
807 { 0x06EF, 0 }, /* 0x06EF */
809 { 0x06F0, 0 }, /* 0x06F0 */
810 { 0x06F1, 0 }, /* 0x06F1 */
811 { 0x06F2, 0 }, /* 0x06F2 */
812 { 0x06F3, 0 }, /* 0x06F3 */
813 { 0x06F4, 0 }, /* 0x06F4 */
814 { 0x06F5, 0 }, /* 0x06F5 */
815 { 0x06F6, 0 }, /* 0x06F6 */
816 { 0x06F7, 0 }, /* 0x06F7 */
817 { 0x06F8, 0 }, /* 0x06F8 */
818 { 0x06F9, 0 }, /* 0x06F9 */
819 { 0x06FA, 0 }, /* 0x06FA */
820 { 0x06FB, 0 }, /* 0x06FB */
821 { 0x06FC, 0 }, /* 0x06FC */
822 { 0x06FD, 0 }, /* 0x06FD */
823 { 0x06FE, 0 }, /* 0x06FE */
824 { 0x06FF, 0 } /* 0x06FF */
827 /* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, this table does */
/* Indexed by the shape number; getShape() reads it when the low byte of the character is 0x49. */
828 static const hb_uint16 alefMaksura[4] = {0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9};
831 // This is a bit tricky. Alef always binds to the right, so the second parameter describing the shape
832 // of the lam can be either initial or medial. So initial maps to the isolated form of the ligature,
833 // medial to the final form.
/*
 * Row index: low byte of the alef code unit minus 0x22 (U+0622..U+0627).
 * Column index: shape of the lam. The first two columns and the rows for
 * U+0624/U+0626 hold 0xfffd as filler.
 */
835 static const hb_uint16 arabicUnicodeLamAlefMapping[6][4] = {
836 { 0xfffd, 0xfffd, 0xfef5, 0xfef6 }, /* 0x622 R Alef with Madda above */
837 { 0xfffd, 0xfffd, 0xfef7, 0xfef8 }, /* 0x623 R Alef with Hamza above */
838 { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x624 // Just to fill the table ;-) */
839 { 0xfffd, 0xfffd, 0xfef9, 0xfefa }, /* 0x625 R Alef with Hamza below */
840 { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x626 // Just to fill the table ;-) */
841 { 0xfffd, 0xfffd, 0xfefb, 0xfefc } /* 0x627 R Alef */
/*
 * Computes the code point to use for a U+06xx character in a given shape.
 *
 * cell:  low byte of the character (the index into arabicUnicodeMapping).
 * shape: shape number; 0 leaves the character unchanged (0x600 + cell).
 *
 * For a non-zero shape the result is the table's base form plus shape, except
 * for cell 0x49, which is looked up in alefMaksura[] for every shape.
 *
 * NOTE(review): arabicUnicodeMapping[cell][1] (the number of available forms)
 * is not consulted here -- confirm that callers never pass a shape beyond the
 * forms that exist for this cell.
 */
844 static int getShape(hb_uint8 cell, int shape)
846 /* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, handle this here */
847 int ch = (cell != 0x49)
848 ? (shape ? arabicUnicodeMapping[cell][0] + shape : 0x600+cell)
849 : alefMaksura[shape] ;
855 Two small helper functions for arabic shaping.
/*
 * Examines the text before position pos, starting at str[pos - 1], and tests
 * whether the character there is something other than a non-spacing mark.
 * The final return statement yields ReplacementCharacter.
 */
857 static HB_UChar16 prevChar(const HB_UChar16 *str, int pos)
859 /*qDebug("leftChar: pos=%d", pos); */
860 const HB_UChar16 *ch = str + pos - 1;
863 if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing)
868 return ReplacementCharacter;
/*
 * Examines the text after position pos, starting at str[pos + 1], and tests
 * whether the character there is something other than a non-spacing mark;
 * non-spacing marks are assumed to be transparent. len is the length of str.
 * The final return statement yields ReplacementCharacter.
 */
871 static HB_UChar16 nextChar(const HB_UChar16 *str, hb_uint32 len, hb_uint32 pos)
873 const HB_UChar16 *ch = str + pos + 1;
876 /*qDebug("rightChar: %d isLetter=%d, joining=%d", pos, ch.isLetter(), ch.joining()); */
877 if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing)
879 /* assume it's a transparent char, this might not be 100% correct */
883 return ReplacementCharacter;
/*
 * Unicode-to-Unicode shaping: rewrites uc[from .. from+len) into presentation
 * forms without using OpenType tables.
 *
 * uc, stringLength: the complete string and its length; the function asserts
 *                   stringLength >= from + len.
 * from, len:        the range to shape.
 * shapeBuffer:      receives the shaped code units.
 * shapedLength:     receives the number of code units written to shapeBuffer.
 * reverse:          passed by HB_ArabicShape() as bidiLevel % 2.
 * attributes:       per-output-glyph attributes (mark, clusterStart,
 *                   combiningClass, justification) are filled in.
 * logClusters:      logClusters[i] receives the cluster start for input
 *                   character i.
 */
886 static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32 from, hb_uint32 len, HB_UChar16 *shapeBuffer, int *shapedLength,
887 HB_Bool reverse, HB_GlyphAttributes *attributes, unsigned short *logClusters)
889 HB_ArabicProperties *properties;
892 const HB_UChar16 *ch;
896 HB_STACKARRAY(HB_ArabicProperties, props, len + 2); /* two extra slots beyond len */
899 assert(stringLength >= from + len);
911 if (f + l < stringLength)
913 getArabicProperties(uc+f, l, props);
919 for (i = 0; i < len; i++) {
920 hb_uint8 r = *ch >> 8; /* high byte of the code unit */
921 int gpos = data - shapeBuffer; /* index of the output slot being written */
925 if (*ch == 0x200c || *ch == 0x200d)
926 /* remove ZWJ and ZWNJ */
930 *data = HB_GetMirroredChar(*ch);
934 hb_uint8 c = *ch & 0xff; /* low byte: index into arabicUnicodeMapping */
936 int shape = properties[i].shape;
937 /* qDebug("mapping U+%x to shape %d glyph=0x%x", ch->unicode(), shape, getShape(c, shape)); */
938 /* take care of lam-alef ligatures (lam right of alef) */
941 case 0x44: { /* lam */
942 const HB_UChar16 pch = nextChar(uc, stringLength, pos);
943 if ((pch >> 8) == 0x06) { /* following character lies in U+06xx */
944 switch (pch & 0xff) {
949 /* qDebug(" lam of lam-alef ligature"); */
950 map = arabicUnicodeLamAlefMapping[(pch & 0xff) - 0x22][shape];
958 case 0x22: /* alef with madda */
959 case 0x23: /* alef with hamza above */
960 case 0x25: /* alef with hamza below */
961 case 0x27: /* alef */
962 if (prevChar(uc, pos) == 0x0644) {
963 /* have a lam alef ligature */
964 /*qDebug(" alef of lam-alef ligature"); */
970 map = getShape(c, shape);
975 /*glyphs[gpos].attributes.zeroWidth = zeroWidth; */
976 if (HB_GetUnicodeCharCategory(*ch) == HB_Mark_NonSpacing) {
977 attributes[gpos].mark = TRUE;
978 /* qDebug("glyph %d (char %d) is mark!", gpos, i); */
980 attributes[gpos].mark = FALSE;
981 clusterStart = data - shapeBuffer;
983 attributes[gpos].clusterStart = !attributes[gpos].mark; /* only non-marks start a cluster */
984 attributes[gpos].combiningClass = HB_GetUnicodeCharCombiningClass(*ch);
985 attributes[gpos].justification = properties[i].justification;
986 /* qDebug("data[%d] = %x (from %x)", gpos, (uint)data->unicode(), ch->unicode());*/
990 logClusters[i] = clusterStart;
992 *shapedLength = data - shapeBuffer;
994 HB_FREE_STACKARRAY(props);
/*
 * OpenType features for the Arabic script; each entry pairs a feature tag
 * with a property constant. HB_ArabicShape() passes this table to
 * HB_SelectScript() when the item's script is HB_Script_Arabic.
 */
999 static const HB_OpenTypeFeature arabic_features[] = {
1000 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
1001 { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty },
1002 { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty },
1003 { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty },
1004 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
1005 { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty },
1006 { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty },
1007 { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty },
1008 { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty },
1009 { HB_MAKE_TAG('c', 's', 'w', 'h'), CswhProperty },
1010 /* mset is used in old Win95 fonts that don't have a 'mark' positioning table. */
1011 { HB_MAKE_TAG('m', 's', 'e', 't'), MsetProperty },
/*
 * OpenType features passed to HB_SelectScript() by HB_ArabicShape() for every
 * script other than HB_Script_Arabic. The additional positional variants
 * fin2/fin3 and med2 share the property constants of fina and medi.
 */
1015 static const HB_OpenTypeFeature syriac_features[] = {
1016 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
1017 { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty },
1018 { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty },
1019 { HB_MAKE_TAG('f', 'i', 'n', '2'), FinaProperty },
1020 { HB_MAKE_TAG('f', 'i', 'n', '3'), FinaProperty },
1021 { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty },
1022 { HB_MAKE_TAG('m', 'e', 'd', '2'), MediProperty },
1023 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
1024 { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty },
1025 { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty },
1026 { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty },
1027 { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty },
/*
 * OpenType shaping path for Arabic, Syriac and N'Ko items.
 *
 * Converts the item to glyph indices, computes per-character joining
 * properties (getNkoProperties() for HB_Script_Nko, getArabicProperties()
 * otherwise), builds a per-glyph property mask in apply[] and hands it to
 * HB_OpenTypeShape(), then positions the glyphs with HB_OpenTypePosition().
 *
 * item:  the shaper item; its attributes' justification fields are updated.
 * ot_ok: output flag owned by the caller.
 */
1031 static HB_Bool arabicSyriacOpenTypeShape(HB_ShaperItem *item, HB_Bool *ot_ok)
1033 const HB_UChar16 *uc;
1034 const int nglyphs = item->num_glyphs; /* glyph count captured on entry, later passed to HB_OpenTypePosition() */
1037 HB_ArabicProperties *properties;
1038 HB_DECLARE_STACKARRAY(HB_ArabicProperties, props)
1039 HB_DECLARE_STACKARRAY(hb_uint32, apply)
1045 if (!HB_ConvertStringToGlyphIndices(item))
1047 HB_HeuristicSetGlyphAttributes(item);
1049 HB_INIT_STACKARRAY(HB_ArabicProperties, props, item->item.length + 2);
1050 HB_INIT_STACKARRAY(hb_uint32, apply, item->num_glyphs);
1052 uc = item->string + item->item.pos;
1056 l = item->item.length;
1057 if (item->item.pos > 0) {
1062 if (f + l + item->item.pos < item->stringLength) {
1065 if (item->item.script == HB_Script_Nko)
1066 getNkoProperties(uc+f, l, props);
1068 getArabicProperties(uc+f, l, props);
/* NOTE(review): the loop bound is num_glyphs while props was sized from item.length + 2 --
   confirm num_glyphs cannot exceed the number of computed properties. */
1070 for (i = 0; i < (int)item->num_glyphs; i++) {
/* For each shape, set the bits of the three other positional forms
   (presumably a set bit excludes that feature for the glyph -- confirm against HB_OpenTypeShape). */
1073 if (properties[i].shape == XIsolated)
1074 apply[i] |= MediProperty|FinaProperty|InitProperty;
1075 else if (properties[i].shape == XMedial)
1076 apply[i] |= IsolProperty|FinaProperty|InitProperty;
1077 else if (properties[i].shape == XFinal)
1078 apply[i] |= IsolProperty|MediProperty|InitProperty;
1079 else if (properties[i].shape == XInitial)
1080 apply[i] |= IsolProperty|MediProperty|FinaProperty;
1082 item->attributes[i].justification = properties[i].justification;
1085 HB_FREE_STACKARRAY(props);
1087 shaped = HB_OpenTypeShape(item, apply);
1089 HB_FREE_STACKARRAY(apply);
1095 return HB_OpenTypePosition(item, nglyphs, /*doLogClusters*/TRUE);
1100 /* #### still missing: identify invalid character combinations */
/*
 * Shaper entry point for Arabic, Syriac and N'Ko items (asserted below).
 *
 * When HB_SelectScript() accepts the script's feature table, OpenType shaping
 * is attempted through arabicSyriacOpenTypeShape(). In the non-OpenType code,
 * scripts other than Arabic are delegated to HB_BasicShape(), while Arabic is
 * shaped with shapedString(), converted to glyph indices by the font class
 * and positioned with HB_HeuristicPosition().
 *
 * The shapedChars scratch array is released on every visible exit path.
 */
1101 HB_Bool HB_ArabicShape(HB_ShaperItem *item)
1105 HB_STACKARRAY(HB_UChar16, shapedChars, item->item.length);
1107 assert(item->item.script == HB_Script_Arabic || item->item.script == HB_Script_Syriac
1108 || item->item.script == HB_Script_Nko);
1112 if (HB_SelectScript(item, item->item.script == HB_Script_Arabic ? arabic_features : syriac_features)) {
1114 if (arabicSyriacOpenTypeShape(item, &ot_ok)) {
1115 HB_FREE_STACKARRAY(shapedChars);
1119 HB_FREE_STACKARRAY(shapedChars);
1121 /* fall through to the non OT code*/
1126 if (item->item.script != HB_Script_Arabic) {
1127 HB_FREE_STACKARRAY(shapedChars);
1128 return HB_BasicShape(item);
1131 shapedString(item->string, item->stringLength, item->item.pos, item->item.length, shapedChars, &slen,
1132 item->item.bidiLevel % 2,
1133 item->attributes, item->log_clusters);
1135 haveGlyphs = item->font->klass
1136 ->convertStringToGlyphIndices(item->font,
1138 item->glyphs, &item->num_glyphs,
1139 item->item.bidiLevel % 2);
1141 HB_FREE_STACKARRAY(shapedChars);
1146 HB_HeuristicPosition(item);