2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
4 * This is part of HarfBuzz, an OpenType Layout engine library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
/* U+FFFD. prevChar() and nextChar() both end with
   `return ReplacementCharacter;` as their fallback result. */
30 static const HB_UChar16 ReplacementCharacter = 0xfffd;
/* Justification class of the character. The property-computing functions in
   this file store it here, and the shaping code copies it into the glyph
   attributes (attributes[...].justification).
   NOTE(review): the opening of this typedef, including the `shape` member
   that the code in this file reads, is not visible in this listing. */
34 unsigned char justification;
35 } HB_ArabicProperties;
42 /* intermediate state */
47 // These groups correspond to the groups defined in the Unicode standard.
48 // Some of these groups are equal with regard to both joining and line breaking behaviour,
49 // and thus have the same enum value.
51 // I'm not sure the mapping of Syriac to Arabic enums is correct with regard to justification, but as
52 // I couldn't find any better document I'll hope for the best.
/* The enumerators visible below are aliases that share the value of HamzaOnHehGoal. */
91 YehWithTail = HamzaOnHehGoal,
92 YehBarre = HamzaOnHehGoal,
121 Sadhe = HamzaOnHehGoal,
124 /* Compiler bug? Otherwise ArabicGroupsEnd would be equal to Dal + 1. */
125 Dummy = HamzaOnHehGoal,
/* ArabicGroup value for each code point, indexed by (code point - 0x600).
   arabicGroup() consults it for 0x0600 <= uc < 0x750, which matches the
   0x150 entries declared here.
   NOTE(review): some rows of the initializer are not visible in this listing. */
129 static const unsigned char arabic_group[0x150] = {
130 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
131 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
132 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
133 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
135 Transparent, Transparent, Transparent, Transparent,
136 Transparent, Transparent, ArabicNone, ArabicNone,
137 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
138 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
140 ArabicNone, ArabicNone, Alef, Alef,
141 Waw, Alef, Yeh, Alef,
142 Beh, TehMarbuta, Beh, Beh,
147 Tah, Ain, Ain, ArabicNone,
148 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
151 Kashida, Feh, Qaf, Kaf,
152 Lam, Meem, Noon, Heh,
153 Waw, Yeh, Yeh, Transparent,
154 Transparent, Transparent, Transparent, Transparent,
156 Transparent, Transparent, Transparent, Transparent,
157 Transparent, Transparent, Transparent, Transparent,
158 Transparent, ArabicNone, ArabicNone, ArabicNone,
159 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
161 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
162 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
163 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
164 ArabicNone, ArabicNone, Beh, Qaf,
166 Transparent, Alef, Alef, Alef,
167 ArabicNone, Alef, Waw, Waw,
179 Reh, Reh, Seen, Seen,
184 Qaf, Gaf, SwashKaf, Gaf,
189 Lam, Noon, Noon, Noon,
190 Noon, Noon, KnottedHeh, Hah,
193 TehMarbuta, HehGoal, HamzaOnHehGoal, HamzaOnHehGoal,
196 Yeh, YehWithTail, Yeh, Waw,
198 Yeh, Yeh, YehBarre, YehBarre,
199 ArabicNone, TehMarbuta, Transparent, Transparent,
200 Transparent, Transparent, Transparent, Transparent,
201 Transparent, ArabicNone, ArabicNone, Transparent,
203 Transparent, Transparent, Transparent, Transparent,
204 Transparent, ArabicNone, ArabicNone, Transparent,
205 Transparent, ArabicNone, Transparent, Transparent,
206 Transparent, Transparent, Dal, Reh,
208 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
209 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
210 ArabicNone, ArabicNone, Seen, Sad,
211 Ain, ArabicNone, ArabicNone, KnottedHeh,
214 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
215 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
216 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
217 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
219 Alaph, Transparent, Beth, Gamal,
220 Gamal, Dalath, Dalath, He,
221 SyriacWaw, Zain, Heth, Teth,
222 Teth, Yudh, YudhHe, Kaph,
224 Lamadh, Mim, Nun, Semakh,
225 FinalSemakh, SyriacE, Pe, ReversedPe,
226 Sadhe, Qaph, Dalath, Shin,
227 Taw, Beth, Gamal, Dalath,
229 Transparent, Transparent, Transparent, Transparent,
230 Transparent, Transparent, Transparent, Transparent,
231 Transparent, Transparent, Transparent, Transparent,
232 Transparent, Transparent, Transparent, Transparent,
234 Transparent, Transparent, Transparent, Transparent,
235 Transparent, Transparent, Transparent, Transparent,
236 Transparent, Transparent, Transparent, ArabicNone,
237 ArabicNone, Zain, Kaph, Fe,
/*
 * Classifies the UTF-16 code unit uc into an ArabicGroup.
 *
 * Code points in [0x0600, 0x0750) are looked up in arabic_group[]. U+200D and
 * characters whose Unicode category is HB_Separator_Space are special-cased.
 * NOTE(review): the return statements of those two branches, and of the
 * default case, are not visible in this listing.
 */
240 static ArabicGroup arabicGroup(unsigned short uc)
242 if (uc >= 0x0600 && uc < 0x750)
243 return (ArabicGroup) arabic_group[uc-0x600];
244 else if (uc == 0x200d)
246 else if (HB_GetUnicodeCharCategory(uc) == HB_Separator_Space)
254 Arabic shaping obeys a number of rules according to the joining classes (see the Unicode book, section on Arabic).
257 Each Unicode char has a joining class (right, dual (left & right), center (join-causing) or transparent).
258 Transparent joining is not encoded in HB_UChar16::joining(), but applies to all combining marks and format marks.
260 Right join-causing: dual + center
261 Left join-causing: dual + right + center
263 Rules are as follows (for a string already in visual order, as we have it here):
265 R1 Transparent characters do not affect joining behaviour.
266 R2 A right joining character, that has a right join-causing char on the right will get form XRight
267 (R3 A left joining character, that has a left join-causing char on the left will get form XLeft)
268 Note: the above rule is meaningless, as there are no pure left joining characters defined in Unicode
269 R4 A dual joining character, that has a left join-causing char on the left and a right join-causing char on
270 the right will get form XMedial
271 R5 A dual joining character, that has a right join-causing char on the right, and no left join-causing char on the left, will get form XRight
273 R6 A dual joining character, that has a left join-causing char on the left, and no right join-causing char on the right, will get form XLeft
275 R7 Otherwise the character will get form XIsolated
277 Additionally we have to do the minimal ligature support for lam-alef ligatures:
279 L1 Transparent characters do not affect ligature behaviour.
280 L2 Any sequence of Alef(XRight) + Lam(XMedial) will form the ligature Alef.Lam(XLeft)
281 L3 Any sequence of Alef(XRight) + Lam(XLeft) will form the ligature Alef.Lam(XIsolated)
283 The state table below handles rules R1-R7.
/* Joining class for each ArabicGroup value. The table is sized by
   ArabicGroupsEnd, and getArabicProperties() indexes it with the result of
   arabicGroup(). Each entry is annotated with the group it belongs to.
   NOTE(review): some entries are not visible in this listing. */
294 static const Joining joining_for_group[ArabicGroupsEnd] = {
296 JNone, /* ArabicNone */
297 JNone, /* ArabicSpace */
299 JTransparent, /* Transparent */
301 JCausing, /* Center */
302 JCausing, /* Kashida */
316 JRight /* HamzaOnHehGoal */
/* Shape state table, indexed as joining_table[running shape][joining class of
   the character being examined]. Rows are labelled on the right and columns
   in the header comment. The property functions in this file store form1 as
   the shape of the previously tracked character (properties[lastPos].shape)
   and take form2 as the new running shape. */
325 static const JoiningPair joining_table[5][4] =
326 /* None, Causing, Dual, Right */
328 { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XInitial }, { XIsolated, XIsolated } }, /* XIsolated */
329 { { XFinal, XIsolated }, { XFinal, XCausing }, { XFinal, XInitial }, { XFinal, XIsolated } }, /* XFinal */
330 { { XIsolated, XIsolated }, { XInitial, XCausing }, { XInitial, XMedial }, { XInitial, XFinal } }, /* XInitial */
331 { { XFinal, XIsolated }, { XMedial, XCausing }, { XMedial, XMedial }, { XMedial, XFinal } }, /* XMedial */
332 { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XMedial }, { XIsolated, XFinal } }, /* XCausing */
337 According to http://www.microsoft.com/middleeast/Arabicdev/IE6/KBase.asp
339 1. Find the priority of the connecting opportunities in each word
340 2. Add expansion at the highest priority connection opportunity
341 3. If more than one connection opportunity have the same highest value,
342 use the opportunity closest to the end of the word.
344 Following is a chart that provides the priority for connection
345 opportunities and where expansion occurs. The character group names
346 are those in table 6.6 of the UNICODE 2.0 book.
349 Priority Glyph Condition Kashida Location
351 Arabic_Kashida User inserted Kashida The user entered a Kashida in a position. After the user
352 (Shift+j or Shift+[E with hat]) Thus, it is the highest priority to insert an inserted kashida
355 Arabic_Seen Seen, Sad Connecting to the next character. After the character.
356 (Initial or medial form).
358 Arabic_HaaDal Teh Marbutah, Haa, Dal Connecting to previous character. Before the final form
361 Arabic_Alef Alef, Tah, Lam, Connecting to previous character. Before the final form
362 Kaf and Gaf of these characters.
364 Arabic_BaRa Reh, Yeh Connected to medial Beh Before preceding medial Baa
366 Arabic_Waw Waw, Ain, Qaf, Feh Connecting to previous character. Before the final form of
369 Arabic_Normal Other connecting Connecting to previous character. Before the final form
370 characters of these characters.
374 This seems to imply that we have at most one kashida point per arabic word.
/*
 * Fills properties[] with the shape and justification class of each character
 * in chars[].
 *
 *   chars       UTF-16 code units to classify.
 *   len         number of entries in chars and in properties.
 *   properties  output array, written in place.
 *
 * The running shape starts from the XIsolated row of joining_table. For every
 * following character the joining class is obtained through arabicGroup() and
 * joining_for_group[]. Transparent characters are marked XIsolated. Otherwise
 * the table's form1 is stored for the character at lastPos and form2 becomes
 * the new running shape. After the loop the character at lastPos is finalised
 * as if it were followed by a JNone character.
 *
 * NOTE(review): chars[0] and properties[0] are accessed before the loop in the
 * visible code, so this presumably expects len >= 1 - confirm with callers.
 * NOTE(review): many lines of this function (braces, the switch containing
 * `case ArabicGroupsEnd`, the lastPos bookkeeping) are not visible in this
 * listing.
 */
378 static void getArabicProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties)
380 /* qDebug("arabicSyriacOpenTypeShape: properties:"); */
382 int lastGroup = ArabicNone;
/* Seed the state machine with the first character. */
385 ArabicGroup group = arabicGroup(chars[0]);
386 Joining j = joining_for_group[group];
387 ArabicShape shape = joining_table[XIsolated][j].form2;
388 properties[0].justification = HB_NoJustification;
390 for (i = 1; i < len; ++i) {
391 /* #### fix handling for spaces and punctuation */
392 properties[i].justification = HB_NoJustification;
394 group = arabicGroup(chars[i]);
395 j = joining_for_group[group];
397 if (j == JTransparent) {
398 properties[i].shape = XIsolated;
/* Resolve the shape of the previously tracked character and advance the state. */
402 properties[lastPos].shape = joining_table[shape][j].form1;
403 shape = joining_table[shape][j].form2;
/* Justification classes: the visible branches tag a position with an
   HB_Arabic_* class depending on the shape just stored at lastPos. */
407 if (properties[lastPos].shape == XInitial || properties[lastPos].shape == XMedial)
408 properties[i-1].justification = HB_Arabic_Seen;
411 if (properties[lastPos].shape == XFinal)
412 properties[lastPos-1].justification = HB_Arabic_HaaDal;
415 if (properties[lastPos].shape == XFinal)
416 properties[lastPos-1].justification = HB_Arabic_Alef;
419 if (properties[lastPos].shape == XFinal)
420 properties[lastPos-1].justification = HB_Arabic_Waw;
423 if (properties[lastPos].shape == XFinal)
424 properties[lastPos-1].justification = HB_Arabic_Normal;
433 lastGroup = ArabicNone;
438 /* ### Center should probably be treated as transparent when it comes to justification. */
442 properties[i].justification = HB_Arabic_Space;
445 properties[i].justification = HB_Arabic_Kashida;
463 if (properties[lastPos].shape == XMedial && arabicGroup(chars[lastPos]) == Beh)
464 properties[lastPos-1].justification = HB_Arabic_BaRa;
477 case ArabicGroupsEnd:
/* Close the sequence: treat the end of the text like a non-joining character. */
483 properties[lastPos].shape = joining_table[shape][JNone].form1;
/* Debug output: one line per character. */
487 for (int i = 0; i < len; ++i)
488 qDebug("arabic properties(%d): uc=%x shape=%d, justification=%d", i, chars[i], properties[i].shape, properties[i].justification);
/* Returns the Joining class of code unit uc. getNkoProperties() uses the
   result as the column index into joining_table.
   NOTE(review): the body is not visible in this listing. */
492 static Joining getNkoJoining(unsigned short uc)
/*
 * N'Ko counterpart of getArabicProperties(): fills properties[] with the shape
 * and justification value of each character in chars[].
 *
 *   chars       UTF-16 code units to classify.
 *   len         number of entries in chars and in properties.
 *   properties  output array, written in place.
 *
 * Joining classes come from getNkoJoining(). The shape state machine is the
 * same joining_table walk: form1 is stored at lastPos, form2 becomes the
 * running shape, and the last tracked character is finalised with JNone.
 *
 * NOTE(review): chars[0] and properties[0] are accessed before the loop in the
 * visible code, so this presumably expects len >= 1 - confirm with callers.
 * NOTE(review): several lines (braces, lastPos bookkeeping) are not visible in
 * this listing.
 */
507 static void getNkoProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties)
512 Joining j = getNkoJoining(chars[0]);
513 ArabicShape shape = joining_table[XIsolated][j].form2;
514 properties[0].justification = HB_NoJustification;
516 for (i = 1; i < len; ++i) {
/* NOTE(review): this stores ArabicGroup enumerators (ArabicSpace / ArabicNone)
   in `justification`, whereas getArabicProperties() stores HB_* justification
   constants (HB_Arabic_Space / HB_NoJustification). Confirm that the numeric
   values coincide. */
517 properties[i].justification = (HB_GetUnicodeCharCategory(chars[i]) == HB_Separator_Space) ?
518 ArabicSpace : ArabicNone;
520 j = getNkoJoining(chars[i]);
522 if (j == JTransparent) {
523 properties[i].shape = XIsolated;
/* Resolve the shape of the previously tracked character and advance the state. */
527 properties[lastPos].shape = joining_table[shape][j].form1;
528 shape = joining_table[shape][j].form2;
/* Close the sequence: treat the end of the text like a non-joining character. */
533 properties[lastPos].shape = joining_table[shape][JNone].form1;
/* Debug output: one line per character. */
537 for (int i = 0; i < len; ++i)
538 qDebug("nko properties(%d): uc=%x shape=%d, justification=%d", i, chars[i], properties[i].shape, properties[i].justification);
543 // The unicode to unicode shaping codec.
544 // does only presentation forms B at the moment, but that should be enough for
/* One row per code point U+0600..U+06FF, indexed by the low byte of the
   character: getShape() reads arabicUnicodeMapping[cell][0] and adds the
   shape index to it. */
547 static const hb_uint16 arabicUnicodeMapping[256][2] = {
548 /* base of shaped forms, and number-1 of them (0 for non shaping,
549 1 for right binding and 3 for dual binding) */
551 /* These are just the glyphs available in Unicode,
552 some characters are in R class, but have no glyphs in Unicode. */
554 { 0x0600, 0 }, /* 0x0600 */
555 { 0x0601, 0 }, /* 0x0601 */
556 { 0x0602, 0 }, /* 0x0602 */
557 { 0x0603, 0 }, /* 0x0603 */
558 { 0x0604, 0 }, /* 0x0604 */
559 { 0x0605, 0 }, /* 0x0605 */
560 { 0x0606, 0 }, /* 0x0606 */
561 { 0x0607, 0 }, /* 0x0607 */
562 { 0x0608, 0 }, /* 0x0608 */
563 { 0x0609, 0 }, /* 0x0609 */
564 { 0x060A, 0 }, /* 0x060A */
565 { 0x060B, 0 }, /* 0x060B */
566 { 0x060C, 0 }, /* 0x060C */
567 { 0x060D, 0 }, /* 0x060D */
568 { 0x060E, 0 }, /* 0x060E */
569 { 0x060F, 0 }, /* 0x060F */
571 { 0x0610, 0 }, /* 0x0610 */
572 { 0x0611, 0 }, /* 0x0611 */
573 { 0x0612, 0 }, /* 0x0612 */
574 { 0x0613, 0 }, /* 0x0613 */
575 { 0x0614, 0 }, /* 0x0614 */
576 { 0x0615, 0 }, /* 0x0615 */
577 { 0x0616, 0 }, /* 0x0616 */
578 { 0x0617, 0 }, /* 0x0617 */
579 { 0x0618, 0 }, /* 0x0618 */
580 { 0x0619, 0 }, /* 0x0619 */
581 { 0x061A, 0 }, /* 0x061A */
582 { 0x061B, 0 }, /* 0x061B */
583 { 0x061C, 0 }, /* 0x061C */
584 { 0x061D, 0 }, /* 0x061D */
585 { 0x061E, 0 }, /* 0x061E */
586 { 0x061F, 0 }, /* 0x061F */
588 { 0x0620, 0 }, /* 0x0620 */
589 { 0xFE80, 0 }, /* 0x0621 HAMZA */
590 { 0xFE81, 1 }, /* 0x0622 R ALEF WITH MADDA ABOVE */
591 { 0xFE83, 1 }, /* 0x0623 R ALEF WITH HAMZA ABOVE */
592 { 0xFE85, 1 }, /* 0x0624 R WAW WITH HAMZA ABOVE */
593 { 0xFE87, 1 }, /* 0x0625 R ALEF WITH HAMZA BELOW */
594 { 0xFE89, 3 }, /* 0x0626 D YEH WITH HAMZA ABOVE */
595 { 0xFE8D, 1 }, /* 0x0627 R ALEF */
596 { 0xFE8F, 3 }, /* 0x0628 D BEH */
597 { 0xFE93, 1 }, /* 0x0629 R TEH MARBUTA */
598 { 0xFE95, 3 }, /* 0x062A D TEH */
599 { 0xFE99, 3 }, /* 0x062B D THEH */
600 { 0xFE9D, 3 }, /* 0x062C D JEEM */
601 { 0xFEA1, 3 }, /* 0x062D D HAH */
602 { 0xFEA5, 3 }, /* 0x062E D KHAH */
603 { 0xFEA9, 1 }, /* 0x062F R DAL */
605 { 0xFEAB, 1 }, /* 0x0630 R THAL */
606 { 0xFEAD, 1 }, /* 0x0631 R REH */
607 { 0xFEAF, 1 }, /* 0x0632 R ZAIN */
608 { 0xFEB1, 3 }, /* 0x0633 D SEEN */
609 { 0xFEB5, 3 }, /* 0x0634 D SHEEN */
610 { 0xFEB9, 3 }, /* 0x0635 D SAD */
611 { 0xFEBD, 3 }, /* 0x0636 D DAD */
612 { 0xFEC1, 3 }, /* 0x0637 D TAH */
613 { 0xFEC5, 3 }, /* 0x0638 D ZAH */
614 { 0xFEC9, 3 }, /* 0x0639 D AIN */
615 { 0xFECD, 3 }, /* 0x063A D GHAIN */
616 { 0x063B, 0 }, /* 0x063B */
617 { 0x063C, 0 }, /* 0x063C */
618 { 0x063D, 0 }, /* 0x063D */
619 { 0x063E, 0 }, /* 0x063E */
620 { 0x063F, 0 }, /* 0x063F */
622 { 0x0640, 0 }, /* 0x0640 C TATWEEL // ### Join Causing, only one glyph */
623 { 0xFED1, 3 }, /* 0x0641 D FEH */
624 { 0xFED5, 3 }, /* 0x0642 D QAF */
625 { 0xFED9, 3 }, /* 0x0643 D KAF */
626 { 0xFEDD, 3 }, /* 0x0644 D LAM */
627 { 0xFEE1, 3 }, /* 0x0645 D MEEM */
628 { 0xFEE5, 3 }, /* 0x0646 D NOON */
629 { 0xFEE9, 3 }, /* 0x0647 D HEH */
630 { 0xFEED, 1 }, /* 0x0648 R WAW */
631 { 0x0649, 3 }, /* 0x0649 ALEF MAKSURA // ### Dual, glyphs not consecutive, handle in code. */
632 { 0xFEF1, 3 }, /* 0x064A D YEH */
633 { 0x064B, 0 }, /* 0x064B */
634 { 0x064C, 0 }, /* 0x064C */
635 { 0x064D, 0 }, /* 0x064D */
636 { 0x064E, 0 }, /* 0x064E */
637 { 0x064F, 0 }, /* 0x064F */
639 { 0x0650, 0 }, /* 0x0650 */
640 { 0x0651, 0 }, /* 0x0651 */
641 { 0x0652, 0 }, /* 0x0652 */
642 { 0x0653, 0 }, /* 0x0653 */
643 { 0x0654, 0 }, /* 0x0654 */
644 { 0x0655, 0 }, /* 0x0655 */
645 { 0x0656, 0 }, /* 0x0656 */
646 { 0x0657, 0 }, /* 0x0657 */
647 { 0x0658, 0 }, /* 0x0658 */
648 { 0x0659, 0 }, /* 0x0659 */
649 { 0x065A, 0 }, /* 0x065A */
650 { 0x065B, 0 }, /* 0x065B */
651 { 0x065C, 0 }, /* 0x065C */
652 { 0x065D, 0 }, /* 0x065D */
653 { 0x065E, 0 }, /* 0x065E */
654 { 0x065F, 0 }, /* 0x065F */
656 { 0x0660, 0 }, /* 0x0660 */
657 { 0x0661, 0 }, /* 0x0661 */
658 { 0x0662, 0 }, /* 0x0662 */
659 { 0x0663, 0 }, /* 0x0663 */
660 { 0x0664, 0 }, /* 0x0664 */
661 { 0x0665, 0 }, /* 0x0665 */
662 { 0x0666, 0 }, /* 0x0666 */
663 { 0x0667, 0 }, /* 0x0667 */
664 { 0x0668, 0 }, /* 0x0668 */
665 { 0x0669, 0 }, /* 0x0669 */
666 { 0x066A, 0 }, /* 0x066A */
667 { 0x066B, 0 }, /* 0x066B */
668 { 0x066C, 0 }, /* 0x066C */
669 { 0x066D, 0 }, /* 0x066D */
670 { 0x066E, 0 }, /* 0x066E */
671 { 0x066F, 0 }, /* 0x066F */
673 { 0x0670, 0 }, /* 0x0670 */
674 { 0xFB50, 1 }, /* 0x0671 R ALEF WASLA */
675 { 0x0672, 0 }, /* 0x0672 */
676 { 0x0673, 0 }, /* 0x0673 */
677 { 0x0674, 0 }, /* 0x0674 */
678 { 0x0675, 0 }, /* 0x0675 */
679 { 0x0676, 0 }, /* 0x0676 */
680 { 0x0677, 0 }, /* 0x0677 */
681 { 0x0678, 0 }, /* 0x0678 */
682 { 0xFB66, 3 }, /* 0x0679 D TTEH */
683 { 0xFB5E, 3 }, /* 0x067A D TTEHEH */
684 { 0xFB52, 3 }, /* 0x067B D BEEH */
685 { 0x067C, 0 }, /* 0x067C */
686 { 0x067D, 0 }, /* 0x067D */
687 { 0xFB56, 3 }, /* 0x067E D PEH */
688 { 0xFB62, 3 }, /* 0x067F D TEHEH */
690 { 0xFB5A, 3 }, /* 0x0680 D BEHEH */
691 { 0x0681, 0 }, /* 0x0681 */
692 { 0x0682, 0 }, /* 0x0682 */
693 { 0xFB76, 3 }, /* 0x0683 D NYEH */
694 { 0xFB72, 3 }, /* 0x0684 D DYEH */
695 { 0x0685, 0 }, /* 0x0685 */
696 { 0xFB7A, 3 }, /* 0x0686 D TCHEH */
697 { 0xFB7E, 3 }, /* 0x0687 D TCHEHEH */
698 { 0xFB88, 1 }, /* 0x0688 R DDAL */
699 { 0x0689, 0 }, /* 0x0689 */
700 { 0x068A, 0 }, /* 0x068A */
701 { 0x068B, 0 }, /* 0x068B */
702 { 0xFB84, 1 }, /* 0x068C R DAHAL */
703 { 0xFB82, 1 }, /* 0x068D R DDAHAL */
704 { 0xFB86, 1 }, /* 0x068E R DUL */
705 { 0x068F, 0 }, /* 0x068F */
707 { 0x0690, 0 }, /* 0x0690 */
708 { 0xFB8C, 1 }, /* 0x0691 R RREH */
709 { 0x0692, 0 }, /* 0x0692 */
710 { 0x0693, 0 }, /* 0x0693 */
711 { 0x0694, 0 }, /* 0x0694 */
712 { 0x0695, 0 }, /* 0x0695 */
713 { 0x0696, 0 }, /* 0x0696 */
714 { 0x0697, 0 }, /* 0x0697 */
715 { 0xFB8A, 1 }, /* 0x0698 R JEH */
716 { 0x0699, 0 }, /* 0x0699 */
717 { 0x069A, 0 }, /* 0x069A */
718 { 0x069B, 0 }, /* 0x069B */
719 { 0x069C, 0 }, /* 0x069C */
720 { 0x069D, 0 }, /* 0x069D */
721 { 0x069E, 0 }, /* 0x069E */
722 { 0x069F, 0 }, /* 0x069F */
724 { 0x06A0, 0 }, /* 0x06A0 */
725 { 0x06A1, 0 }, /* 0x06A1 */
726 { 0x06A2, 0 }, /* 0x06A2 */
727 { 0x06A3, 0 }, /* 0x06A3 */
728 { 0xFB6A, 3 }, /* 0x06A4 D VEH */
729 { 0x06A5, 0 }, /* 0x06A5 */
730 { 0xFB6E, 3 }, /* 0x06A6 D PEHEH */
731 { 0x06A7, 0 }, /* 0x06A7 */
732 { 0x06A8, 0 }, /* 0x06A8 */
733 { 0xFB8E, 3 }, /* 0x06A9 D KEHEH */
734 { 0x06AA, 0 }, /* 0x06AA */
735 { 0x06AB, 0 }, /* 0x06AB */
736 { 0x06AC, 0 }, /* 0x06AC */
737 { 0xFBD3, 3 }, /* 0x06AD D NG */
738 { 0x06AE, 0 }, /* 0x06AE */
739 { 0xFB92, 3 }, /* 0x06AF D GAF */
741 { 0x06B0, 0 }, /* 0x06B0 */
742 { 0xFB9A, 3 }, /* 0x06B1 D NGOEH */
743 { 0x06B2, 0 }, /* 0x06B2 */
744 { 0xFB96, 3 }, /* 0x06B3 D GUEH */
745 { 0x06B4, 0 }, /* 0x06B4 */
746 { 0x06B5, 0 }, /* 0x06B5 */
747 { 0x06B6, 0 }, /* 0x06B6 */
748 { 0x06B7, 0 }, /* 0x06B7 */
749 { 0x06B8, 0 }, /* 0x06B8 */
750 { 0x06B9, 0 }, /* 0x06B9 */
751 { 0xFB9E, 1 }, /* 0x06BA R NOON GHUNNA */
752 { 0xFBA0, 3 }, /* 0x06BB D RNOON */
753 { 0x06BC, 0 }, /* 0x06BC */
754 { 0x06BD, 0 }, /* 0x06BD */
755 { 0xFBAA, 3 }, /* 0x06BE D HEH DOACHASHMEE */
756 { 0x06BF, 0 }, /* 0x06BF */
758 { 0xFBA4, 1 }, /* 0x06C0 R HEH WITH YEH ABOVE */
759 { 0xFBA6, 3 }, /* 0x06C1 D HEH GOAL */
760 { 0x06C2, 0 }, /* 0x06C2 */
761 { 0x06C3, 0 }, /* 0x06C3 */
762 { 0x06C4, 0 }, /* 0x06C4 */
763 { 0xFBE0, 1 }, /* 0x06C5 R KIRGHIZ OE */
764 { 0xFBD9, 1 }, /* 0x06C6 R OE */
765 { 0xFBD7, 1 }, /* 0x06C7 R U */
766 { 0xFBDB, 1 }, /* 0x06C8 R YU */
767 { 0xFBE2, 1 }, /* 0x06C9 R KIRGHIZ YU */
768 { 0x06CA, 0 }, /* 0x06CA */
769 { 0xFBDE, 1 }, /* 0x06CB R VE */
770 { 0xFBFC, 3 }, /* 0x06CC D FARSI YEH */
771 { 0x06CD, 0 }, /* 0x06CD */
772 { 0x06CE, 0 }, /* 0x06CE */
773 { 0x06CF, 0 }, /* 0x06CF */
775 { 0xFBE4, 3 }, /* 0x06D0 D E */
776 { 0x06D1, 0 }, /* 0x06D1 */
777 { 0xFBAE, 1 }, /* 0x06D2 R YEH BARREE */
778 { 0xFBB0, 1 }, /* 0x06D3 R YEH BARREE WITH HAMZA ABOVE */
779 { 0x06D4, 0 }, /* 0x06D4 */
780 { 0x06D5, 0 }, /* 0x06D5 */
781 { 0x06D6, 0 }, /* 0x06D6 */
782 { 0x06D7, 0 }, /* 0x06D7 */
783 { 0x06D8, 0 }, /* 0x06D8 */
784 { 0x06D9, 0 }, /* 0x06D9 */
785 { 0x06DA, 0 }, /* 0x06DA */
786 { 0x06DB, 0 }, /* 0x06DB */
787 { 0x06DC, 0 }, /* 0x06DC */
788 { 0x06DD, 0 }, /* 0x06DD */
789 { 0x06DE, 0 }, /* 0x06DE */
790 { 0x06DF, 0 }, /* 0x06DF */
792 { 0x06E0, 0 }, /* 0x06E0 */
793 { 0x06E1, 0 }, /* 0x06E1 */
794 { 0x06E2, 0 }, /* 0x06E2 */
795 { 0x06E3, 0 }, /* 0x06E3 */
796 { 0x06E4, 0 }, /* 0x06E4 */
797 { 0x06E5, 0 }, /* 0x06E5 */
798 { 0x06E6, 0 }, /* 0x06E6 */
799 { 0x06E7, 0 }, /* 0x06E7 */
800 { 0x06E8, 0 }, /* 0x06E8 */
801 { 0x06E9, 0 }, /* 0x06E9 */
802 { 0x06EA, 0 }, /* 0x06EA */
803 { 0x06EB, 0 }, /* 0x06EB */
804 { 0x06EC, 0 }, /* 0x06EC */
805 { 0x06ED, 0 }, /* 0x06ED */
806 { 0x06EE, 0 }, /* 0x06EE */
807 { 0x06EF, 0 }, /* 0x06EF */
809 { 0x06F0, 0 }, /* 0x06F0 */
810 { 0x06F1, 0 }, /* 0x06F1 */
811 { 0x06F2, 0 }, /* 0x06F2 */
812 { 0x06F3, 0 }, /* 0x06F3 */
813 { 0x06F4, 0 }, /* 0x06F4 */
814 { 0x06F5, 0 }, /* 0x06F5 */
815 { 0x06F6, 0 }, /* 0x06F6 */
816 { 0x06F7, 0 }, /* 0x06F7 */
817 { 0x06F8, 0 }, /* 0x06F8 */
818 { 0x06F9, 0 }, /* 0x06F9 */
819 { 0x06FA, 0 }, /* 0x06FA */
820 { 0x06FB, 0 }, /* 0x06FB */
821 { 0x06FC, 0 }, /* 0x06FC */
822 { 0x06FD, 0 }, /* 0x06FD */
823 { 0x06FE, 0 }, /* 0x06FE */
824 { 0x06FF, 0 } /* 0x06FF */
827 /* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, this table does.
   getShape() indexes it directly with the shape value when cell == 0x49. */
828 static const hb_uint16 alefMaksura[4] = {0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9};
831 // This is a bit tricky. Alef always binds to the right, so the second parameter describing the shape
832 // of the lam can be either initial or medial. So initial maps to the isolated form of the ligature,
833 // medial to the final form.
/* Indexed as [low byte of the alef - 0x22][shape of the lam] by shapedString().
   Cells holding 0xfffd are the combinations for which no ligature code point
   is listed. */
835 static const hb_uint16 arabicUnicodeLamAlefMapping[6][4] = {
836 { 0xfffd, 0xfffd, 0xfef5, 0xfef6 }, /* 0x622 R Alef with Madda above */
837 { 0xfffd, 0xfffd, 0xfef7, 0xfef8 }, /* 0x623 R Alef with Hamza above */
838 { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x624 // Just to fill the table ;-) */
839 { 0xfffd, 0xfffd, 0xfef9, 0xfefa }, /* 0x625 R Alef with Hamza below */
840 { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x626 // Just to fill the table ;-) */
841 { 0xfffd, 0xfffd, 0xfefb, 0xfefc } /* 0x627 R Alef */
/*
 * Computes the code point for a U+06xx character in a given shape.
 *
 *   cell   low byte of the character (0x49 selects the ALEF MAKSURA table).
 *   shape  shape index; 0 leaves the character as 0x600 + cell.
 *
 * For cell != 0x49 and a non-zero shape, the value is
 * arabicUnicodeMapping[cell][0] + shape. For cell == 0x49 it is
 * alefMaksura[shape].
 * NOTE(review): the lines after the computation of `ch` (including the return)
 * are not visible in this listing.
 */
844 static int getShape(hb_uint8 cell, int shape)
846 /* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, handle this here */
847 int ch = (cell != 0x49)
848 ? (shape ? arabicUnicodeMapping[cell][0] + shape : 0x600+cell)
849 : alefMaksura[shape] ;
855 Two small helper functions for arabic shaping.
/*
 * Helper for the lam-alef handling in shapedString(): inspects the text before
 * index pos in str, starting at str[pos - 1], and tests for characters that
 * are not non-spacing marks. Falls back to ReplacementCharacter.
 * NOTE(review): the loop and the non-fallback return are not visible in this
 * listing.
 */
857 static HB_UChar16 prevChar(const HB_UChar16 *str, int pos)
859 /*qDebug("leftChar: pos=%d", pos); */
860 const HB_UChar16 *ch = str + pos - 1;
863 if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing)
868 return ReplacementCharacter;
/*
 * Helper for the lam-alef handling in shapedString(): inspects the text after
 * index pos in str, starting at str[pos + 1], and tests for characters that
 * are not non-spacing marks. Falls back to ReplacementCharacter.
 * NOTE(review): the loop, the use of len and the non-fallback return are not
 * visible in this listing.
 */
871 static HB_UChar16 nextChar(const HB_UChar16 *str, hb_uint32 len, hb_uint32 pos)
873 const HB_UChar16 *ch = str + pos + 1;
876 /*qDebug("rightChar: %d isLetter=%d, joining=%d", pos, ch.isLetter(), ch.joining()); */
877 if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing)
879 /* assume it's a transparent char, this might not be 100% correct */
883 return ReplacementCharacter;
/*
 * Shapes len characters of uc, starting at index from, into shapeBuffer.
 * HB_ArabicShape() calls it on its non-OpenType path.
 *
 *   uc, stringLength  the whole string and its length; the code asserts
 *                     stringLength >= from + len.
 *   from, len         the range to shape.
 *   shapeBuffer       output characters.
 *   shapedLength      receives the number of output characters
 *                     (data - shapeBuffer).
 *   reverse           NOTE(review): its use is not visible in this listing.
 *   attributes        per output position: mark, clusterStart, combiningClass
 *                     and justification are written.
 *   logClusters       per input character: the output index at which its
 *                     cluster starts.
 *
 * Shapes come from getArabicProperties(). ZWJ/ZWNJ (U+200D/U+200C) are
 * removed, lam followed by an alef variant is mapped through
 * arabicUnicodeLamAlefMapping, and other U+06xx characters go through
 * getShape().
 * NOTE(review): many lines (declarations, braces, the f/l/pos bookkeeping) are
 * not visible in this listing.
 */
886 static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32 from, hb_uint32 len, HB_UChar16 *shapeBuffer, int *shapedLength,
887 HB_Bool reverse, HB_GlyphAttributes *attributes, unsigned short *logClusters)
889 HB_ArabicProperties *properties;
892 const HB_UChar16 *ch;
/* Two extra slots on top of len. */
896 HB_STACKARRAY(HB_ArabicProperties, props, len + 2);
899 assert(stringLength >= from + len);
911 if (f + l < stringLength)
913 getArabicProperties(uc+f, l, props);
919 for (i = 0; i < len; i++) {
/* r: high byte of the character; gpos: current output index. */
920 hb_uint8 r = *ch >> 8;
921 int gpos = data - shapeBuffer;
925 if (*ch == 0x200c || *ch == 0x200d)
926 /* remove ZWJ and ZWNJ */
930 *data = HB_GetMirroredChar(*ch);
/* c: low byte of the character, used to index the shaping tables. */
934 hb_uint8 c = *ch & 0xff;
936 int shape = properties[i].shape;
937 /* qDebug("mapping U+%x to shape %d glyph=0x%x", ch->unicode(), shape, getShape(c, shape)); */
938 /* take care of lam-alef ligatures (lam right of alef) */
941 case 0x44: { /* lam */
942 const HB_UChar16 pch = nextChar(uc, stringLength, pos);
943 if ((pch >> 8) == 0x06) {
944 switch (pch & 0xff) {
949 /* qDebug(" lam of lam-alef ligature"); */
950 map = arabicUnicodeLamAlefMapping[(pch & 0xff) - 0x22][shape];
958 case 0x22: /* alef with madda */
959 case 0x23: /* alef with hamza above */
960 case 0x25: /* alef with hamza below */
961 case 0x27: /* alef */
962 if (prevChar(uc, pos) == 0x0644) {
963 /* have a lam alef ligature */
964 /*qDebug(" alef of lam-alef ligature"); */
970 map = getShape(c, shape);
975 /*glyphs[gpos].attributes.zeroWidth = zeroWidth; */
/* Non-spacing marks are flagged as marks; any other character starts a new cluster. */
976 if (HB_GetUnicodeCharCategory(*ch) == HB_Mark_NonSpacing) {
977 attributes[gpos].mark = TRUE;
978 /* qDebug("glyph %d (char %d) is mark!", gpos, i); */
980 attributes[gpos].mark = FALSE;
981 clusterStart = data - shapeBuffer;
983 attributes[gpos].clusterStart = !attributes[gpos].mark;
984 attributes[gpos].combiningClass = HB_GetUnicodeCharCombiningClass(*ch);
985 attributes[gpos].justification = properties[i].justification;
986 /* qDebug("data[%d] = %x (from %x)", gpos, (uint)data->unicode(), ch->unicode());*/
990 logClusters[i] = clusterStart;
992 *shapedLength = data - shapeBuffer;
994 HB_FREE_STACKARRAY(props);
/* OpenType feature tags paired with the property bit used for each.
   HB_ArabicShape() passes this table to HB_SelectScript() when the item's
   script is HB_Script_Arabic.
   NOTE(review): the end of the initializer is not visible in this listing. */
999 static const HB_OpenTypeFeature arabic_features[] = {
1000 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
1001 { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty },
1002 { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty },
1003 { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty },
1004 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
1005 { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty },
1006 { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty },
1007 { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty },
1008 { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty },
1009 { HB_MAKE_TAG('c', 's', 'w', 'h'), CswhProperty },
1010 /* mset is used in old Win95 fonts that don't have a 'mark' positioning table. */
1011 { HB_MAKE_TAG('m', 's', 'e', 't'), MsetProperty },
/* OpenType feature tags paired with the property bit used for each.
   HB_ArabicShape() passes this table to HB_SelectScript() whenever the item's
   script is not HB_Script_Arabic. The extra fin2/fin3 and med2 tags reuse
   FinaProperty and MediProperty.
   NOTE(review): the end of the initializer is not visible in this listing. */
1015 static const HB_OpenTypeFeature syriac_features[] = {
1016 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
1017 { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty },
1018 { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty },
1019 { HB_MAKE_TAG('f', 'i', 'n', '2'), FinaProperty },
1020 { HB_MAKE_TAG('f', 'i', 'n', '3'), FinaProperty },
1021 { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty },
1022 { HB_MAKE_TAG('m', 'e', 'd', '2'), MediProperty },
1023 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
1024 { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty },
1025 { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty },
1026 { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty },
1027 { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty },
/*
 * OpenType shaping path for the scripts handled by HB_ArabicShape().
 *
 *   item   shaper item; its glyphs, attributes, advances and offsets are
 *          updated in place.
 *   ot_ok  out-parameter. NOTE(review): the lines that set it are not visible
 *          in this listing.
 *
 * Visible steps: convert the string to glyph indices, set heuristic glyph
 * attributes, compute per-character shape/justification (getNkoProperties()
 * for HB_Script_Nko, getArabicProperties() otherwise), build a per-glyph
 * property mask, then call HB_OpenTypeShape() and HB_OpenTypePosition().
 * NOTE(review): many lines (declarations, braces, early returns, the f/l
 * bookkeeping) are not visible in this listing.
 */
1031 static HB_Bool arabicSyriacOpenTypeShape(HB_ShaperItem *item, HB_Bool *ot_ok)
1033 const HB_UChar16 *uc;
/* Glyph count captured before shaping; passed to HB_OpenTypePosition() below. */
1034 const int nglyphs = item->num_glyphs;
1037 HB_ArabicProperties *properties;
1038 HB_DECLARE_STACKARRAY(HB_ArabicProperties, props)
1039 HB_DECLARE_STACKARRAY(hb_uint32, apply)
1046 if (!HB_ConvertStringToGlyphIndices(item))
1048 HB_HeuristicSetGlyphAttributes(item);
/* props has two extra slots on top of the item length; apply has one entry per glyph. */
1050 HB_INIT_STACKARRAY(HB_ArabicProperties, props, item->item.length + 2);
1051 HB_INIT_STACKARRAY(hb_uint32, apply, item->num_glyphs);
1053 uc = item->string + item->item.pos;
1057 l = item->item.length;
1058 if (item->item.pos > 0) {
1063 if (f + l + item->item.pos < item->stringLength) {
1066 if (item->item.script == HB_Script_Nko)
1067 getNkoProperties(uc+f, l, props);
1069 getArabicProperties(uc+f, l, props);
/* NOTE(review): properties[] is computed per character but indexed here per
   glyph - presumably the two counts are equal at this point; confirm. */
1071 for (i = 0; i < (int)item->num_glyphs; i++) {
/* Each glyph's mask receives the bits of the three positional forms it does
   NOT have; how HB_OpenTypeShape() interprets the mask is outside this listing. */
1074 if (properties[i].shape == XIsolated)
1075 apply[i] |= MediProperty|FinaProperty|InitProperty;
1076 else if (properties[i].shape == XMedial)
1077 apply[i] |= IsolProperty|FinaProperty|InitProperty;
1078 else if (properties[i].shape == XFinal)
1079 apply[i] |= IsolProperty|MediProperty|InitProperty;
1080 else if (properties[i].shape == XInitial)
1081 apply[i] |= IsolProperty|MediProperty|FinaProperty;
1083 item->attributes[i].justification = properties[i].justification;
1086 HB_FREE_STACKARRAY(props);
1088 shaped = HB_OpenTypeShape(item, apply);
1090 HB_FREE_STACKARRAY(apply);
1097 positioned = HB_OpenTypePosition(item, nglyphs, /*doLogClusters*/TRUE);
1099 /* SAMSUNG bug fix for Arabic - Start */
/* Swaps glyphs, advances and offsets between index i and its mirror index k.
   NOTE(review): the condition compares bidiLevel with TRUE, whereas
   HB_ArabicShape() tests `bidiLevel % 2`; confirm whether levels other than
   the value of TRUE are meant to be covered. */
1100 if(TRUE == item->item.bidiLevel)
1102 int j = item->num_glyphs/2;
1106 HB_Glyph glyph = item->glyphs[i];
1107 HB_Fixed advance = item->advances[i];
1108 HB_FixedPoint offset = item->offsets[i];
1109 int k = (item->num_glyphs-1)-i;
1111 item->glyphs[i] = item->glyphs[k];
1112 item->advances[i] = item->advances[k];
1113 item->offsets[i] = item->offsets[k];
1115 item->glyphs[k] = glyph;
1116 item->advances[k] = advance;
1117 item->offsets[k] = offset;
1121 /* SAMSUNG bug fix for Arabic - End */
/*
 * Shaper entry point for items whose script is Arabic, Syriac or N'Ko (the
 * function asserts this).
 *
 * It first tries the OpenType path: HB_SelectScript() with arabic_features for
 * Arabic or syriac_features otherwise, then arabicSyriacOpenTypeShape().
 * On the non-OpenType path, non-Arabic scripts are delegated to
 * HB_BasicShape(). Arabic text is shaped with shapedString(), converted to
 * glyph indices through the font's convertStringToGlyphIndices callback, and
 * positioned with HB_HeuristicPosition().
 * NOTE(review): several lines (declarations, braces, return statements) are
 * not visible in this listing.
 */
1128 /* #### still missing: identify invalid character combinations */
1129 HB_Bool HB_ArabicShape(HB_ShaperItem *item)
1133 HB_STACKARRAY(HB_UChar16, shapedChars, item->item.length);
1135 assert(item->item.script == HB_Script_Arabic || item->item.script == HB_Script_Syriac
1136 || item->item.script == HB_Script_Nko);
1140 if (HB_SelectScript(item, item->item.script == HB_Script_Arabic ? arabic_features : syriac_features)) {
1142 if (arabicSyriacOpenTypeShape(item, &ot_ok))
1146 /* fall through to the non OT code*/
/* NOTE(review): this early return is reached after shapedChars was set up with
   HB_STACKARRAY and before the HB_FREE_STACKARRAY below - confirm whether the
   macro needs a matching free on this path. */
1150 if (item->item.script != HB_Script_Arabic)
1151 return HB_BasicShape(item);
/* Odd bidi levels are passed as the `reverse` / right-to-left flag. */
1153 shapedString(item->string, item->stringLength, item->item.pos, item->item.length, shapedChars, &slen,
1154 item->item.bidiLevel % 2,
1155 item->attributes, item->log_clusters);
1157 haveGlyphs = item->font->klass
1158 ->convertStringToGlyphIndices(item->font,
1160 item->glyphs, &item->num_glyphs,
1161 item->item.bidiLevel % 2);
1163 HB_FREE_STACKARRAY(shapedChars);
1168 HB_HeuristicPosition(item);