2 ******************************************************************************
4 * Copyright (C) 1999-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999jul27
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
19 #include "unicode/utypes.h"
20 #include "unicode/ustring.h"
21 #include "unicode/uchar.h"
22 #include "unicode/ubidi.h"
23 #include "unicode/utf16.h"
24 #include "ubidi_props.h"
29 * General implementation notes:
31 * Throughout the implementation, there are comments like (W2) that refer to
32 * rules of the BiDi algorithm in its version 5, in this example to the second
33 * rule of the resolution of weak types.
35 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
36 * character according to UTF-16, the second UChar gets the directional property of
37 * the entire character assigned, while the first one gets a BN, a boundary
38 * neutral, type, which is ignored by most of the algorithm according to
39 * rule (X9) and the implementation suggestions of the BiDi algorithm.
41 * Later, adjustWSLevels() will set the level for each BN to that of the
42 * following character (UChar), which results in surrogate pairs getting the
43 * same level on each of their surrogates.
45 * In a UTF-8 implementation, the same thing could be done: the last byte of
46 * a multi-byte sequence would get the "real" property, while all previous
47 * bytes of that sequence would get BN.
49 * It is not possible to assign all those parts of a character the same real
50 * property because this would fail in the resolution of weak types with rules
51 * that look at immediately surrounding types.
53 * As a related topic, this implementation does not remove Boundary Neutral
54 * types from the input, but ignores them wherever this is relevant.
55 * For example, the loop for the resolution of the weak types reads
56 * types until it finds a non-BN.
57 * Also, explicit embedding codes are neither changed into BN nor removed.
58 * They are only treated the same way real BNs are.
59 * As stated before, adjustWSLevels() takes care of them at the end.
60 * For the purpose of conformance, the levels of all these codes
63 * Note that this implementation never modifies the dirProps
64 * after the initial setup, except for FSI which is changed to either
65 * LRI or RLI in getDirProps(), and paired brackets which may be changed
66 * to L or R according to N0.
69 * In this implementation, the resolution of weak types (Wn),
70 * neutrals (Nn), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
76 * This implementation contains code that allows to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
95 * If there are no explicit embedding codes, then the (Xn) steps
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
122 /* UBiDi object management -------------------------------------------------- */
124 U_CAPI UBiDi * U_EXPORT2
127 UErrorCode errorCode=U_ZERO_ERROR;
128 return ubidi_openSized(0, 0, &errorCode);
131 U_CAPI UBiDi * U_EXPORT2
132 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135 /* check the argument values */
136 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
138 } else if(maxLength<0 || maxRunCount<0) {
139 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
140 return NULL; /* invalid arguments */
143 /* allocate memory for the object */
144 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
146 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
150 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
151 uprv_memset(pBiDi, 0, sizeof(UBiDi));
153 /* get BiDi properties */
154 pBiDi->bdp=ubidi_getSingleton();
156 /* allocate memory for arrays as requested */
158 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
159 !getInitialLevelsMemory(pBiDi, maxLength)
161 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
164 pBiDi->mayAllocateText=TRUE;
169 /* use simpleRuns[] */
170 pBiDi->runsSize=sizeof(Run);
171 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
172 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
175 pBiDi->mayAllocateRuns=TRUE;
178 if(U_SUCCESS(*pErrorCode)) {
187 * We are allowed to allocate memory if memory==NULL or
188 * mayAllocate==TRUE for each array that we need.
189 * We also try to grow memory as needed if we
192 * Assume sizeNeeded>0.
193 * If *pMemory!=NULL, then assume *pSize>0.
195 * ### this realloc() may unnecessarily copy the old data,
196 * which we know we don't need any more;
197 * is this the best way to do this??
200 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
201 void **pMemory = (void **)bidiMem;
202 /* check for existing memory */
204 /* we need to allocate memory */
205 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
212 if(sizeNeeded<=*pSize) {
213 /* there is already enough memory */
216 else if(!mayAllocate) {
217 /* not enough memory, and we must not allocate */
222 /* in most cases, we do not need the copy-old-data part of
223 * realloc, but it is needed when adding runs using getRunsMemory()
224 * in setParaRunsOnly()
226 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
231 /* we failed to grow */
238 U_CAPI void U_EXPORT2
239 ubidi_close(UBiDi *pBiDi) {
241 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
242 if(pBiDi->dirPropsMemory!=NULL) {
243 uprv_free(pBiDi->dirPropsMemory);
245 if(pBiDi->levelsMemory!=NULL) {
246 uprv_free(pBiDi->levelsMemory);
248 if(pBiDi->openingsMemory!=NULL) {
249 uprv_free(pBiDi->openingsMemory);
251 if(pBiDi->parasMemory!=NULL) {
252 uprv_free(pBiDi->parasMemory);
254 if(pBiDi->runsMemory!=NULL) {
255 uprv_free(pBiDi->runsMemory);
257 if(pBiDi->isolatesMemory!=NULL) {
258 uprv_free(pBiDi->isolatesMemory);
260 if(pBiDi->insertPoints.points!=NULL) {
261 uprv_free(pBiDi->insertPoints.points);
268 /* set to approximate "inverse BiDi" ---------------------------------------- */
270 U_CAPI void U_EXPORT2
271 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
273 pBiDi->isInverse=isInverse;
274 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
275 : UBIDI_REORDER_DEFAULT;
279 U_CAPI UBool U_EXPORT2
280 ubidi_isInverse(UBiDi *pBiDi) {
282 return pBiDi->isInverse;
288 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
289 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
290 * concept of RUNS_ONLY which is a double operation.
291 * It could be advantageous to divide this into 3 concepts:
292 * a) Operation: direct / inverse / RUNS_ONLY
293 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
294 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
295 * This would allow combinations not possible today like RUNS_ONLY with
297 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
298 * REMOVE_CONTROLS for the inverse step.
299 * Not all combinations would be supported, and probably not all do make sense.
300 * This would need to document which ones are supported and what are the
301 * fallbacks for unsupported combinations.
303 U_CAPI void U_EXPORT2
304 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
305 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
306 && (reorderingMode < UBIDI_REORDER_COUNT)) {
307 pBiDi->reorderingMode = reorderingMode;
308 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
312 U_CAPI UBiDiReorderingMode U_EXPORT2
313 ubidi_getReorderingMode(UBiDi *pBiDi) {
315 return pBiDi->reorderingMode;
317 return UBIDI_REORDER_DEFAULT;
321 U_CAPI void U_EXPORT2
322 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
323 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
324 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
327 pBiDi->reorderingOptions=reorderingOptions;
331 U_CAPI uint32_t U_EXPORT2
332 ubidi_getReorderingOptions(UBiDi *pBiDi) {
334 return pBiDi->reorderingOptions;
340 U_CAPI UBiDiDirection U_EXPORT2
341 ubidi_getBaseDirection(const UChar *text,
348 if( text==NULL || length<-1 ){
349 return UBIDI_NEUTRAL;
353 length=u_strlen(text);
356 for( i = 0 ; i < length; ) {
357 /* i is incremented by U16_NEXT */
358 U16_NEXT(text, i, length, uchar);
359 dir = u_charDirection(uchar);
360 if( dir == U_LEFT_TO_RIGHT )
362 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
365 return UBIDI_NEUTRAL;
368 /* perform (P2)..(P3) ------------------------------------------------------- */
371 * Returns the directionality of the first strong character
372 * after the last B in prologue, if any.
373 * Requires prologue!=null.
376 firstL_R_AL(UBiDi *pBiDi) {
377 const UChar *text=pBiDi->prologue;
378 int32_t length=pBiDi->proLength;
381 DirProp dirProp, result=ON;
382 for(i=0; i<length; ) {
383 /* i is incremented by U16_NEXT */
384 U16_NEXT(text, i, length, uchar);
385 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
387 if(dirProp==L || dirProp==R || dirProp==AL) {
400 * Check that there are enough entries in the array pointed to by pBiDi->paras
403 checkParaCount(UBiDi *pBiDi) {
404 int32_t count=pBiDi->paraCount;
405 if(pBiDi->paras==pBiDi->simpleParas) {
406 if(count<=SIMPLE_PARAS_SIZE)
408 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2))
410 pBiDi->paras=pBiDi->parasMemory;
411 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para));
414 if(!getInitialParasMemory(pBiDi, count * 2))
416 pBiDi->paras=pBiDi->parasMemory;
421 * Get the directional properties for the text, calculate the flags bit-set, and
422 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
423 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
426 getDirProps(UBiDi *pBiDi) {
427 const UChar *text=pBiDi->text;
428 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
430 int32_t i=0, originalLength=pBiDi->originalLength;
431 Flags flags=0; /* collect all directionalities in the text */
433 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
434 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
435 /* for inverse BiDi, the default para level is set to RTL if there is a
436 strong R or AL character at either end of the text */
437 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
438 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
439 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
440 int32_t lastArabicPos=-1;
441 int32_t controlCount=0;
442 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
443 UBIDI_OPTION_REMOVE_CONTROLS);
446 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
447 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
448 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
449 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
452 DirProp lastStrong=ON; /* for default level & inverse BiDi */
453 /* The following stacks are used to manage isolate sequences. Those
454 sequences may be nested, but obviously never more deeply than the
455 maximum explicit embedding level.
456 lastStack is the index of the last used entry in the stack. A value of -1
457 means that there is no open isolate sequence.
458 lastStack is reset to -1 on paragraph boundaries. */
459 /* The following stack contains the position of the initiator of
460 each open isolate sequence */
461 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
462 /* The following stack contains the last known state before
463 encountering the initiator of an isolate sequence */
464 int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
465 int32_t stackLast=-1;
467 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
469 defaultParaLevel=pBiDi->paraLevel&1;
471 pBiDi->paras[0].level=defaultParaLevel;
472 lastStrong=defaultParaLevel;
473 if(pBiDi->proLength>0 && /* there is a prologue */
474 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
476 pBiDi->paras[0].level=0; /* set the default para level */
478 pBiDi->paras[0].level=1; /* set the default para level */
479 state=NOT_SEEKING_STRONG;
481 state=SEEKING_STRONG_FOR_PARA;
484 pBiDi->paras[0].level=pBiDi->paraLevel;
485 state=NOT_SEEKING_STRONG;
487 /* count paragraphs and determine the paragraph level (P2..P3) */
489 * see comment in ubidi.h:
490 * the UBIDI_DEFAULT_XXX values are designed so that
491 * their bit 0 alone yields the intended default
493 for( /* i=0 above */ ; i<originalLength; ) {
494 /* i is incremented by U16_NEXT */
495 U16_NEXT(text, i, originalLength, uchar);
496 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
497 dirProps[i-1]=dirProp;
498 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
499 flags|=DIRPROP_FLAG(BN);
502 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
505 if(state==SEEKING_STRONG_FOR_PARA) {
506 pBiDi->paras[pBiDi->paraCount-1].level=0;
507 state=NOT_SEEKING_STRONG;
509 else if(state==SEEKING_STRONG_FOR_FSI) {
510 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
511 dirProps[isolateStartStack[stackLast]]=LRI;
512 flags|=DIRPROP_FLAG(LRI);
514 state=LOOKING_FOR_PDI;
519 if(dirProp==R || dirProp==AL) {
520 if(state==SEEKING_STRONG_FOR_PARA) {
521 pBiDi->paras[pBiDi->paraCount-1].level=1;
522 state=NOT_SEEKING_STRONG;
524 else if(state==SEEKING_STRONG_FOR_FSI) {
525 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
526 dirProps[isolateStartStack[stackLast]]=RLI;
527 flags|=DIRPROP_FLAG(RLI);
529 state=LOOKING_FOR_PDI;
536 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
538 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
539 isolateStartStack[stackLast]=i-1;
540 previousStateStack[stackLast]=state;
543 state=SEEKING_STRONG_FOR_FSI;
545 state=LOOKING_FOR_PDI;
549 if(state==SEEKING_STRONG_FOR_FSI) {
550 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
551 dirProps[isolateStartStack[stackLast]]=LRI;
552 flags|=DIRPROP_FLAG(LRI);
556 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
557 state=previousStateStack[stackLast];
563 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
565 pBiDi->paras[pBiDi->paraCount-1].limit=i;
566 if(isDefaultLevelInverse && lastStrong==R)
567 pBiDi->paras[pBiDi->paraCount-1].level=1;
568 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
569 /* When streaming, we only process whole paragraphs
570 thus some updates are only done on paragraph boundaries */
571 pBiDi->length=i; /* i is index to next character */
572 pBiDi->controlCount=controlCount;
574 if(i<originalLength) { /* B not last char in text */
576 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
579 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
580 state=SEEKING_STRONG_FOR_PARA;
581 lastStrong=defaultParaLevel;
583 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
584 state=NOT_SEEKING_STRONG;
591 /* Ignore still open isolate sequences with overflow */
592 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
593 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
594 if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI)
595 state=LOOKING_FOR_PDI;
597 /* Resolve direction of still unresolved open FSI sequences */
598 while(stackLast>=0) {
599 if(state==SEEKING_STRONG_FOR_FSI) {
600 dirProps[isolateStartStack[stackLast]]=LRI;
601 flags|=DIRPROP_FLAG(LRI);
603 state=previousStateStack[stackLast];
606 /* When streaming, ignore text after the last paragraph separator */
607 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
608 if(pBiDi->length<originalLength)
611 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
612 pBiDi->controlCount=controlCount;
614 /* For inverse bidi, default para direction is RTL if there is
615 a strong R or AL at either end of the paragraph */
616 if(isDefaultLevelInverse && lastStrong==R) {
617 pBiDi->paras[pBiDi->paraCount-1].level=1;
620 pBiDi->paraLevel=pBiDi->paras[0].level;
622 /* The following is needed to resolve the text direction for default level
623 paragraphs containing no strong character */
624 for(i=0; i<pBiDi->paraCount; i++)
625 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
627 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
628 flags|=DIRPROP_FLAG(L);
631 pBiDi->lastArabicPos=lastArabicPos;
635 /* determine the paragraph level at position index */
637 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
639 for(i=0; i<pBiDi->paraCount; i++)
640 if(pindex<pBiDi->paras[i].limit)
642 if(i>=pBiDi->paraCount)
643 i=pBiDi->paraCount-1;
644 return (UBiDiLevel)(pBiDi->paras[i].level);
647 /* Functions for handling paired brackets ----------------------------------- */
649 /* In the isoRuns array, the first entry is used for text outside of any
650 isolate sequence. Higher entries are used for each more deeply nested
651 isolate sequence. isoRunLast is the index of the last used entry. The
652 openings array is used to note the data of opening brackets not yet
653 matched by a closing bracket, or matched but still susceptible to change
655 Each isoRun entry contains the index of the first and
656 one-after-last openings entries for pending opening brackets it
657 contains. The next openings entry to use is the one-after-last of the
658 most deeply nested isoRun entry.
659 isoRun entries also contain their current embedding level and the last
660 encountered strong character, since these will be needed to resolve
661 the level of paired brackets. */
664 bracketInit(UBiDi *pBiDi, BracketData *bd) {
667 bd->isoRuns[0].start=0;
668 bd->isoRuns[0].limit=0;
669 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
670 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
671 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
672 if(pBiDi->openingsMemory) {
673 bd->openings=pBiDi->openingsMemory;
674 bd->openingsSize=pBiDi->openingsSize;
676 bd->openings=bd->simpleOpenings;
677 bd->openingsSize=SIMPLE_OPENINGS_SIZE;
679 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
680 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
683 /* paragraph boundary */
685 bracketProcessB(BracketData *bd, UBiDiLevel level) {
687 bd->isoRuns[0].limit=0;
688 bd->isoRuns[0].level=level;
689 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1;
690 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
693 /* LRE, LRO, RLE, RLO, PDF */
695 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
696 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
697 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
698 DirProp *dirProps=bd->pBiDi->dirProps;
699 if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */
701 if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)>
702 (contextLevel&~UBIDI_LEVEL_OVERRIDE)) /* not a PDF */
703 contextLevel=embeddingLevel;
704 pLastIsoRun->limit=pLastIsoRun->start;
705 pLastIsoRun->level=embeddingLevel;
706 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1;
707 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos;
712 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
713 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
715 lastLimit=pLastIsoRun->limit;
718 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
719 pLastIsoRun->level=level;
720 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1;
721 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0;
726 bracketProcessPDI(BracketData *bd) {
730 /* newly found opening bracket: create an openings entry */
731 static UBool /* return TRUE if success */
732 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
733 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
735 if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */
736 UBiDi *pBiDi=bd->pBiDi;
737 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
739 if(bd->openings==bd->simpleOpenings)
740 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
741 SIMPLE_OPENINGS_SIZE * sizeof(Opening));
742 bd->openings=pBiDi->openingsMemory; /* may have changed */
743 bd->openingsSize=pBiDi->openingsSize;
745 pOpening=&bd->openings[pLastIsoRun->limit];
746 pOpening->position=position;
747 pOpening->match=match;
748 pOpening->contextDir=pLastIsoRun->contextDir;
749 pOpening->contextPos=pLastIsoRun->contextPos;
751 pLastIsoRun->limit++;
755 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
757 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
758 /* This function calls itself recursively */
759 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
761 DirProp *dirProps=bd->pBiDi->dirProps;
762 int32_t k, openingPosition, closingPosition;
763 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
764 if(qOpening->match>=0) /* not an N0c match */
766 if(newPropPosition<qOpening->contextPos)
768 if(newPropPosition>=qOpening->position)
770 if(newProp==qOpening->contextDir)
772 openingPosition=qOpening->position;
773 dirProps[openingPosition]=dirProps[newPropPosition];
774 closingPosition=-(qOpening->match);
775 dirProps[closingPosition]= newProp; /* can never be AL */
776 qOpening->match=0; /* prevent further changes */
777 fixN0c(bd, k, openingPosition, newProp);
778 fixN0c(bd, k, closingPosition, newProp);
782 /* handle strong characters, digits and candidates for closing brackets */
783 static UBool /* return TRUE if success */
784 bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) {
786 Opening *pOpening, *qOpening;
787 DirProp *dirProps, newProp;
788 UBiDiDirection direction;
793 dirProps=bd->pBiDi->dirProps;
794 if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */
795 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
796 /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */
797 if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL))
798 dirProp=pLastIsoRun->lastStrong;
799 /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */
801 if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) {
803 if(!bd->isNumbersSpecial)
804 dirProps[position]=ENL;
807 dirProp=pLastIsoRun->lastStrong; /* may be R or AL */
808 if(!bd->isNumbersSpecial)
809 dirProps[position]= dirProp==AL ? AN : ENR;
812 pLastIsoRun->lastStrong=dirProp;
813 pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp);
814 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position;
815 if(dirProp==AL || dirProp==AN)
817 flag=DIRPROP_FLAG(dirProp);
818 /* strong characters found after an unmatched opening bracket
819 must be noted for possibly applying N0b */
820 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
821 bd->openings[i].flags|=flag;
826 /* First see if it is a matching closing bracket. Hopefully, this is more
827 efficient than checking if it is a closing bracket at all */
828 c=bd->pBiDi->text[position];
829 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
830 for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) {
831 if(bd->openings[i].match!=c)
833 /* We have a match */
834 pOpening=&bd->openings[i];
835 direction=pLastIsoRun->level&1;
836 stable=TRUE; /* assume stable until proved otherwise */
838 /* The stable flag is set when brackets are paired and their
839 level is resolved and cannot be changed by what will be
840 found later in the source string.
841 An unstable match can occur only when applying N0c, where
842 the resolved level depends on the preceding context, and
843 this context may be affected by text occurring later.
844 Example: RTL paragraph containing: abc[(latin) HEBREW]
845 When the closing parenthesis is encountered, it appears
846 that N0c1 must be applied since 'abc' sets an opposite
847 direction context and both parentheses receive level 2.
848 However, when the closing square bracket is processed,
849 N0b applies because of 'HEBREW' being included within the
850 brackets, thus the square brackets are treated like R and
851 receive level 1. However, this changes the preceding
852 context of the opening parenthesis, and it now appears
853 that N0c2 must be applied to the parentheses rather than
856 if((direction==0 && pOpening->flags&FOUND_L) ||
857 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
860 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
861 if(direction!=pOpening->contextDir) {
862 newProp=pOpening->contextDir; /* N0c1 */
863 /* it is stable if there is no preceding text or in
864 conditions too complicated and not worth checking */
865 stable=(i==pLastIsoRun->start);
868 newProp=direction; /* N0c2 */
871 newProp=BN; /* N0d */
874 dirProps[pOpening->position]=newProp;
875 dirProps[position]=newProp;
876 pLastIsoRun->contextDir=newProp;
877 pLastIsoRun->contextPos=position;
879 /* Update nested N0c pairs that may be affected */
880 if(newProp==direction)
881 fixN0c(bd, i, pOpening->position, newProp);
883 pLastIsoRun->limit=i; /* forget any brackets nested within this pair */
884 /* remove lower located synonyms if any */
885 while(pLastIsoRun->limit>pLastIsoRun->start &&
886 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
887 pLastIsoRun->limit--;
890 pOpening->match=-position;
891 /* neutralize lower located synonyms if any */
893 while(k>=pLastIsoRun->start &&
894 bd->openings[k].position==pOpening->position)
895 bd->openings[k--].match=0;
896 /* neutralize any unmatched opening between the current pair;
897 this will also neutralize higher located synonyms if any */
898 for(k=i+1; k<pLastIsoRun->limit; k++) {
899 qOpening=&bd->openings[k];
900 if(qOpening->position>=position)
902 if(qOpening->match>0)
908 /* We get here only if the ON character was not a matching closing bracket */
909 /* Now see if it is an opening bracket */
910 match=u_getBidiPairedBracket(c); /* get the matching char */
911 if(match==c) /* if no matching char */
913 if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN)
914 return TRUE; /* not an opening bracket */
915 /* special case: process synonyms
916 create an opening entry for each synonym */
917 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
918 if(!bracketAddOpening(bd, 0x3009, position))
921 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
922 if(!bracketAddOpening(bd, 0x232A, position))
925 return bracketAddOpening(bd, match, position);
928 /* perform (X1)..(X9) ------------------------------------------------------- */
930 /* determine if the text is mixed-directional or single-directional */
931 static UBiDiDirection
932 directionFromFlags(UBiDi *pBiDi) {
933 Flags flags=pBiDi->flags;
934 /* if the text contains AN and neutrals, then some neutrals may become RTL */
935 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
937 } else if(!(flags&MASK_LTR)) {
945 * Resolve the explicit levels as specified by explicit embedding codes.
946 * Recalculate the flags to have them reflect the real properties
947 * after taking the explicit embeddings into account.
949 * The BiDi algorithm is designed to result in the same behavior whether embedding
950 * levels are externally specified (from "styled text", supposedly the preferred
951 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
952 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
953 * However, in a real implementation, the removal of these codes and their index
954 * positions in the plain text is undesirable since it would result in
955 * reallocated, reindexed text.
956 * Instead, this implementation leaves the codes in there and just ignores them
957 * in the subsequent processing.
958 * In order to get the same reordering behavior, positions with a BN or a not-isolate
959 * explicit embedding code just get the same level assigned as the last "real"
962 * Some implementations, not this one, then overwrite some of these
963 * directionality properties at "real" same-level-run boundaries by
964 * L or R codes so that the resolution of weak types can be performed on the
965 * entire paragraph at once instead of having to parse it once more and
966 * perform that resolution on same-level-runs.
967 * This limits the scope of the implicit rules in effectively
968 * the same way as the run limits.
970 * Instead, this implementation does not modify these codes, except for
971 * paired brackets whose properties (ON) may be replaced by L or R.
972 * On one hand, the paragraph has to be scanned for same-level-runs, but
973 * on the other hand, this saves another loop to reset these codes,
974 * or saves making and modifying a copy of dirProps[].
977 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
980 * Handling the stack of explicit levels (Xn):
982 * With the BiDi stack of explicit levels, as pushed with each
983 * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI,
984 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
986 * In order to have a correct push-pop semantics even in the case of overflows,
987 * overflow counters and a valid isolate counter are used as described in UAX#9
988 * section 3.3.2 "Explicit Levels and Directions".
990 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
992 static UBiDiDirection
993 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
994 DirProp *dirProps=pBiDi->dirProps;
995 UBiDiLevel *levels=pBiDi->levels;
996 const UChar *text=pBiDi->text;
998 int32_t i=0, length=pBiDi->length;
999 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
1001 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1002 UBiDiDirection direction;
1003 pBiDi->isolateCount=0;
1005 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1007 /* determine if the text is mixed-directional or single-directional */
1008 direction=directionFromFlags(pBiDi);
1010 /* we may not need to resolve any explicit levels */
1011 if((direction!=UBIDI_MIXED)) {
1012 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1015 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1016 /* inverse BiDi: mixed, but all characters are at the same embedding level */
1017 /* set all levels to the paragraph level */
1018 int32_t paraIndex, start, limit;
1019 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1023 start=pBiDi->paras[paraIndex-1].limit;
1024 limit=pBiDi->paras[paraIndex].limit;
1025 level=pBiDi->paras[paraIndex].level;
1026 for(i=start; i<limit; i++)
1029 return direction; /* no bracket matching for inverse BiDi */
1031 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1032 /* no embeddings, set all levels to the paragraph level */
1033 /* we still have to perform bracket matching */
1034 int32_t paraIndex, start, limit;
1035 BracketData bracketData;
1036 bracketInit(pBiDi, &bracketData);
1037 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1041 start=pBiDi->paras[paraIndex-1].limit;
1042 limit=pBiDi->paras[paraIndex].limit;
1043 level=pBiDi->paras[paraIndex].level;
1044 for(i=start; i<limit; i++) {
1046 dirProp=dirProps[i];
1049 if(text[i]==CR && text[i+1]==LF)
1050 continue; /* skip CR when followed by LF */
1051 bracketProcessB(&bracketData, level);
1055 if(!bracketProcessChar(&bracketData, i, dirProp)) {
1056 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1064 /* continue to perform (Xn) */
1066 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1067 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1068 UBiDiLevel embeddingLevel=level, newLevel;
1069 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
1070 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
1072 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1073 but we need one more entry as base */
1074 uint32_t stackLast=0;
1075 int32_t overflowIsolateCount=0;
1076 int32_t overflowEmbeddingCount=0;
1077 int32_t validIsolateCount=0;
1078 BracketData bracketData;
1079 bracketInit(pBiDi, &bracketData);
1080 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
1082 /* recalculate the flags */
1085 for(i=0; i<length; ++i) {
1086 dirProp=dirProps[i];
1092 /* (X2, X3, X4, X5) */
1093 flags|=DIRPROP_FLAG(BN);
1094 if (dirProp==LRE || dirProp==LRO)
1095 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
1097 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
1098 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1099 overflowEmbeddingCount==0) {
1101 embeddingLevel=newLevel;
1102 if(dirProp==LRO || dirProp==RLO)
1103 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1105 stack[stackLast]=embeddingLevel;
1106 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1107 since this has already been done for newLevel which is
1108 the source for embeddingLevel.
1111 dirProps[i]|=IGNORE_CC;
1112 if(overflowIsolateCount==0)
1113 overflowEmbeddingCount++;
1118 flags|=DIRPROP_FLAG(BN);
1119 /* handle all the overflow cases first */
1120 if(overflowIsolateCount) {
1121 dirProps[i]|=IGNORE_CC;
1124 if(overflowEmbeddingCount) {
1125 dirProps[i]|=IGNORE_CC;
1126 overflowEmbeddingCount--;
1129 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
1132 embeddingLevel=(UBiDiLevel)stack[stackLast];
1134 dirProps[i]|=IGNORE_CC;
1138 if(embeddingLevel!=previousLevel) {
1139 bracketProcessBoundary(&bracketData, lastCcPos,
1140 previousLevel, embeddingLevel);
1141 previousLevel=embeddingLevel;
1144 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
1145 level=embeddingLevel;
1147 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
1149 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
1150 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1151 overflowEmbeddingCount==0) {
1153 previousLevel=embeddingLevel;
1154 validIsolateCount++;
1155 if(validIsolateCount>pBiDi->isolateCount)
1156 pBiDi->isolateCount=validIsolateCount;
1157 embeddingLevel=newLevel;
1159 stack[stackLast]=embeddingLevel+ISOLATE;
1160 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1162 dirProps[i]|=IGNORE_CC;
1163 overflowIsolateCount++;
1167 if(embeddingLevel!=previousLevel) {
1168 bracketProcessBoundary(&bracketData, lastCcPos,
1169 previousLevel, embeddingLevel);
1172 if(overflowIsolateCount) {
1173 dirProps[i]|=IGNORE_CC;
1174 overflowIsolateCount--;
1176 else if(validIsolateCount) {
1178 overflowEmbeddingCount=0;
1179 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1180 stackLast--; /* until the last isolate entry */
1181 stackLast--; /* pop also the last isolate entry */
1182 validIsolateCount--;
1183 bracketProcessPDI(&bracketData);
1185 dirProps[i]|=IGNORE_CC;
1186 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1187 previousLevel=level=embeddingLevel;
1188 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
1191 level=GET_PARALEVEL(pBiDi, i);
1193 if(text[i]==CR && text[i+1]==LF)
1194 break; /* skip CR when followed by LF */
1195 overflowEmbeddingCount=overflowIsolateCount=0;
1196 validIsolateCount=0;
1198 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
1199 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1200 bracketProcessB(&bracketData, embeddingLevel);
1202 flags|=DIRPROP_FLAG(B);
1205 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1206 /* they will get their levels set correctly in adjustWSLevels() */
1207 flags|=DIRPROP_FLAG(BN);
1210 /* all other types get the "real" level */
1211 level=embeddingLevel;
1212 if(embeddingLevel!=previousLevel) {
1213 bracketProcessBoundary(&bracketData, lastCcPos,
1214 previousLevel, embeddingLevel);
1215 previousLevel=embeddingLevel;
1217 if(level&UBIDI_LEVEL_OVERRIDE)
1218 flags|=DIRPROP_FLAG_LR(level);
1220 flags|=DIRPROP_FLAG(dirProp);
1221 if(!bracketProcessChar(&bracketData, i, dirProp))
1227 * We need to set reasonable levels even on BN codes and
1228 * explicit codes because we will later look at same-level runs (X10).
1231 if(i>0 && levels[i-1]!=level) {
1232 flags|=DIRPROP_FLAG_MULTI_RUNS;
1233 if(level&UBIDI_LEVEL_OVERRIDE)
1234 flags|=DIRPROP_FLAG_O(level);
1236 flags|=DIRPROP_FLAG_E(level);
1238 if(DIRPROP_FLAG(dirProp)&MASK_ISO)
1239 level=embeddingLevel;
1241 if(flags&MASK_EMBEDDING) {
1242 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1244 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
1245 flags|=DIRPROP_FLAG(L);
1248 /* subsequently, ignore the explicit codes and BN (X9) */
1250 /* again, determine if the text is mixed-directional or single-directional */
1252 direction=directionFromFlags(pBiDi);
1258 * Use a pre-specified embedding levels array:
1260 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1261 * ignore all explicit codes (X9),
1262 * and check all the preset levels.
1264 * Recalculate the flags to have them reflect the real properties
1265 * after taking the explicit embeddings into account.
1267 static UBiDiDirection
1268 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1269 DirProp *dirProps=pBiDi->dirProps;
1271 UBiDiLevel *levels=pBiDi->levels;
1272 int32_t isolateCount=0;
1274 int32_t i, length=pBiDi->length;
1275 Flags flags=0; /* collect all directionalities in the text */
1277 pBiDi->isolateCount=0;
1279 for(i=0; i<length; ++i) {
1281 dirProp=dirProps[i];
1282 if(dirProp==LRI || dirProp==RLI) {
1284 if(isolateCount>pBiDi->isolateCount)
1285 pBiDi->isolateCount=isolateCount;
1287 else if(dirProp==PDI)
1291 if(level&UBIDI_LEVEL_OVERRIDE) {
1292 /* keep the override flag in levels[i] but adjust the flags */
1293 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
1294 flags|=DIRPROP_FLAG_O(level);
1297 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1299 if((level<GET_PARALEVEL(pBiDi, i) &&
1300 !((0==level)&&(dirProp==B))) ||
1301 (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
1302 /* level out of bounds */
1303 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1307 if(flags&MASK_EMBEDDING) {
1308 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1311 /* determine if the text is mixed-directional or single-directional */
1313 return directionFromFlags(pBiDi);
1316 /******************************************************************
1317 The Properties state machine table
1318 *******************************************************************
1320 All table cells are 8 bits:
1321 bits 0..4: next state
1322 bits 5..7: action to perform (if > 0)
1324 Cells may be of format "n" where n represents the next state
1325 (except for the rightmost column).
1326 Cells may also be of format "s(x,y)" where x represents an action
1327 to perform and y represents the next state.
1329 *******************************************************************
1330 Definitions and type for properties state table
1331 *******************************************************************
1333 #define IMPTABPROPS_COLUMNS 16
1334 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
1335 #define GET_STATEPROPS(cell) ((cell)&0x1f)
1336 #define GET_ACTIONPROPS(cell) ((cell)>>5)
1337 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
1339 static const uint8_t groupProp[] = /* dirProp regrouped */
1341 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
1342 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
1344 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1346 /******************************************************************
1348 PROPERTIES STATE TABLE
1350 In table impTabProps,
1351 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1352 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1353 - the Res column is the reduced property assigned to a run
1355 Action 1: process current run1, init new run1
1357 3: process run1, process run2, init new run1
1358 4: process run1, set run1=run2, init new run2
1361 1) This table is used in resolveImplicitLevels().
1362 2) This table triggers actions when there is a change in the Bidi
1363 property of incoming characters (action 1).
1364 3) Most such property sequences are processed immediately (in
1365 fact, passed to processPropertySeq().
1366 4) However, numbers are assembled as one sequence. This means
1367 that undefined situations (like CS following digits, until
1368 it is known if the next char will be a digit) are held until
1369 following chars define them.
1370 Example: digits followed by CS, then comes another CS or ON;
1371 the digits will be processed, then the CS assigned
1372 as the start of an ON sequence (action 3).
1373 5) There are cases where more than one sequence must be
1374 processed, for instance digits followed by CS followed by L:
1375 the digits must be processed as one sequence, and the CS
1376 must be processed as an ON sequence, all this before starting
1377 assembling chars for the opening L sequence.
1381 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1383 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1384 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
1385 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
1386 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
1387 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
1388 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
1389 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1390 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
1391 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1392 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1393 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1394 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1395 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1396 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1397 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1398 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1399 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1400 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1401 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1402 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1403 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1404 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1405 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1406 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1407 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
1410 /* we must undef macro s because the levels table have a different
1411 * structure (4 bits for action and 4 bits for next state.
1415 /******************************************************************
1416 The levels state machine tables
1417 *******************************************************************
1419 All table cells are 8 bits:
1420 bits 0..3: next state
1421 bits 4..7: action to perform (if > 0)
1423 Cells may be of format "n" where n represents the next state
1424 (except for the rightmost column).
1425 Cells may also be of format "s(x,y)" where x represents an action
1426 to perform and y represents the next state.
1428 This format limits each table to 16 states each and to 15 actions.
1430 *******************************************************************
1431 Definitions and type for levels state tables
1432 *******************************************************************
1434 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
1435 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1436 #define GET_STATE(cell) ((cell)&0x0f)
1437 #define GET_ACTION(cell) ((cell)>>4)
1438 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
1440 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1441 typedef uint8_t ImpAct[];
1443 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1444 * instead of having a pair of ImpTab and a pair of ImpAct.
1446 typedef struct ImpTabPair {
1447 const void * pImpTab[2];
1448 const void * pImpAct[2];
1451 /******************************************************************
1455 In all levels state tables,
1456 - state 0 is the initial state
1457 - the Res column is the increment to add to the text level
1458 for this property sequence.
1460 The impAct arrays for each table of a pair map the local action
1461 numbers of the table to the total list of actions. For instance,
1462 action 2 in a given table corresponds to the action number which
1463 appears in entry [2] of the impAct array for that table.
1464 The first entry of all impAct arrays must be 0.
1466 Action 1: init conditional sequence
1467 2: prepend conditional sequence to current sequence
1468 3: set ON sequence to new level - 1
1469 4: init EN/AN/ON sequence
1470 5: fix EN/AN/ON sequence followed by R
1471 6: set previous level sequence to level 2
1474 1) These tables are used in processPropertySeq(). The input
1475 is property sequences as determined by resolveImplicitLevels.
1476 2) Most such property sequences are processed immediately
1477 (levels are assigned).
1478 3) However, some sequences cannot be assigned a final level till
1479 one or more following sequences are received. For instance,
1480 ON following an R sequence within an even-level paragraph.
1481 If the following sequence is R, the ON sequence will be
1482 assigned basic run level+1, and so will the R sequence.
1483 4) S is generally handled like ON, since its level will be fixed
1484 to paragraph level in adjustWSLevels().
1488 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
1489 /* In this table, conditional sequences receive the higher possible level
1490 until proven otherwise.
1493 /* L , R , EN , AN , ON , S , B , Res */
1494 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1495 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1496 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1497 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1498 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
1499 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
1501 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
1502 /* In this table, conditional sequences receive the lower possible level
1503 until proven otherwise.
1506 /* L , R , EN , AN , ON , S , B , Res */
1507 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1508 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1509 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1510 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1511 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1512 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1514 static const ImpAct impAct0 = {0,1,2,3,4,5,6};
1515 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1517 {&impAct0, &impAct0}};
1519 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
1520 /* In this table, conditional sequences receive the higher possible level
1521 until proven otherwise.
1524 /* L , R , EN , AN , ON , S , B , Res */
1525 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 },
1526 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 },
1527 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 },
1528 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 },
1529 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1531 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1533 {&impAct0, &impAct0}};
1535 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1536 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1537 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1540 /* L , R , EN , AN , ON , S , B , Res */
1541 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1542 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1543 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1544 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1545 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1546 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1548 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1549 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1550 until proven that there is L on both sides. AN is handled like EN.
1553 /* L , R , EN , AN , ON , S , B , Res */
1554 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1555 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1556 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1557 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1558 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1560 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1561 {&impTabL_GROUP_NUMBERS_WITH_R,
1562 &impTabR_GROUP_NUMBERS_WITH_R},
1563 {&impAct0, &impAct0}};
1566 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1567 /* This table is identical to the Default LTR table except that EN and AN are
1571 /* L , R , EN , AN , ON , S , B , Res */
1572 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1573 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1574 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1575 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1576 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1577 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1579 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1580 /* This table is identical to the Default RTL table except that EN and AN are
1584 /* L , R , EN , AN , ON , S , B , Res */
1585 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1586 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1587 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1588 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1589 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1590 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1592 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1593 {&impTabL_INVERSE_NUMBERS_AS_L,
1594 &impTabR_INVERSE_NUMBERS_AS_L},
1595 {&impAct0, &impAct0}};
1597 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1598 /* In this table, conditional sequences receive the lower possible level
1599 until proven otherwise.
1602 /* L , R , EN , AN , ON , S , B , Res */
1603 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1604 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1605 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1606 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1607 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1608 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1609 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1611 static const ImpAct impAct1 = {0,1,11,12};
1612 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1614 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1616 &impTabR_INVERSE_LIKE_DIRECT},
1617 {&impAct0, &impAct1}};
1619 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1620 /* The case handled in this table is (visually): R EN L
1623 /* L , R , EN , AN , ON , S , B , Res */
1624 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1625 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1626 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1627 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1628 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1629 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1630 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1632 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1633 /* The cases handled in this table are (visually): R EN L
1637 /* L , R , EN , AN , ON , S , B , Res */
1638 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1639 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1640 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1641 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1642 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1643 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1644 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1646 static const ImpAct impAct2 = {0,1,7,8,9,10};
1647 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1648 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1649 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1650 {&impAct0, &impAct2}};
1652 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1653 {&impTabL_NUMBERS_SPECIAL,
1654 &impTabR_INVERSE_LIKE_DIRECT},
1655 {&impAct0, &impAct1}};
1657 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1658 /* The case handled in this table is (visually): R EN L
1661 /* L , R , EN , AN , ON , S , B , Res */
1662 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1663 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1664 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1665 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1666 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1668 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1669 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1670 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1671 {&impAct0, &impAct2}};
1676 const ImpTab * pImpTab; /* level table pointer */
1677 const ImpAct * pImpAct; /* action map array */
1678 int32_t startON; /* start of ON sequence */
1679 int32_t startL2EN; /* start of level 2 sequence */
1680 int32_t lastStrongRTL; /* index of last found R or AL */
1681 int32_t state; /* current state */
1682 int32_t runStart; /* start position of the run */
1683 UBiDiLevel runLevel; /* run level before implicit solving */
1686 /*------------------------------------------------------------------------*/
1689 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1690 /* param pos: position where to insert
1691 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1694 #define FIRSTALLOC 10
1696 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1698 if (pInsertPoints->capacity == 0)
1700 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1701 if (pInsertPoints->points == NULL)
1703 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1706 pInsertPoints->capacity=FIRSTALLOC;
1708 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1710 void * savePoints=pInsertPoints->points;
1711 pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1712 pInsertPoints->capacity*2*sizeof(Point));
1713 if (pInsertPoints->points == NULL)
1715 pInsertPoints->points=savePoints;
1716 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1719 else pInsertPoints->capacity*=2;
1723 pInsertPoints->points[pInsertPoints->size]=point;
1724 pInsertPoints->size++;
1728 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1731 * This implementation of the (Wn) rules applies all rules in one pass.
1732 * In order to do so, it needs a look-ahead of typically 1 character
1733 * (except for W5: sequences of ET) and keeps track of changes
1734 * in a rule Wp that affect a later Wq (p<q).
1736 * The (Nn) and (In) rules are also performed in that same single loop,
1737 * but effectively one iteration behind for white space.
1739 * Since all implicit rules are performed in one step, it is not necessary
1740 * to actually store the intermediate directional properties in dirProps[].
1744 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1745 int32_t start, int32_t limit) {
1746 uint8_t cell, oldStateSeq, actionSeq;
1747 const ImpTab * pImpTab=pLevState->pImpTab;
1748 const ImpAct * pImpAct=pLevState->pImpAct;
1749 UBiDiLevel * levels=pBiDi->levels;
1750 UBiDiLevel level, addLevel;
1751 InsertPoints * pInsertPoints;
1754 start0=start; /* save original start position */
1755 oldStateSeq=(uint8_t)pLevState->state;
1756 cell=(*pImpTab)[oldStateSeq][_prop];
1757 pLevState->state=GET_STATE(cell); /* isolate the new state */
1758 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1759 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1763 case 1: /* init ON seq */
1764 pLevState->startON=start0;
1767 case 2: /* prepend ON seq to current seq */
1768 start=pLevState->startON;
1771 case 3: /* L or S after possible relevant EN/AN */
1772 /* check if we had EN after R/AL */
1773 if (pLevState->startL2EN >= 0) {
1774 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1776 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
1777 /* check if we had any relevant EN/AN after R/AL */
1778 pInsertPoints=&(pBiDi->insertPoints);
1779 if ((pInsertPoints->capacity == 0) ||
1780 (pInsertPoints->size <= pInsertPoints->confirmed))
1782 /* nothing, just clean up */
1783 pLevState->lastStrongRTL=-1;
1784 /* check if we have a pending conditional segment */
1785 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1786 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
1787 start=pLevState->startON; /* reset to basic run level */
1789 if (_prop == DirProp_S) /* add LRM before S */
1791 addPoint(pBiDi, start0, LRM_BEFORE);
1792 pInsertPoints->confirmed=pInsertPoints->size;
1796 /* reset previous RTL cont to level for LTR text */
1797 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1799 /* reset odd level, leave runLevel+2 as is */
1800 levels[k]=(levels[k] - 2) & ~1;
1802 /* mark insert points as confirmed */
1803 pInsertPoints->confirmed=pInsertPoints->size;
1804 pLevState->lastStrongRTL=-1;
1805 if (_prop == DirProp_S) /* add LRM before S */
1807 addPoint(pBiDi, start0, LRM_BEFORE);
1808 pInsertPoints->confirmed=pInsertPoints->size;
1812 case 4: /* R/AL after possible relevant EN/AN */
1814 pInsertPoints=&(pBiDi->insertPoints);
1815 if (pInsertPoints->capacity > 0)
1816 /* remove all non confirmed insert points */
1817 pInsertPoints->size=pInsertPoints->confirmed;
1818 pLevState->startON=-1;
1819 pLevState->startL2EN=-1;
1820 pLevState->lastStrongRTL=limit - 1;
1823 case 5: /* EN/AN after R/AL + possible cont */
1824 /* check for real AN */
1825 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1826 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1829 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1831 /* just note the righmost digit as a strong RTL */
1832 pLevState->lastStrongRTL=limit - 1;
1835 if (pLevState->startL2EN >= 0) /* after EN, no AN */
1837 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1838 pLevState->startL2EN=-2;
1841 addPoint(pBiDi, start0, LRM_BEFORE);
1844 /* if first EN/AN after R/AL */
1845 if (pLevState->startL2EN == -1) {
1846 pLevState->startL2EN=start0;
1850 case 6: /* note location of latest R/AL */
1851 pLevState->lastStrongRTL=limit - 1;
1852 pLevState->startON=-1;
1855 case 7: /* L after R+ON/EN/AN */
1856 /* include possible adjacent number on the left */
1857 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1859 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
1860 pInsertPoints=&(pBiDi->insertPoints);
1861 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
1863 pLevState->startON=start0;
1866 case 8: /* AN after L */
1867 /* AN numbers between L text on both sides may be trouble. */
1868 /* tentatively bracket with LRMs; will be confirmed if followed by L */
1869 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
1870 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
1873 case 9: /* R after L+ON/EN/AN */
1874 /* false alert, infirm LRMs around previous AN */
1875 pInsertPoints=&(pBiDi->insertPoints);
1876 pInsertPoints->size=pInsertPoints->confirmed;
1877 if (_prop == DirProp_S) /* add RLM before S */
1879 addPoint(pBiDi, start0, RLM_BEFORE);
1880 pInsertPoints->confirmed=pInsertPoints->size;
1884 case 10: /* L after L+ON/AN */
1885 level=pLevState->runLevel + addLevel;
1886 for(k=pLevState->startON; k<start0; k++) {
1887 if (levels[k]<level)
1890 pInsertPoints=&(pBiDi->insertPoints);
1891 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
1892 pLevState->startON=start0;
1895 case 11: /* L after L+ON+EN/AN/ON */
1896 level=pLevState->runLevel;
1897 for(k=start0-1; k>=pLevState->startON; k--) {
1898 if(levels[k]==level+3) {
1899 while(levels[k]==level+3) {
1902 while(levels[k]==level) {
1906 if(levels[k]==level+2) {
1914 case 12: /* R after L+ON+EN/AN/ON */
1915 level=pLevState->runLevel+1;
1916 for(k=start0-1; k>=pLevState->startON; k--) {
1917 if(levels[k]>level) {
1923 default: /* we should never get here */
1928 if((addLevel) || (start < start0)) {
1929 level=pLevState->runLevel + addLevel;
1930 if(start>=pLevState->runStart) {
1931 for(k=start; k<limit; k++) {
1935 DirProp *dirProps=pBiDi->dirProps, dirProp;
1936 int32_t isolateCount=0;
1937 for(k=start; k<limit; k++) {
1938 dirProp=dirProps[k];
1943 if(dirProp==LRI || dirProp==RLI)
1951 * Returns the directionality of the last strong character at the end of the prologue, if any.
1952 * Requires prologue!=null.
1955 lastL_R_AL(UBiDi *pBiDi) {
1956 const UChar *text=pBiDi->prologue;
1957 int32_t length=pBiDi->proLength;
1961 for(i=length; i>0; ) {
1962 /* i is decremented by U16_PREV */
1963 U16_PREV(text, 0, i, uchar);
1964 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
1968 if(dirProp==R || dirProp==AL) {
1979 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
1980 * Requires epilogue!=null.
1983 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
1984 const UChar *text=pBiDi->epilogue;
1985 int32_t length=pBiDi->epiLength;
1989 for(i=0; i<length; ) {
1990 /* i is incremented by U16_NEXT */
1991 U16_NEXT(text, i, length, uchar);
1992 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
1996 if(dirProp==R || dirProp==AL) {
2010 resolveImplicitLevels(UBiDi *pBiDi,
2011 int32_t start, int32_t limit,
2012 DirProp sor, DirProp eor) {
2013 const DirProp *dirProps=pBiDi->dirProps;
2016 int32_t i, start1, start2;
2017 uint16_t oldStateImp, stateImp, actionImp;
2018 uint8_t gprop, resProp, cell;
2020 DirProp nextStrongProp=R;
2021 int32_t nextStrongPos=-1;
2023 /* check for RTL inverse BiDi mode */
2024 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2025 * loop on the text characters from end to start.
2026 * This would need a different properties state table (at least different
2027 * actions) and different levels state tables (maybe very similar to the
2028 * LTR corresponding ones.
2031 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2032 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2033 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2035 /* initialize for property and levels state tables */
2036 levState.startON=-1;
2037 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2038 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2039 levState.runStart=start;
2040 levState.runLevel=pBiDi->levels[start];
2041 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2042 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2043 if(start==0 && pBiDi->proLength>0) {
2044 DirProp lastStrong=lastL_R_AL(pBiDi);
2045 if(lastStrong!=DirProp_ON) {
2049 /* The isolates[] entries contain enough information to
2050 resume the bidi algorithm in the same state as it was
2051 when it was interrupted by an isolate sequence. */
2052 if(dirProps[start]==PDI) {
2053 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2054 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2055 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2056 pBiDi->isolateCount--;
2059 if(dirProps[start]==NSM)
2064 processPropertySeq(pBiDi, &levState, sor, start, start);
2068 for(i=start; i<=limit; i++) {
2071 dirProp=pBiDi->dirProps[limit-1];
2072 if(dirProp==LRI || dirProp==RLI)
2073 break; /* no forced closing for sequence ending with LRI/RLI */
2077 DirProp prop, prop1;
2078 prop=PURE_DIRPROP(dirProps[i]);
2081 /* AL before EN does not make it AN */
2083 } else if(prop==EN) {
2084 if(nextStrongPos<=i) {
2085 /* look for next strong char (L/R/AL) */
2087 nextStrongProp=R; /* set default */
2088 nextStrongPos=limit;
2089 for(j=i+1; j<limit; j++) {
2091 if(prop1==L || prop1==R || prop1==AL) {
2092 nextStrongProp=prop1;
2098 if(nextStrongProp==AL) {
2103 gprop=groupProp[prop];
2105 oldStateImp=stateImp;
2106 cell=impTabProps[oldStateImp][gprop];
2107 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
2108 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
2109 if((i==limit) && (actionImp==0)) {
2110 /* there is an unprocessed sequence if its property == eor */
2111 actionImp=1; /* process the last sequence */
2114 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2116 case 1: /* process current seq1, init new seq1 */
2117 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2120 case 2: /* init new seq2 */
2123 case 3: /* process seq1, process seq2, init new seq1 */
2124 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2125 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2128 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2129 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2133 default: /* we should never get here */
2140 /* flush possible pending sequence, e.g. ON */
2141 if(limit==pBiDi->length && pBiDi->epiLength>0) {
2142 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2143 if(firstStrong!=DirProp_ON) {
2148 dirProp=dirProps[limit-1];
2149 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2150 pBiDi->isolateCount++;
2151 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2152 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2153 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2156 processPropertySeq(pBiDi, &levState, eor, limit, limit);
2159 /* perform (L1) and (X9) ---------------------------------------------------- */
2162 * Reset the embedding levels for some non-graphic characters (L1).
2163 * This function also sets appropriate levels for BN, and
2164 * explicit embedding types that are supposed to have been removed
2165 * from the paragraph in (X9).
2168 adjustWSLevels(UBiDi *pBiDi) {
2169 const DirProp *dirProps=pBiDi->dirProps;
2170 UBiDiLevel *levels=pBiDi->levels;
2173 if(pBiDi->flags&MASK_WS) {
2174 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2177 i=pBiDi->trailingWSStart;
2179 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2180 while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) {
2181 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2184 levels[i]=GET_PARALEVEL(pBiDi, i);
2188 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2189 /* here, i+1 is guaranteed to be <length */
2191 flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i]));
2192 if(flag&MASK_BN_EXPLICIT) {
2193 levels[i]=levels[i+1];
2194 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2197 } else if(flag&MASK_B_S) {
2198 levels[i]=GET_PARALEVEL(pBiDi, i);
2206 U_CAPI void U_EXPORT2
2207 ubidi_setContext(UBiDi *pBiDi,
2208 const UChar *prologue, int32_t proLength,
2209 const UChar *epilogue, int32_t epiLength,
2210 UErrorCode *pErrorCode) {
2211 /* check the argument values */
2212 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2213 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2214 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2215 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2220 pBiDi->proLength=u_strlen(prologue);
2222 pBiDi->proLength=proLength;
2225 pBiDi->epiLength=u_strlen(epilogue);
2227 pBiDi->epiLength=epiLength;
2229 pBiDi->prologue=prologue;
2230 pBiDi->epilogue=epilogue;
2234 setParaSuccess(UBiDi *pBiDi) {
2235 pBiDi->proLength=0; /* forget the last context */
2237 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2240 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
2241 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
2244 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2245 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2246 void *runsOnlyMemory;
2249 int32_t saveLength, saveTrailingWSStart;
2250 const UBiDiLevel *levels;
2251 UBiDiLevel *saveLevels;
2252 UBiDiDirection saveDirection;
2253 UBool saveMayAllocateText;
2255 int32_t visualLength, i, j, visualStart, logicalStart,
2256 runCount, runLength, addedRuns, insertRemove,
2257 start, limit, step, indexOddBit, logicalPos,
2259 uint32_t saveOptions;
2261 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2263 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2266 /* obtain memory for mapping table and visual text */
2267 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
2268 if(runsOnlyMemory==NULL) {
2269 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2272 visualMap=runsOnlyMemory;
2273 visualText=(UChar *)&visualMap[length];
2274 saveLevels=(UBiDiLevel *)&visualText[length];
2275 saveOptions=pBiDi->reorderingOptions;
2276 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2277 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2278 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2280 paraLevel&=1; /* accept only 0 or 1 */
2281 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2282 if(U_FAILURE(*pErrorCode)) {
2285 /* we cannot access directly pBiDi->levels since it is not yet set if
2286 * direction is not MIXED
2288 levels=ubidi_getLevels(pBiDi, pErrorCode);
2289 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
2290 saveTrailingWSStart=pBiDi->trailingWSStart;
2291 saveLength=pBiDi->length;
2292 saveDirection=pBiDi->direction;
2294 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2295 * the visual map and the dirProps array to drive the second call
2296 * to ubidi_setPara (but must make provision for possible removal of
2297 * BiDi controls. Alternatively, only use the dirProps array via
2298 * customized classifier callback.
2300 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2301 UBIDI_DO_MIRRORING, pErrorCode);
2302 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2303 if(U_FAILURE(*pErrorCode)) {
2306 pBiDi->reorderingOptions=saveOptions;
2308 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2310 /* Because what we did with reorderingOptions, visualText may be shorter
2311 * than the original text. But we don't want the levels memory to be
2312 * reallocated shorter than the original length, since we need to restore
2313 * the levels as after the first call to ubidi_setpara() before returning.
2314 * We will force mayAllocateText to FALSE before the second call to
2315 * ubidi_setpara(), and will restore it afterwards.
2317 saveMayAllocateText=pBiDi->mayAllocateText;
2318 pBiDi->mayAllocateText=FALSE;
2319 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2320 pBiDi->mayAllocateText=saveMayAllocateText;
2321 ubidi_getRuns(pBiDi, pErrorCode);
2322 if(U_FAILURE(*pErrorCode)) {
2325 /* check if some runs must be split, count how many splits */
2327 runCount=pBiDi->runCount;
2330 for(i=0; i<runCount; i++, visualStart+=runLength) {
2331 runLength=runs[i].visualLimit-visualStart;
2335 logicalStart=GET_INDEX(runs[i].logicalStart);
2336 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2337 index0=visualMap[j];
2338 index1=visualMap[j-1];
2339 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2345 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2347 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2348 pBiDi->runsMemory[0]=runs[0];
2350 runs=pBiDi->runs=pBiDi->runsMemory;
2351 pBiDi->runCount+=addedRuns;
2356 /* split runs which are not consecutive in source text */
2357 for(i=runCount-1; i>=0; i--) {
2358 runLength= i==0 ? runs[0].visualLimit :
2359 runs[i].visualLimit-runs[i-1].visualLimit;
2360 logicalStart=runs[i].logicalStart;
2361 indexOddBit=GET_ODD_BIT(logicalStart);
2362 logicalStart=GET_INDEX(logicalStart);
2365 runs[i+addedRuns]=runs[i];
2367 logicalPos=visualMap[logicalStart];
2368 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2369 saveLevels[logicalPos]^indexOddBit);
2374 limit=logicalStart+runLength-1;
2377 start=logicalStart+runLength-1;
2381 for(j=start; j!=limit; j+=step) {
2382 index0=visualMap[j];
2383 index1=visualMap[j+step];
2384 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2385 logicalPos=BIDI_MIN(visualMap[start], index0);
2386 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2387 saveLevels[logicalPos]^indexOddBit);
2388 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2389 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2390 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2391 runs[i+addedRuns].insertRemove=insertRemove;
2392 runs[i].insertRemove&=~insertRemove;
2398 runs[i+addedRuns]=runs[i];
2400 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2401 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2402 saveLevels[logicalPos]^indexOddBit);
2406 /* restore initial paraLevel */
2407 pBiDi->paraLevel^=1;
2409 /* restore real text */
2411 pBiDi->length=saveLength;
2412 pBiDi->originalLength=length;
2413 pBiDi->direction=saveDirection;
2414 /* the saved levels should never excess levelsSize, but we check anyway */
2415 if(saveLength>pBiDi->levelsSize) {
2416 saveLength=pBiDi->levelsSize;
2418 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
2419 pBiDi->trailingWSStart=saveTrailingWSStart;
2420 /* free memory for mapping table and visual text */
2421 uprv_free(runsOnlyMemory);
2422 if(pBiDi->runCount>1) {
2423 pBiDi->direction=UBIDI_MIXED;
2426 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2429 /* ubidi_setPara ------------------------------------------------------------ */
2431 U_CAPI void U_EXPORT2
2432 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2433 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2434 UErrorCode *pErrorCode) {
2435 UBiDiDirection direction;
2437 /* check the argument values */
2438 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2439 if(pBiDi==NULL || text==NULL || length<-1 ||
2440 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2441 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2446 length=u_strlen(text);
2449 /* special treatment for RUNS_ONLY mode */
2450 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2451 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2455 /* initialize the UBiDi structure */
2456 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
2458 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2459 pBiDi->paraLevel=paraLevel;
2460 pBiDi->direction=paraLevel&1;
2463 pBiDi->dirProps=NULL;
2466 pBiDi->insertPoints.size=0; /* clean up from last call */
2467 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
2470 * Save the original paraLevel if contextual; otherwise, set to 0.
2472 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2476 * For an empty paragraph, create a UBiDi object with the paraLevel and
2477 * the flags and the direction set but without allocating zero-length arrays.
2478 * There is nothing more to do.
2480 if(IS_DEFAULT_LEVEL(paraLevel)) {
2481 pBiDi->paraLevel&=1;
2482 pBiDi->defaultParaLevel=0;
2484 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2487 setParaSuccess(pBiDi); /* mark successful setPara */
2493 /* allocate paras memory */
2494 if(pBiDi->parasMemory)
2495 pBiDi->paras=pBiDi->parasMemory;
2497 pBiDi->paras=pBiDi->simpleParas;
2500 * Get the directional properties,
2501 * the flags bit-set, and
2502 * determine the paragraph level if necessary.
2504 if(getDirPropsMemory(pBiDi, length)) {
2505 pBiDi->dirProps=pBiDi->dirPropsMemory;
2506 if(!getDirProps(pBiDi)) {
2507 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2511 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2514 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2515 length= pBiDi->length;
2516 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
2518 /* are explicit levels specified? */
2519 if(embeddingLevels==NULL) {
2520 /* no: determine explicit levels according to the (Xn) rules */\
2521 if(getLevelsMemory(pBiDi, length)) {
2522 pBiDi->levels=pBiDi->levelsMemory;
2523 direction=resolveExplicitLevels(pBiDi, pErrorCode);
2524 if(U_FAILURE(*pErrorCode)) {
2528 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2532 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2533 pBiDi->levels=embeddingLevels;
2534 direction=checkExplicitLevels(pBiDi, pErrorCode);
2535 if(U_FAILURE(*pErrorCode)) {
2540 /* allocate isolate memory */
2541 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE)
2542 pBiDi->isolates=pBiDi->simpleIsolates;
2544 if(pBiDi->isolateCount<=pBiDi->isolatesSize)
2545 pBiDi->isolates=pBiDi->isolatesMemory;
2547 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2548 pBiDi->isolates=pBiDi->isolatesMemory;
2550 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2554 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2557 * The steps after (X9) in the UBiDi algorithm are performed only if
2558 * the paragraph text has mixed directionality!
2560 pBiDi->direction=direction;
2563 /* make sure paraLevel is even */
2564 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
2566 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2567 pBiDi->trailingWSStart=0;
2570 /* make sure paraLevel is odd */
2571 pBiDi->paraLevel|=1;
2573 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2574 pBiDi->trailingWSStart=0;
2578 * Choose the right implicit state table
2580 switch(pBiDi->reorderingMode) {
2581 case UBIDI_REORDER_DEFAULT:
2582 pBiDi->pImpTabPair=&impTab_DEFAULT;
2584 case UBIDI_REORDER_NUMBERS_SPECIAL:
2585 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2587 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2588 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2590 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2591 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2593 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2594 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2595 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2597 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2600 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2601 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2602 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2604 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2608 /* we should never get here */
2613 * If there are no external levels specified and there
2614 * are no significant explicit level codes in the text,
2615 * then we can treat the entire paragraph as one run.
2616 * Otherwise, we need to perform the following rules on runs of
2617 * the text with the same embedding levels. (X10)
2618 * "Significant" explicit level codes are ones that actually
2619 * affect non-BN characters.
2620 * Examples for "insignificant" ones are empty embeddings
2621 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2623 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2624 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2625 resolveImplicitLevels(pBiDi, 0, length,
2626 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2627 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2629 /* sor, eor: start and end types of same-level-run */
2630 UBiDiLevel *levels=pBiDi->levels;
2631 int32_t start, limit=0;
2632 UBiDiLevel level, nextLevel;
2635 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2636 level=GET_PARALEVEL(pBiDi, 0);
2637 nextLevel=levels[0];
2638 if(level<nextLevel) {
2639 eor=GET_LR_FROM_LEVEL(nextLevel);
2641 eor=GET_LR_FROM_LEVEL(level);
2645 /* determine start and limit of the run (end points just behind the run) */
2647 /* the values for this run's start are the same as for the previous run's end */
2650 if((start>0) && (pBiDi->dirProps[start-1]==B)) {
2651 /* except if this is a new paragraph, then set sor = para level */
2652 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2657 /* search for the limit of this run */
2658 while(++limit<length && levels[limit]==level) {}
2660 /* get the correct level of the next run */
2662 nextLevel=levels[limit];
2664 nextLevel=GET_PARALEVEL(pBiDi, length-1);
2667 /* determine eor from max(level, nextLevel); sor is last run's eor */
2668 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
2669 eor=GET_LR_FROM_LEVEL(nextLevel);
2671 eor=GET_LR_FROM_LEVEL(level);
2674 /* if the run consists of overridden directional types, then there
2675 are no implicit types to be resolved */
2676 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2677 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2679 /* remove the UBIDI_LEVEL_OVERRIDE flags */
2681 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2682 } while(start<limit);
2684 } while(limit<length);
2686 /* check if we got any memory shortage while adding insert points */
2687 if (U_FAILURE(pBiDi->insertPoints.errorCode))
2689 *pErrorCode=pBiDi->insertPoints.errorCode;
2692 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2693 adjustWSLevels(pBiDi);
2696 /* add RLM for inverse Bidi with contextual orientation resolving
2697 * to RTL which would not round-trip otherwise
2699 if((pBiDi->defaultParaLevel>0) &&
2700 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2701 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2702 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2703 int32_t i, j, start, last;
2706 for(i=0; i<pBiDi->paraCount; i++) {
2707 last=(pBiDi->paras[i].limit)-1;
2708 level=pBiDi->paras[i].level;
2710 continue; /* LTR paragraph */
2711 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2712 for(j=last; j>=start; j--) {
2713 dirProp=pBiDi->dirProps[j];
2716 while(pBiDi->dirProps[last]==B) {
2720 addPoint(pBiDi, last, RLM_BEFORE);
2723 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2730 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2731 pBiDi->resultLength -= pBiDi->controlCount;
2733 pBiDi->resultLength += pBiDi->insertPoints.size;
2735 setParaSuccess(pBiDi); /* mark successful setPara */
2738 U_CAPI void U_EXPORT2
2739 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2741 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2745 U_CAPI UBool U_EXPORT2
2746 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2748 return pBiDi->orderParagraphsLTR;
2754 U_CAPI UBiDiDirection U_EXPORT2
2755 ubidi_getDirection(const UBiDi *pBiDi) {
2756 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2757 return pBiDi->direction;
2763 U_CAPI const UChar * U_EXPORT2
2764 ubidi_getText(const UBiDi *pBiDi) {
2765 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2772 U_CAPI int32_t U_EXPORT2
2773 ubidi_getLength(const UBiDi *pBiDi) {
2774 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2775 return pBiDi->originalLength;
2781 U_CAPI int32_t U_EXPORT2
2782 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2783 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2784 return pBiDi->length;
2790 U_CAPI int32_t U_EXPORT2
2791 ubidi_getResultLength(const UBiDi *pBiDi) {
2792 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2793 return pBiDi->resultLength;
2799 /* paragraphs API functions ------------------------------------------------- */
2801 U_CAPI UBiDiLevel U_EXPORT2
2802 ubidi_getParaLevel(const UBiDi *pBiDi) {
2803 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2804 return pBiDi->paraLevel;
2810 U_CAPI int32_t U_EXPORT2
2811 ubidi_countParagraphs(UBiDi *pBiDi) {
2812 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2815 return pBiDi->paraCount;
2819 U_CAPI void U_EXPORT2
2820 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2821 int32_t *pParaStart, int32_t *pParaLimit,
2822 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2825 /* check the argument values */
2826 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2827 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2828 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2830 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2832 paraStart=pBiDi->paras[paraIndex-1].limit;
2836 if(pParaStart!=NULL) {
2837 *pParaStart=paraStart;
2839 if(pParaLimit!=NULL) {
2840 *pParaLimit=pBiDi->paras[paraIndex].limit;
2842 if(pParaLevel!=NULL) {
2843 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2847 U_CAPI int32_t U_EXPORT2
2848 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2849 int32_t *pParaStart, int32_t *pParaLimit,
2850 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2853 /* check the argument values */
2854 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2855 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2856 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2857 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2858 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2860 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2861 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2865 U_CAPI void U_EXPORT2
2866 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2867 const void *newContext, UBiDiClassCallback **oldFn,
2868 const void **oldContext, UErrorCode *pErrorCode)
2870 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2872 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2877 *oldFn = pBiDi->fnClassCallback;
2881 *oldContext = pBiDi->coClassCallback;
2883 pBiDi->fnClassCallback = newFn;
2884 pBiDi->coClassCallback = newContext;
2887 U_CAPI void U_EXPORT2
2888 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2895 *fn = pBiDi->fnClassCallback;
2899 *context = pBiDi->coClassCallback;
2903 U_CAPI UCharDirection U_EXPORT2
2904 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2908 if( pBiDi->fnClassCallback == NULL ||
2909 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2911 dir = ubidi_getClass(pBiDi->bdp, c);
2913 if(dir >= U_CHAR_DIRECTION_COUNT) {