1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
10 * file name: ubidiln.c
12 * tab size: 8 (not used)
15 * created on: 1999aug06
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
22 #include "unicode/uchar.h"
23 #include "unicode/ubidi.h"
28 * General remarks about the functions in this file:
30 * These functions deal with the aspects of potentially mixed-directional
31 * text in a single paragraph or in a line of a single paragraph
32 * which has already been processed according to
33 * the Unicode 6.3 BiDi algorithm as defined in
34 * http://www.unicode.org/unicode/reports/tr9/ , version 28,
35 * also described in The Unicode Standard, Version 6.3.0 .
37 * This means that there is a UBiDi object with a levels
38 * and a dirProps array.
39 * paraLevel and direction are also set.
40 * Only if the length of the text is zero, then levels==dirProps==NULL.
42 * The overall directionality of the paragraph
43 * or line is used to bypass the reordering steps if possible.
44 * Even purely RTL text does not need reordering there because
45 * the ubidi_getLogical/VisualIndex() functions can compute the
46 * index on the fly in such a case.
48 * The implementation of the access to same-level-runs and of the reordering
49 * do attempt to provide better performance and less memory usage compared to
50 * a direct implementation of especially rule (L2) with an array of
51 * one (32-bit) integer per text character.
53 * Here, the levels array is scanned as soon as necessary, and a vector of
54 * same-level-runs is created. Reordering then is done on this vector.
55 * For each run of text positions that were resolved to the same level,
56 * only 8 bytes are stored: the first text position of the run and the visual
57 * position behind the run after reordering.
58 * One sign bit is used to hold the directionality of the run.
59 * This is inefficient if there are many very short runs. If the average run
60 * length is <2, then this uses more memory.
62 * In a further attempt to save memory, the levels array is never changed
63 * after all the resolution rules (Xn, Wn, Nn, In).
64 * Many functions have to consider the field trailingWSStart:
65 * if it is less than length, then there is an implicit trailing run
67 * which is not reflected in the levels array.
68 * This allows a line UBiDi object to use the same levels array as
69 * its paragraph parent object.
71 * When a UBiDi object is created for a line of a paragraph, then the
72 * paragraph's levels and dirProps arrays are reused by way of setting
73 * a pointer into them, not by copying. This again saves memory and forbids to
74 * change the now shared levels for (L1).
77 /* handle trailing WS (L1) -------------------------------------------------- */
80 * setTrailingWSStart() sets the start index for a trailing
81 * run of WS in the line. This is necessary because we do not modify
82 * the paragraph's levels array that we just point into.
83 * Using trailingWSStart is another form of performing (L1).
85 * To make subsequent operations easier, we also include the run
86 * before the WS if it is at the paraLevel - we merge the two here.
88 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
89 * set correctly for the line even when contextual multiple paragraphs.
92 setTrailingWSStart(UBiDi *pBiDi) {
93 /* pBiDi->direction!=UBIDI_MIXED */
95 const DirProp *dirProps=pBiDi->dirProps;
96 UBiDiLevel *levels=pBiDi->levels;
97 int32_t start=pBiDi->length;
98 UBiDiLevel paraLevel=pBiDi->paraLevel;
100 /* If the line is terminated by a block separator, all preceding WS etc...
101 are already set to paragraph level.
102 Setting trailingWSStart to pBidi->length will avoid changing the
103 level of B chars from 0 to paraLevel in ubidi_getLevels when
104 orderParagraphsLTR==TRUE.
106 if(dirProps[start-1]==B) {
107 pBiDi->trailingWSStart=start; /* currently == pBiDi->length */
110 /* go backwards across all WS, BN, explicit codes */
111 while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
115 /* if the WS run can be merged with the previous run then do so here */
116 while(start>0 && levels[start-1]==paraLevel) {
120 pBiDi->trailingWSStart=start;
123 /* ubidi_setLine ------------------------------------------------------------ */
125 U_CAPI void U_EXPORT2
126 ubidi_setLine(const UBiDi *pParaBiDi,
127 int32_t start, int32_t limit,
129 UErrorCode *pErrorCode) {
132 /* check the argument values */
133 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
134 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode);
135 RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode);
136 RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode);
137 if(pLineBiDi==NULL) {
138 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
141 if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) !=
142 ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) {
143 /* the line crosses a paragraph boundary */
144 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
148 /* set the values in pLineBiDi from its pParaBiDi parent */
149 pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */
150 pLineBiDi->text=pParaBiDi->text+start;
151 length=pLineBiDi->length=limit-start;
152 pLineBiDi->resultLength=pLineBiDi->originalLength=length;
153 pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start);
154 pLineBiDi->paraCount=pParaBiDi->paraCount;
155 pLineBiDi->runs=NULL;
157 pLineBiDi->reorderingMode=pParaBiDi->reorderingMode;
158 pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions;
159 pLineBiDi->controlCount=0;
160 if(pParaBiDi->controlCount>0) {
162 for(j=start; j<limit; j++) {
163 if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) {
164 pLineBiDi->controlCount++;
167 pLineBiDi->resultLength-=pLineBiDi->controlCount;
170 pLineBiDi->dirProps=pParaBiDi->dirProps+start;
171 pLineBiDi->levels=pParaBiDi->levels+start;
172 pLineBiDi->runCount=-1;
174 if(pParaBiDi->direction!=UBIDI_MIXED) {
175 /* the parent is already trivial */
176 pLineBiDi->direction=pParaBiDi->direction;
179 * The parent's levels are all either
180 * implicitly or explicitly ==paraLevel;
183 if(pParaBiDi->trailingWSStart<=start) {
184 pLineBiDi->trailingWSStart=0;
185 } else if(pParaBiDi->trailingWSStart<limit) {
186 pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start;
188 pLineBiDi->trailingWSStart=length;
191 const UBiDiLevel *levels=pLineBiDi->levels;
192 int32_t i, trailingWSStart;
195 setTrailingWSStart(pLineBiDi);
196 trailingWSStart=pLineBiDi->trailingWSStart;
198 /* recalculate pLineBiDi->direction */
199 if(trailingWSStart==0) {
200 /* all levels are at paraLevel */
201 pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1);
203 /* get the level of the first character */
204 level=(UBiDiLevel)(levels[0]&1);
206 /* if there is anything of a different level, then the line is mixed */
207 if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) {
208 /* the trailing WS is at paraLevel, which differs from levels[0] */
209 pLineBiDi->direction=UBIDI_MIXED;
211 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
214 if(i==trailingWSStart) {
215 /* the direction values match those in level */
216 pLineBiDi->direction=(UBiDiDirection)level;
218 } else if((levels[i]&1)!=level) {
219 pLineBiDi->direction=UBIDI_MIXED;
227 switch(pLineBiDi->direction) {
229 /* make sure paraLevel is even */
230 pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1);
232 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
233 pLineBiDi->trailingWSStart=0;
236 /* make sure paraLevel is odd */
237 pLineBiDi->paraLevel|=1;
239 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
240 pLineBiDi->trailingWSStart=0;
246 pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */
250 U_CAPI UBiDiLevel U_EXPORT2
251 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) {
252 /* return paraLevel if in the trailing WS run, otherwise the real level */
253 if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) {
255 } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) {
256 return GET_PARALEVEL(pBiDi, charIndex);
258 return pBiDi->levels[charIndex];
262 U_CAPI const UBiDiLevel * U_EXPORT2
263 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
264 int32_t start, length;
266 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL);
267 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL);
268 if((length=pBiDi->length)<=0) {
269 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
272 if((start=pBiDi->trailingWSStart)==length) {
273 /* the current levels array reflects the WS run */
274 return pBiDi->levels;
278 * After the previous if(), we know that the levels array
279 * has an implicit trailing WS run and therefore does not fully
280 * reflect itself all the levels.
281 * This must be a UBiDi object for a line, and
282 * we need to create a new levels array.
284 if(getLevelsMemory(pBiDi, length)) {
285 UBiDiLevel *levels=pBiDi->levelsMemory;
287 if(start>0 && levels!=pBiDi->levels) {
288 uprv_memcpy(levels, pBiDi->levels, start);
290 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
291 since pBidi is a line object */
292 uprv_memset(levels+start, pBiDi->paraLevel, length-start);
294 /* this new levels array is set for the line and reflects the WS run */
295 pBiDi->trailingWSStart=length;
296 return pBiDi->levels=levels;
299 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
304 U_CAPI void U_EXPORT2
305 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
306 int32_t *pLogicalLimit, UBiDiLevel *pLevel) {
307 UErrorCode errorCode;
308 int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
311 errorCode=U_ZERO_ERROR;
312 RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode);
313 /* ubidi_countRuns will check VALID_PARA_OR_LINE */
314 runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode);
315 if(U_FAILURE(errorCode)) {
318 /* this is done based on runs rather than on levels since levels have
319 a special interpretation when UBIDI_REORDER_RUNS_ONLY
321 visualStart=logicalLimit=0;
324 for(i=0; i<runCount; i++) {
325 iRun = pBiDi->runs[i];
326 logicalFirst=GET_INDEX(iRun.logicalStart);
327 logicalLimit=logicalFirst+iRun.visualLimit-visualStart;
328 if((logicalPosition>=logicalFirst) &&
329 (logicalPosition<logicalLimit)) {
332 visualStart = iRun.visualLimit;
335 *pLogicalLimit=logicalLimit;
338 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
339 *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart);
341 else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) {
342 *pLevel=GET_PARALEVEL(pBiDi, logicalPosition);
344 *pLevel=pBiDi->levels[logicalPosition];
349 /* runs API functions ------------------------------------------------------- */
351 U_CAPI int32_t U_EXPORT2
352 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
353 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
354 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
355 ubidi_getRuns(pBiDi, pErrorCode);
356 if(U_FAILURE(*pErrorCode)) {
359 return pBiDi->runCount;
362 U_CAPI UBiDiDirection U_EXPORT2
363 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
364 int32_t *pLogicalStart, int32_t *pLength)
367 UErrorCode errorCode = U_ZERO_ERROR;
368 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR);
369 ubidi_getRuns(pBiDi, &errorCode);
370 if(U_FAILURE(errorCode)) {
373 RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR);
375 start=pBiDi->runs[runIndex].logicalStart;
376 if(pLogicalStart!=NULL) {
377 *pLogicalStart=GET_INDEX(start);
381 *pLength=pBiDi->runs[runIndex].visualLimit-
382 pBiDi->runs[runIndex-1].visualLimit;
384 *pLength=pBiDi->runs[0].visualLimit;
387 return (UBiDiDirection)GET_ODD_BIT(start);
390 /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
392 getSingleRun(UBiDi *pBiDi, UBiDiLevel level) {
393 /* simple, single-run case */
394 pBiDi->runs=pBiDi->simpleRuns;
397 /* fill and reorder the single run */
398 pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level);
399 pBiDi->runs[0].visualLimit=pBiDi->length;
400 pBiDi->runs[0].insertRemove=0;
403 /* reorder the runs array (L2) ---------------------------------------------- */
406 * Reorder the same-level runs in the runs array.
407 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
408 * All the visualStart fields=logical start before reordering.
409 * The "odd" bits are not set yet.
411 * Reordering with this data structure lends itself to some handy shortcuts:
413 * Since each run is moved but not modified, and since at the initial maxLevel
414 * each sequence of same-level runs consists of only one run each, we
415 * don't need to do anything there and can predecrement maxLevel.
416 * In many simple cases, the reordering is thus done entirely in the
418 * Also, reordering occurs only down to the lowest odd level that occurs,
419 * which is minLevel|1. However, if the lowest level itself is odd, then
420 * in the last reordering the sequence of the runs at this level or higher
421 * will be all runs, and we don't need the elaborate loop to search for them.
422 * This is covered by ++minLevel instead of minLevel|=1 followed
423 * by an extra reorder-all after the reorder-some loop.
424 * About a trailing WS run:
425 * Such a run would need special treatment because its level is not
426 * reflected in levels[] if this is not a paragraph object.
427 * Instead, all characters from trailingWSStart on are implicitly at
429 * However, for all maxLevel>paraLevel, this run will never be reordered
430 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
431 * if minLevel==paraLevel is odd, which is done in the extra segment.
432 * This means that for the main reordering loop we don't need to consider
433 * this run and can --runCount. If it is later part of the all-runs
434 * reordering, then runCount is adjusted accordingly.
437 reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) {
440 int32_t firstRun, endRun, limitRun, runCount;
443 if(maxLevel<=(minLevel|1)) {
448 * Reorder only down to the lowest odd level
449 * and reorder at an odd minLevel in a separate, simpler loop.
450 * See comments above for why minLevel is always incremented.
455 levels=pBiDi->levels;
456 runCount=pBiDi->runCount;
458 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
459 if(pBiDi->trailingWSStart<pBiDi->length) {
463 while(--maxLevel>=minLevel) {
466 /* loop for all sequences of runs */
468 /* look for a sequence of runs that are all at >=maxLevel */
469 /* look for the first run of such a sequence */
470 while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) {
473 if(firstRun>=runCount) {
474 break; /* no more such runs */
477 /* look for the limit run of such a sequence (the run behind it) */
478 for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {}
480 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
482 while(firstRun<endRun) {
483 tempRun = runs[firstRun];
484 runs[firstRun]=runs[endRun];
485 runs[endRun]=tempRun;
490 if(limitRun==runCount) {
491 break; /* no more such runs */
498 /* now do maxLevel==old minLevel (==odd!), see above */
502 /* include the trailing WS run in this complete reordering */
503 if(pBiDi->trailingWSStart==pBiDi->length) {
507 /* Swap the entire sequence of all runs. (endRun==runCount) */
508 while(firstRun<runCount) {
509 tempRun=runs[firstRun];
510 runs[firstRun]=runs[runCount];
511 runs[runCount]=tempRun;
518 /* compute the runs array --------------------------------------------------- */
520 static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
521 Run *runs=pBiDi->runs;
522 int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart;
524 for(i=0; i<runCount; i++) {
525 length=runs[i].visualLimit-visualStart;
526 logicalStart=GET_INDEX(runs[i].logicalStart);
527 if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) {
532 /* we should never get here */
534 *pErrorCode = U_INVALID_STATE_ERROR;
539 * Compute the runs array from the levels array.
540 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
541 * and the runs are reordered.
542 * Odd-level runs have visualStart on their visual right edge and
543 * they progress visually to the left.
544 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
545 * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
546 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
547 * negative number of BiDi control characters within this run.
550 ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
552 * This method returns immediately if the runs are already set. This
553 * includes the case of length==0 (handled in setPara)..
555 if (pBiDi->runCount>=0) {
559 if(pBiDi->direction!=UBIDI_MIXED) {
560 /* simple, single-run case - this covers length==0 */
561 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
562 getSingleRun(pBiDi, pBiDi->paraLevel);
563 } else /* UBIDI_MIXED, length>0 */ {
564 /* mixed directionality */
565 int32_t length=pBiDi->length, limit;
566 UBiDiLevel *levels=pBiDi->levels;
568 UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */
570 * If there are WS characters at the end of the line
571 * and the run preceding them has a level different from
572 * paraLevel, then they will form their own run at paraLevel (L1).
573 * Count them separately.
574 * We need some special treatment for this in order to not
575 * modify the levels array which a line UBiDi object shares
576 * with its paragraph parent and its other line siblings.
577 * In other words, for the trailing WS, it may be
578 * levels[]!=paraLevel but we have to treat it like it were so.
580 limit=pBiDi->trailingWSStart;
581 /* count the runs, there is at least one non-WS run, and limit>0 */
583 for(i=0; i<limit; ++i) {
584 /* increment runCount at the start of each run */
585 if(levels[i]!=level) {
592 * We don't need to see if the last run can be merged with a trailing
593 * WS run because setTrailingWSStart() would have done that.
595 if(runCount==1 && limit==length) {
596 /* There is only one non-WS run and no trailing WS-run. */
597 getSingleRun(pBiDi, levels[0]);
598 } else /* runCount>1 || limit<length */ {
599 /* allocate and set the runs */
601 int32_t runIndex, start;
602 UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
604 /* now, count a (non-mergeable) WS run */
610 if(getRunsMemory(pBiDi, runCount)) {
611 runs=pBiDi->runsMemory;
617 /* FOOD FOR THOUGHT: this could be optimized, e.g.:
618 * 464->444, 484->444, 575->555, 595->555
619 * However, that would take longer. Check also how it would
620 * interact with BiDi control removal and inserting Marks.
624 /* search for the run limits and initialize visualLimit values with the run lengths */
627 /* prepare this run */
637 /* look for the run limit */
638 while(++i<limit && levels[i]==level) {}
640 /* i is another run limit */
641 runs[runIndex].logicalStart=start;
642 runs[runIndex].visualLimit=i-start;
643 runs[runIndex].insertRemove=0;
648 /* there is a separate WS run */
649 runs[runIndex].logicalStart=limit;
650 runs[runIndex].visualLimit=length-limit;
651 /* For the trailing WS run, pBiDi->paraLevel is ok even
652 if contextual multiple paragraphs. */
653 if(pBiDi->paraLevel<minLevel) {
654 minLevel=pBiDi->paraLevel;
658 /* set the object fields */
660 pBiDi->runCount=runCount;
662 reorderLine(pBiDi, minLevel, maxLevel);
664 /* now add the direction flags and adjust the visualLimit's to be just that */
665 /* this loop will also handle the trailing WS run */
667 for(i=0; i<runCount; ++i) {
668 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
669 limit+=runs[i].visualLimit;
670 runs[i].visualLimit=limit;
673 /* Set the "odd" bit for the trailing WS run. */
674 /* For a RTL paragraph, it will be the *first* run in visual order. */
675 /* For the trailing WS run, pBiDi->paraLevel is ok even if
676 contextual multiple paragraphs. */
677 if(runIndex<runCount) {
678 int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex;
680 ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel);
685 /* handle insert LRM/RLM BEFORE/AFTER run */
686 if(pBiDi->insertPoints.size>0) {
687 Point *point, *start=pBiDi->insertPoints.points,
688 *limit=start+pBiDi->insertPoints.size;
690 for(point=start; point<limit; point++) {
691 runIndex=getRunFromLogicalIndex(pBiDi, point->pos, pErrorCode);
692 pBiDi->runs[runIndex].insertRemove|=point->flag;
696 /* handle remove BiDi control characters */
697 if(pBiDi->controlCount>0) {
699 const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu;
700 for(pu=start; pu<limit; pu++) {
701 if(IS_BIDI_CONTROL_CHAR(*pu)) {
702 runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start), pErrorCode);
703 pBiDi->runs[runIndex].insertRemove--;
712 prepareReorder(const UBiDiLevel *levels, int32_t length,
714 UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) {
716 UBiDiLevel level, minLevel, maxLevel;
718 if(levels==NULL || length<=0) {
722 /* determine minLevel and maxLevel */
723 minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1;
725 for(start=length; start>0;) {
726 level=levels[--start];
727 if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) {
740 /* initialize the index map */
741 for(start=length; start>0;) {
743 indexMap[start]=start;
749 /* reorder a line based on a levels array (L2) ------------------------------ */
751 U_CAPI void U_EXPORT2
752 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
753 int32_t start, limit, sumOfSosEos;
754 UBiDiLevel minLevel = 0, maxLevel = 0;
756 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
761 if(minLevel==maxLevel && (minLevel&1)==0) {
765 /* reorder only down to the lowest odd level */
768 /* loop maxLevel..minLevel */
772 /* loop for all sequences of levels to reorder at the current maxLevel */
774 /* look for a sequence of levels that are all at >=maxLevel */
775 /* look for the first index of such a sequence */
776 while(start<length && levels[start]<maxLevel) {
780 break; /* no more such sequences */
783 /* look for the limit of such a sequence (the index behind it) */
784 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
787 * sos=start of sequence, eos=end of sequence
789 * The closed (inclusive) interval from sos to eos includes all the logical
790 * and visual indexes within this sequence. They are logically and
791 * visually contiguous and in the same range.
793 * For each run, the new visual index=sos+eos-old visual index;
794 * we pre-add sos+eos into sumOfSosEos ->
795 * new visual index=sumOfSosEos-old visual index;
797 sumOfSosEos=start+limit-1;
799 /* reorder each index in the sequence */
801 indexMap[start]=sumOfSosEos-indexMap[start];
802 } while(++start<limit);
806 break; /* no more such sequences */
811 } while(--maxLevel>=minLevel);
814 U_CAPI void U_EXPORT2
815 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
816 int32_t start, end, limit, temp;
817 UBiDiLevel minLevel = 0, maxLevel = 0;
819 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
824 if(minLevel==maxLevel && (minLevel&1)==0) {
828 /* reorder only down to the lowest odd level */
831 /* loop maxLevel..minLevel */
835 /* loop for all sequences of levels to reorder at the current maxLevel */
837 /* look for a sequence of levels that are all at >=maxLevel */
838 /* look for the first index of such a sequence */
839 while(start<length && levels[start]<maxLevel) {
843 break; /* no more such runs */
846 /* look for the limit of such a sequence (the index behind it) */
847 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
850 * Swap the entire interval of indexes from start to limit-1.
851 * We don't need to swap the levels for the purpose of this
852 * algorithm: the sequence of levels that we look at does not
857 temp=indexMap[start];
858 indexMap[start]=indexMap[end];
866 break; /* no more such sequences */
871 } while(--maxLevel>=minLevel);
874 /* API functions for logical<->visual mapping ------------------------------- */
876 U_CAPI int32_t U_EXPORT2
877 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
878 int32_t visualIndex=UBIDI_MAP_NOWHERE;
879 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
880 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
881 RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1);
883 /* we can do the trivial cases without the runs array */
884 switch(pBiDi->direction) {
886 visualIndex=logicalIndex;
889 visualIndex=pBiDi->length-logicalIndex-1;
892 if(!ubidi_getRuns(pBiDi, pErrorCode)) {
893 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
896 Run *runs=pBiDi->runs;
897 int32_t i, visualStart=0, offset, length;
899 /* linear search for the run, search on the visual runs */
900 for(i=0; i<pBiDi->runCount; ++i) {
901 length=runs[i].visualLimit-visualStart;
902 offset=logicalIndex-GET_INDEX(runs[i].logicalStart);
903 if(offset>=0 && offset<length) {
904 if(IS_EVEN_RUN(runs[i].logicalStart)) {
906 visualIndex=visualStart+offset;
909 visualIndex=visualStart+length-offset-1;
911 break; /* exit for loop */
915 if(i>=pBiDi->runCount) {
916 return UBIDI_MAP_NOWHERE;
921 if(pBiDi->insertPoints.size>0) {
922 /* add the number of added marks until the calculated visual index */
923 Run *runs=pBiDi->runs;
924 int32_t i, length, insertRemove;
925 int32_t visualStart=0, markFound=0;
926 for(i=0; ; i++, visualStart+=length) {
927 length=runs[i].visualLimit-visualStart;
928 insertRemove=runs[i].insertRemove;
929 if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) {
932 /* is it the run containing the visual index? */
933 if(visualIndex<runs[i].visualLimit) {
934 return visualIndex+markFound;
936 if(insertRemove & (LRM_AFTER|RLM_AFTER)) {
941 else if(pBiDi->controlCount>0) {
942 /* subtract the number of controls until the calculated visual index */
943 Run *runs=pBiDi->runs;
944 int32_t i, j, start, limit, length, insertRemove;
945 int32_t visualStart=0, controlFound=0;
946 UChar uchar=pBiDi->text[logicalIndex];
947 /* is the logical index pointing to a control ? */
948 if(IS_BIDI_CONTROL_CHAR(uchar)) {
949 return UBIDI_MAP_NOWHERE;
952 for(i=0; ; i++, visualStart+=length) {
953 length=runs[i].visualLimit-visualStart;
954 insertRemove=runs[i].insertRemove;
955 /* calculated visual index is beyond this run? */
956 if(visualIndex>=runs[i].visualLimit) {
957 controlFound-=insertRemove;
960 /* calculated visual index must be within current run */
961 if(insertRemove==0) {
962 return visualIndex-controlFound;
964 if(IS_EVEN_RUN(runs[i].logicalStart)) {
965 /* LTR: check from run start to logical index */
966 start=runs[i].logicalStart;
969 /* RTL: check from logical index to run end */
970 start=logicalIndex+1;
971 limit=GET_INDEX(runs[i].logicalStart)+length;
973 for(j=start; j<limit; j++) {
974 uchar=pBiDi->text[j];
975 if(IS_BIDI_CONTROL_CHAR(uchar)) {
979 return visualIndex-controlFound;
986 U_CAPI int32_t U_EXPORT2
987 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) {
989 int32_t i, runCount, start;
990 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
991 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
992 RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1);
993 /* we can do the trivial cases without the runs array */
994 if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) {
995 if(pBiDi->direction==UBIDI_LTR) {
998 else if(pBiDi->direction==UBIDI_RTL) {
999 return pBiDi->length-visualIndex-1;
1002 if(!ubidi_getRuns(pBiDi, pErrorCode)) {
1003 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1008 runCount=pBiDi->runCount;
1009 if(pBiDi->insertPoints.size>0) {
1010 /* handle inserted LRM/RLM */
1011 int32_t markFound=0, insertRemove;
1012 int32_t visualStart=0, length;
1014 /* subtract number of marks until visual index */
1015 for(i=0; ; i++, visualStart+=length) {
1016 length=runs[i].visualLimit-visualStart;
1017 insertRemove=runs[i].insertRemove;
1018 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1019 if(visualIndex<=(visualStart+markFound)) {
1020 return UBIDI_MAP_NOWHERE;
1024 /* is adjusted visual index within this run? */
1025 if(visualIndex<(runs[i].visualLimit+markFound)) {
1026 visualIndex-=markFound;
1029 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1030 if(visualIndex==(visualStart+length+markFound)) {
1031 return UBIDI_MAP_NOWHERE;
1037 else if(pBiDi->controlCount>0) {
1038 /* handle removed BiDi control characters */
1039 int32_t controlFound=0, insertRemove, length;
1040 int32_t logicalStart, logicalEnd, visualStart=0, j, k;
1043 /* add number of controls until visual index */
1044 for(i=0; ; i++, visualStart+=length) {
1045 length=runs[i].visualLimit-visualStart;
1046 insertRemove=runs[i].insertRemove;
1047 /* is adjusted visual index beyond current run? */
1048 if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) {
1049 controlFound-=insertRemove;
1052 /* adjusted visual index is within current run */
1053 if(insertRemove==0) {
1054 visualIndex+=controlFound;
1057 /* count non-control chars until visualIndex */
1058 logicalStart=runs[i].logicalStart;
1059 evenRun=IS_EVEN_RUN(logicalStart);
1060 REMOVE_ODD_BIT(logicalStart);
1061 logicalEnd=logicalStart+length-1;
1062 for(j=0; j<length; j++) {
1063 k= evenRun ? logicalStart+j : logicalEnd-j;
1064 uchar=pBiDi->text[k];
1065 if(IS_BIDI_CONTROL_CHAR(uchar)) {
1068 if((visualIndex+controlFound)==(visualStart+j)) {
1072 visualIndex+=controlFound;
1076 /* handle all cases */
1078 /* linear search for the run */
1079 for(i=0; visualIndex>=runs[i].visualLimit; ++i) {}
1081 /* binary search for the run */
1082 int32_t begin=0, limit=runCount;
1084 /* the middle if() is guaranteed to find the run, we don't need a loop limit */
1087 if(visualIndex>=runs[i].visualLimit) {
1089 } else if(i==0 || visualIndex>=runs[i-1].visualLimit) {
1097 start=runs[i].logicalStart;
1098 if(IS_EVEN_RUN(start)) {
1100 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
1102 visualIndex-=runs[i-1].visualLimit;
1104 return start+visualIndex;
1107 return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1;
1111 U_CAPI void U_EXPORT2
1112 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
1113 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1114 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1115 ubidi_countRuns(pBiDi, pErrorCode);
1116 if(U_FAILURE(*pErrorCode)) {
1118 } else if(indexMap==NULL) {
1119 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1121 /* fill a logical-to-visual index map using the runs[] */
1122 int32_t visualStart, visualLimit, i, j, k;
1123 int32_t logicalStart, logicalLimit;
1124 Run *runs=pBiDi->runs;
1125 if (pBiDi->length<=0) {
1128 if (pBiDi->length>pBiDi->resultLength) {
1129 uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t));
1133 for(j=0; j<pBiDi->runCount; ++j) {
1134 logicalStart=GET_INDEX(runs[j].logicalStart);
1135 visualLimit=runs[j].visualLimit;
1136 if(IS_EVEN_RUN(runs[j].logicalStart)) {
1138 indexMap[logicalStart++]=visualStart++;
1139 } while(visualStart<visualLimit);
1141 logicalStart+=visualLimit-visualStart; /* logicalLimit */
1143 indexMap[--logicalStart]=visualStart++;
1144 } while(visualStart<visualLimit);
1146 /* visualStart==visualLimit; */
1149 if(pBiDi->insertPoints.size>0) {
1150 int32_t markFound=0, runCount=pBiDi->runCount;
1151 int32_t length, insertRemove;
1153 /* add number of marks found until each index */
1154 for(i=0; i<runCount; i++, visualStart+=length) {
1155 length=runs[i].visualLimit-visualStart;
1156 insertRemove=runs[i].insertRemove;
1157 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1161 logicalStart=GET_INDEX(runs[i].logicalStart);
1162 logicalLimit=logicalStart+length;
1163 for(j=logicalStart; j<logicalLimit; j++) {
1164 indexMap[j]+=markFound;
1167 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1172 else if(pBiDi->controlCount>0) {
1173 int32_t controlFound=0, runCount=pBiDi->runCount;
1174 int32_t length, insertRemove;
1178 /* subtract number of controls found until each index */
1179 for(i=0; i<runCount; i++, visualStart+=length) {
1180 length=runs[i].visualLimit-visualStart;
1181 insertRemove=runs[i].insertRemove;
1182 /* no control found within previous runs nor within this run */
1183 if((controlFound-insertRemove)==0) {
1186 logicalStart=runs[i].logicalStart;
1187 evenRun=IS_EVEN_RUN(logicalStart);
1188 REMOVE_ODD_BIT(logicalStart);
1189 logicalLimit=logicalStart+length;
1190 /* if no control within this run */
1191 if(insertRemove==0) {
1192 for(j=logicalStart; j<logicalLimit; j++) {
1193 indexMap[j]-=controlFound;
1197 for(j=0; j<length; j++) {
1198 k= evenRun ? logicalStart+j : logicalLimit-j-1;
1199 uchar=pBiDi->text[k];
1200 if(IS_BIDI_CONTROL_CHAR(uchar)) {
1202 indexMap[k]=UBIDI_MAP_NOWHERE;
1205 indexMap[k]-=controlFound;
1212 U_CAPI void U_EXPORT2
1213 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
1214 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1215 if(indexMap==NULL) {
1216 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1219 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1220 ubidi_countRuns(pBiDi, pErrorCode);
1221 if(U_SUCCESS(*pErrorCode)) {
1222 /* fill a visual-to-logical index map using the runs[] */
1223 Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount;
1224 int32_t logicalStart, visualStart, visualLimit, *pi=indexMap;
1226 if (pBiDi->resultLength<=0) {
1230 for(; runs<runsLimit; ++runs) {
1231 logicalStart=runs->logicalStart;
1232 visualLimit=runs->visualLimit;
1233 if(IS_EVEN_RUN(logicalStart)) {
1235 *pi++ = logicalStart++;
1236 } while(++visualStart<visualLimit);
1238 REMOVE_ODD_BIT(logicalStart);
1239 logicalStart+=visualLimit-visualStart; /* logicalLimit */
1241 *pi++ = --logicalStart;
1242 } while(++visualStart<visualLimit);
1244 /* visualStart==visualLimit; */
1247 if(pBiDi->insertPoints.size>0) {
1248 int32_t markFound=0, runCount=pBiDi->runCount;
1249 int32_t insertRemove, i, j, k;
1251 /* count all inserted marks */
1252 for(i=0; i<runCount; i++) {
1253 insertRemove=runs[i].insertRemove;
1254 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1257 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1261 /* move back indexes by number of preceding marks */
1262 k=pBiDi->resultLength;
1263 for(i=runCount-1; i>=0 && markFound>0; i--) {
1264 insertRemove=runs[i].insertRemove;
1265 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1266 indexMap[--k]= UBIDI_MAP_NOWHERE;
1269 visualStart= i>0 ? runs[i-1].visualLimit : 0;
1270 for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) {
1271 indexMap[--k]=indexMap[j];
1273 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1274 indexMap[--k]= UBIDI_MAP_NOWHERE;
1279 else if(pBiDi->controlCount>0) {
1280 int32_t runCount=pBiDi->runCount, logicalEnd;
1281 int32_t insertRemove, length, i, j, k, m;
1286 /* move forward indexes by number of preceding controls */
1288 for(i=0; i<runCount; i++, visualStart+=length) {
1289 length=runs[i].visualLimit-visualStart;
1290 insertRemove=runs[i].insertRemove;
1291 /* if no control found yet, nothing to do in this run */
1292 if((insertRemove==0)&&(k==visualStart)) {
1296 /* if no control in this run */
1297 if(insertRemove==0) {
1298 visualLimit=runs[i].visualLimit;
1299 for(j=visualStart; j<visualLimit; j++) {
1300 indexMap[k++]=indexMap[j];
1304 logicalStart=runs[i].logicalStart;
1305 evenRun=IS_EVEN_RUN(logicalStart);
1306 REMOVE_ODD_BIT(logicalStart);
1307 logicalEnd=logicalStart+length-1;
1308 for(j=0; j<length; j++) {
1309 m= evenRun ? logicalStart+j : logicalEnd-j;
1310 uchar=pBiDi->text[m];
1311 if(!IS_BIDI_CONTROL_CHAR(uchar)) {
1320 U_CAPI void U_EXPORT2
1321 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) {
1322 if(srcMap!=NULL && destMap!=NULL && length>0) {
1324 int32_t destLength=-1, count=0;
1325 /* find highest value and count positive indexes in srcMap */
1328 if(*--pi>destLength) {
1335 destLength++; /* add 1 for origin 0 */
1336 if(count<destLength) {
1337 /* we must fill unmatched destMap entries with -1 */
1338 uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t));
1343 destMap[*pi]=--length;