Upstream version 9.37.197.0
[platform/framework/web/crosswalk.git] / src / third_party / icu / source / common / ustrcase.c
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2001-2010, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  ustrcase.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2002feb20
14 *   created by: Markus W. Scherer
15 *
16 *   Implementation file for string casing C API functions.
17 *   Uses functions from uchar.c for basic functionality that requires access
18 *   to the Unicode Character Database (uprops.dat).
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ustring.h"
24 #include "unicode/ucasemap.h"
25 #include "unicode/ubrk.h"
26 #include "cmemory.h"
27 #include "ucase.h"
28 #include "ustr_imp.h"
29
30 /* string casing ------------------------------------------------------------ */
31
32 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
33 static U_INLINE int32_t
34 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
35              int32_t result, const UChar *s) {
36     UChar32 c;
37     int32_t length;
38
39     /* decode the result */
40     if(result<0) {
41         /* (not) original code point */
42         c=~result;
43         length=-1;
44     } else if(result<=UCASE_MAX_STRING_LENGTH) {
45         c=U_SENTINEL;
46         length=result;
47     } else {
48         c=result;
49         length=-1;
50     }
51
52     if(destIndex<destCapacity) {
53         /* append the result */
54         if(length<0) {
55             /* code point */
56             UBool isError=FALSE;
57             U16_APPEND(dest, destIndex, destCapacity, c, isError);
58             if(isError) {
59                 /* overflow, nothing written */
60                 destIndex+=U16_LENGTH(c);
61             }
62         } else {
63             /* string */
64             if((destIndex+length)<=destCapacity) {
65                 while(length>0) {
66                     dest[destIndex++]=*s++;
67                     --length;
68                 }
69             } else {
70                 /* overflow */
71                 destIndex+=length;
72             }
73         }
74     } else {
75         /* preflight */
76         if(length<0) {
77             destIndex+=U16_LENGTH(c);
78         } else {
79             destIndex+=length;
80         }
81     }
82     return destIndex;
83 }
84
85 static UChar32 U_CALLCONV
86 utf16_caseContextIterator(void *context, int8_t dir) {
87     UCaseContext *csc=(UCaseContext *)context;
88     UChar32 c;
89
90     if(dir<0) {
91         /* reset for backward iteration */
92         csc->index=csc->cpStart;
93         csc->dir=dir;
94     } else if(dir>0) {
95         /* reset for forward iteration */
96         csc->index=csc->cpLimit;
97         csc->dir=dir;
98     } else {
99         /* continue current iteration direction */
100         dir=csc->dir;
101     }
102
103     if(dir<0) {
104         if(csc->start<csc->index) {
105             U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
106             return c;
107         }
108     } else {
109         if(csc->index<csc->limit) {
110             U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
111             return c;
112         }
113     }
114     return U_SENTINEL;
115 }
116
117 /*
118  * Case-maps [srcStart..srcLimit[ but takes
119  * context [0..srcLength[ into account.
120  */
121 static int32_t
122 _caseMap(const UCaseMap *csm, UCaseMapFull *map,
123          UChar *dest, int32_t destCapacity,
124          const UChar *src, UCaseContext *csc,
125          int32_t srcStart, int32_t srcLimit,
126          UErrorCode *pErrorCode) {
127     const UChar *s;
128     UChar32 c, c2 = 0;
129     int32_t srcIndex, destIndex;
130     int32_t locCache;
131
132     locCache=csm->locCache;
133
134     /* case mapping loop */
135     srcIndex=srcStart;
136     destIndex=0;
137     while(srcIndex<srcLimit) {
138         csc->cpStart=srcIndex;
139         U16_NEXT(src, srcIndex, srcLimit, c);
140         csc->cpLimit=srcIndex;
141         c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
142         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
143             /* fast path version of appendResult() for BMP results */
144             dest[destIndex++]=(UChar)c2;
145         } else {
146             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
147         }
148     }
149
150     if(destIndex>destCapacity) {
151         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
152     }
153     return destIndex;
154 }
155
156 static void
157 setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
158     /*
159      * We could call ucasemap_setLocale(), but here we really only care about
160      * the initial language subtag, we need not return the real string via
161      * ucasemap_getLocale(), and we don't care about only getting "x" from
162      * "x-some-thing" etc.
163      *
164      * We ignore locales with a longer-than-3 initial subtag.
165      *
166      * We also do not fill in the locCache because it is rarely used,
167      * and not worth setting unless we reuse it for many case mapping operations.
168      * (That's why UCaseMap was created.)
169      */
170     int i;
171     char c;
172
173     /* the internal functions require locale!=NULL */
174     if(locale==NULL) {
175         locale=uloc_getDefault();
176     }
177     for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
178         csm->locale[i]=c;
179     }
180     if(i<=3) {
181         csm->locale[i]=0;  /* Up to 3 non-separator characters. */
182     } else {
183         csm->locale[0]=0;  /* Longer-than-3 initial subtag: Ignore. */
184     }
185 }
186
187 /*
188  * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
189  * Do this fast because it is called with every function call.
190  */
191 static U_INLINE void
192 setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
193     if(csm->csp==NULL) {
194         csm->csp=ucase_getSingleton();
195     }
196     if(locale!=NULL && locale[0]==0) {
197         csm->locale[0]=0;
198     } else {
199         setTempCaseMapLocale(csm, locale, pErrorCode);
200     }
201 }
202
203 #if !UCONFIG_NO_BREAK_ITERATION
204
205 /*
206  * Internal titlecasing function.
207  */
208 static int32_t
209 _toTitle(UCaseMap *csm,
210          UChar *dest, int32_t destCapacity,
211          const UChar *src, UCaseContext *csc,
212          int32_t srcLength,
213          UErrorCode *pErrorCode) {
214     const UChar *s;
215     UChar32 c;
216     int32_t prev, titleStart, titleLimit, idx, destIndex, length;
217     UBool isFirstIndex;
218
219     if(csm->iter!=NULL) {
220         ubrk_setText(csm->iter, src, srcLength, pErrorCode);
221     } else {
222         csm->iter=ubrk_open(UBRK_WORD, csm->locale,
223                             src, srcLength,
224                             pErrorCode);
225     }
226     if(U_FAILURE(*pErrorCode)) {
227         return 0;
228     }
229
230     /* set up local variables */
231     destIndex=0;
232     prev=0;
233     isFirstIndex=TRUE;
234
235     /* titlecasing loop */
236     while(prev<srcLength) {
237         /* find next index where to titlecase */
238         if(isFirstIndex) {
239             isFirstIndex=FALSE;
240             idx=ubrk_first(csm->iter);
241         } else {
242             idx=ubrk_next(csm->iter);
243         }
244         if(idx==UBRK_DONE || idx>srcLength) {
245             idx=srcLength;
246         }
247
248         /*
249          * Unicode 4 & 5 section 3.13 Default Case Operations:
250          *
251          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
252          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
253          * cased character F. If F exists, map F to default_title(F); then map each
254          * subsequent character C to default_lower(C).
255          *
256          * In this implementation, segment [prev..index[ into 3 parts:
257          * a) uncased characters (copy as-is) [prev..titleStart[
258          * b) first case letter (titlecase)         [titleStart..titleLimit[
259          * c) subsequent characters (lowercase)                 [titleLimit..index[
260          */
261         if(prev<idx) {
262             /* find and copy uncased characters [prev..titleStart[ */
263             titleStart=titleLimit=prev;
264             U16_NEXT(src, titleLimit, idx, c);
265             if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
266                 /* Adjust the titlecasing index (titleStart) to the next cased character. */
267                 for(;;) {
268                     titleStart=titleLimit;
269                     if(titleLimit==idx) {
270                         /*
271                          * only uncased characters in [prev..index[
272                          * stop with titleStart==titleLimit==index
273                          */
274                         break;
275                     }
276                     U16_NEXT(src, titleLimit, idx, c);
277                     if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
278                         break; /* cased letter at [titleStart..titleLimit[ */
279                     }
280                 }
281                 length=titleStart-prev;
282                 if(length>0) {
283                     if((destIndex+length)<=destCapacity) {
284                         uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
285                     }
286                     destIndex+=length;
287                 }
288             }
289
290             if(titleStart<titleLimit) {
291                 /* titlecase c which is from [titleStart..titleLimit[ */
292                 csc->cpStart=titleStart;
293                 csc->cpLimit=titleLimit;
294                 c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
295                 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 
296
297                 /* Special case Dutch IJ titlecasing */
298                 if ( titleStart+1 < idx && 
299                      ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
300                      ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
301                      ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { 
302                             c=(UChar32) 0x004A;
303                             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
304                             titleLimit++;
305                 }
306
307                 /* lowercase [titleLimit..index[ */
308                 if(titleLimit<idx) {
309                     if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
310                         /* Normal operation: Lowercase the rest of the word. */
311                         destIndex+=
312                             _caseMap(
313                                 csm, ucase_toFullLower,
314                                 dest+destIndex, destCapacity-destIndex,
315                                 src, csc,
316                                 titleLimit, idx,
317                                 pErrorCode);
318                     } else {
319                         /* Optionally just copy the rest of the word unchanged. */
320                         length=idx-titleLimit;
321                         if((destIndex+length)<=destCapacity) {
322                             uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR);
323                         }
324                         destIndex+=length;
325                     }
326                 }
327             }
328         }
329
330         prev=idx;
331     }
332
333     if(destIndex>destCapacity) {
334         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
335     }
336     return destIndex;
337 }
338
339 #endif
340
341 /* functions available in the common library (for unistr_case.cpp) */
342
343 U_CFUNC int32_t
344 ustr_toLower(const UCaseProps *csp,
345              UChar *dest, int32_t destCapacity,
346              const UChar *src, int32_t srcLength,
347              const char *locale,
348              UErrorCode *pErrorCode) {
349     UCaseMap csm={ NULL };
350     UCaseContext csc={ NULL };
351
352     csm.csp=csp;
353     setTempCaseMap(&csm, locale, pErrorCode);
354     csc.p=(void *)src;
355     csc.limit=srcLength;
356
357     return _caseMap(&csm, ucase_toFullLower,
358                     dest, destCapacity,
359                     src, &csc, 0, srcLength,
360                     pErrorCode);
361 }
362
363 U_CFUNC int32_t
364 ustr_toUpper(const UCaseProps *csp,
365              UChar *dest, int32_t destCapacity,
366              const UChar *src, int32_t srcLength,
367              const char *locale,
368              UErrorCode *pErrorCode) {
369     UCaseMap csm={ NULL };
370     UCaseContext csc={ NULL };
371
372     csm.csp=csp;
373     setTempCaseMap(&csm, locale, pErrorCode);
374     csc.p=(void *)src;
375     csc.limit=srcLength;
376
377     return _caseMap(&csm, ucase_toFullUpper,
378                     dest, destCapacity,
379                     src, &csc, 0, srcLength,
380                     pErrorCode);
381 }
382
383 #if !UCONFIG_NO_BREAK_ITERATION
384
385 U_CFUNC int32_t
386 ustr_toTitle(const UCaseProps *csp,
387              UChar *dest, int32_t destCapacity,
388              const UChar *src, int32_t srcLength,
389              UBreakIterator *titleIter,
390              const char *locale, uint32_t options,
391              UErrorCode *pErrorCode) {
392     UCaseMap csm={ NULL };
393     UCaseContext csc={ NULL };
394     int32_t length;
395
396     csm.csp=csp;
397     csm.iter=titleIter;
398     csm.options=options;
399     setTempCaseMap(&csm, locale, pErrorCode);
400     csc.p=(void *)src;
401     csc.limit=srcLength;
402
403     length=_toTitle(&csm,
404                     dest, destCapacity,
405                     src, &csc, srcLength,
406                     pErrorCode);
407     if(titleIter==NULL && csm.iter!=NULL) {
408         ubrk_close(csm.iter);
409     }
410     return length;
411 }
412
413 #endif
414
415 U_CFUNC int32_t
416 ustr_foldCase(const UCaseProps *csp,
417               UChar *dest, int32_t destCapacity,
418               const UChar *src, int32_t srcLength,
419               uint32_t options,
420               UErrorCode *pErrorCode) {
421     int32_t srcIndex, destIndex;
422
423     const UChar *s;
424     UChar32 c, c2 = 0;
425
426     /* case mapping loop */
427     srcIndex=destIndex=0;
428     while(srcIndex<srcLength) {
429         U16_NEXT(src, srcIndex, srcLength, c);
430         c=ucase_toFullFolding(csp, c, &s, options);
431         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
432             /* fast path version of appendResult() for BMP results */
433             dest[destIndex++]=(UChar)c2;
434         } else {
435             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
436         }
437     }
438
439     if(destIndex>destCapacity) {
440         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
441     }
442     return destIndex;
443 }
444
445 /*
446  * Implement argument checking and buffer handling
447  * for string case mapping as a common function.
448  */
449
450 /* common internal function for public API functions */
451
452 static int32_t
453 caseMap(const UCaseMap *csm,
454         UChar *dest, int32_t destCapacity,
455         const UChar *src, int32_t srcLength,
456         int32_t toWhichCase,
457         UErrorCode *pErrorCode) {
458     UChar buffer[300];
459     UChar *temp;
460
461     int32_t destLength;
462
463     /* check argument values */
464     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
465         return 0;
466     }
467     if( destCapacity<0 ||
468         (dest==NULL && destCapacity>0) ||
469         src==NULL ||
470         srcLength<-1
471     ) {
472         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
473         return 0;
474     }
475
476     /* get the string length */
477     if(srcLength==-1) {
478         srcLength=u_strlen(src);
479     }
480
481     /* check for overlapping source and destination */
482     if( dest!=NULL &&
483         ((src>=dest && src<(dest+destCapacity)) ||
484          (dest>=src && dest<(src+srcLength)))
485     ) {
486         /* overlap: provide a temporary destination buffer and later copy the result */
487         if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {
488             /* the stack buffer is large enough */
489             temp=buffer;
490         } else {
491             /* allocate a buffer */
492             temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
493             if(temp==NULL) {
494                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
495                 return 0;
496             }
497         }
498     } else {
499         temp=dest;
500     }
501
502     destLength=0;
503
504     if(toWhichCase==FOLD_CASE) {
505         destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
506                                  csm->options, pErrorCode);
507     } else {
508         UCaseContext csc={ NULL };
509
510         csc.p=(void *)src;
511         csc.limit=srcLength;
512
513         if(toWhichCase==TO_LOWER) {
514             destLength=_caseMap(csm, ucase_toFullLower,
515                                 temp, destCapacity,
516                                 src, &csc,
517                                 0, srcLength,
518                                 pErrorCode);
519         } else if(toWhichCase==TO_UPPER) {
520             destLength=_caseMap(csm, ucase_toFullUpper,
521                                 temp, destCapacity,
522                                 src, &csc,
523                                 0, srcLength,
524                                 pErrorCode);
525         } else /* if(toWhichCase==TO_TITLE) */ {
526 #if UCONFIG_NO_BREAK_ITERATION
527             *pErrorCode=U_UNSUPPORTED_ERROR;
528 #else
529             /* UCaseMap is actually non-const in toTitle() APIs. */
530             destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
531                                 src, &csc, srcLength,
532                                 pErrorCode);
533 #endif
534         }
535     }
536     if(temp!=dest) {
537         /* copy the result string to the destination buffer */
538         if(destLength>0) {
539             int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
540             if(copyLength>0) {
541                 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
542             }
543         }
544         if(temp!=buffer) {
545             uprv_free(temp);
546         }
547     }
548
549     return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
550 }
551
552 /* public API functions */
553
554 U_CAPI int32_t U_EXPORT2
555 u_strToLower(UChar *dest, int32_t destCapacity,
556              const UChar *src, int32_t srcLength,
557              const char *locale,
558              UErrorCode *pErrorCode) {
559     UCaseMap csm={ NULL };
560     setTempCaseMap(&csm, locale, pErrorCode);
561     return caseMap(&csm,
562                    dest, destCapacity,
563                    src, srcLength,
564                    TO_LOWER, pErrorCode);
565 }
566
567 U_CAPI int32_t U_EXPORT2
568 u_strToUpper(UChar *dest, int32_t destCapacity,
569              const UChar *src, int32_t srcLength,
570              const char *locale,
571              UErrorCode *pErrorCode) {
572     UCaseMap csm={ NULL };
573     setTempCaseMap(&csm, locale, pErrorCode);
574     return caseMap(&csm,
575                    dest, destCapacity,
576                    src, srcLength,
577                    TO_UPPER, pErrorCode);
578 }
579
580 #if !UCONFIG_NO_BREAK_ITERATION
581
582 U_CAPI int32_t U_EXPORT2
583 u_strToTitle(UChar *dest, int32_t destCapacity,
584              const UChar *src, int32_t srcLength,
585              UBreakIterator *titleIter,
586              const char *locale,
587              UErrorCode *pErrorCode) {
588     UCaseMap csm={ NULL };
589     int32_t length;
590
591     csm.iter=titleIter;
592     setTempCaseMap(&csm, locale, pErrorCode);
593     length=caseMap(&csm,
594                    dest, destCapacity,
595                    src, srcLength,
596                    TO_TITLE, pErrorCode);
597     if(titleIter==NULL && csm.iter!=NULL) {
598         ubrk_close(csm.iter);
599     }
600     return length;
601 }
602
603 U_CAPI int32_t U_EXPORT2
604 ucasemap_toTitle(UCaseMap *csm,
605                  UChar *dest, int32_t destCapacity,
606                  const UChar *src, int32_t srcLength,
607                  UErrorCode *pErrorCode) {
608     return caseMap(csm,
609                    dest, destCapacity,
610                    src, srcLength,
611                    TO_TITLE, pErrorCode);
612 }
613
614 #endif
615
616 U_CAPI int32_t U_EXPORT2
617 u_strFoldCase(UChar *dest, int32_t destCapacity,
618               const UChar *src, int32_t srcLength,
619               uint32_t options,
620               UErrorCode *pErrorCode) {
621     UCaseMap csm={ NULL };
622     csm.csp=ucase_getSingleton();
623     csm.options=options;
624     return caseMap(&csm,
625                    dest, destCapacity,
626                    src, srcLength,
627                    FOLD_CASE, pErrorCode);
628 }
629
630 /* case-insensitive string comparisons -------------------------------------- */
631
632 /*
633  * This function is a copy of unorm_cmpEquivFold() minus the parts for
634  * canonical equivalence.
635  * Keep the functions in sync, and see there for how this works.
636  * The duplication is for modularization:
637  * It makes caseless (but not canonical caseless) matches independent of
638  * the normalization code.
639  */
640
641 /* stack element for previous-level source/decomposition pointers */
642 struct CmpEquivLevel {
643     const UChar *start, *s, *limit;
644 };
645 typedef struct CmpEquivLevel CmpEquivLevel;
646
647 /* internal function */
648 U_CFUNC int32_t
649 u_strcmpFold(const UChar *s1, int32_t length1,
650              const UChar *s2, int32_t length2,
651              uint32_t options,
652              UErrorCode *pErrorCode) {
653     const UCaseProps *csp;
654
655     /* current-level start/limit - s1/s2 as current */
656     const UChar *start1, *start2, *limit1, *limit2;
657
658     /* case folding variables */
659     const UChar *p;
660     int32_t length;
661
662     /* stacks of previous-level start/current/limit */
663     CmpEquivLevel stack1[2], stack2[2];
664
665     /* case folding buffers, only use current-level start/limit */
666     UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
667
668     /* track which is the current level per string */
669     int32_t level1, level2;
670
671     /* current code units, and code points for lookups */
672     UChar32 c1, c2, cp1, cp2;
673
674     /* no argument error checking because this itself is not an API */
675
676     /*
677      * assume that at least the option U_COMPARE_IGNORE_CASE is set
678      * otherwise this function would have to behave exactly as uprv_strCompare()
679      */
680     csp=ucase_getSingleton();
681     if(U_FAILURE(*pErrorCode)) {
682         return 0;
683     }
684
685     /* initialize */
686     start1=s1;
687     if(length1==-1) {
688         limit1=NULL;
689     } else {
690         limit1=s1+length1;
691     }
692
693     start2=s2;
694     if(length2==-1) {
695         limit2=NULL;
696     } else {
697         limit2=s2+length2;
698     }
699
700     level1=level2=0;
701     c1=c2=-1;
702
703     /* comparison loop */
704     for(;;) {
705         /*
706          * here a code unit value of -1 means "get another code unit"
707          * below it will mean "this source is finished"
708          */
709
710         if(c1<0) {
711             /* get next code unit from string 1, post-increment */
712             for(;;) {
713                 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
714                     if(level1==0) {
715                         c1=-1;
716                         break;
717                     }
718                 } else {
719                     ++s1;
720                     break;
721                 }
722
723                 /* reached end of level buffer, pop one level */
724                 do {
725                     --level1;
726                     start1=stack1[level1].start;
727                 } while(start1==NULL);
728                 s1=stack1[level1].s;
729                 limit1=stack1[level1].limit;
730             }
731         }
732
733         if(c2<0) {
734             /* get next code unit from string 2, post-increment */
735             for(;;) {
736                 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
737                     if(level2==0) {
738                         c2=-1;
739                         break;
740                     }
741                 } else {
742                     ++s2;
743                     break;
744                 }
745
746                 /* reached end of level buffer, pop one level */
747                 do {
748                     --level2;
749                     start2=stack2[level2].start;
750                 } while(start2==NULL);
751                 s2=stack2[level2].s;
752                 limit2=stack2[level2].limit;
753             }
754         }
755
756         /*
757          * compare c1 and c2
758          * either variable c1, c2 is -1 only if the corresponding string is finished
759          */
760         if(c1==c2) {
761             if(c1<0) {
762                 return 0;   /* c1==c2==-1 indicating end of strings */
763             }
764             c1=c2=-1;       /* make us fetch new code units */
765             continue;
766         } else if(c1<0) {
767             return -1;      /* string 1 ends before string 2 */
768         } else if(c2<0) {
769             return 1;       /* string 2 ends before string 1 */
770         }
771         /* c1!=c2 && c1>=0 && c2>=0 */
772
773         /* get complete code points for c1, c2 for lookups if either is a surrogate */
774         cp1=c1;
775         if(U_IS_SURROGATE(c1)) {
776             UChar c;
777
778             if(U_IS_SURROGATE_LEAD(c1)) {
779                 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
780                     /* advance ++s1; only below if cp1 decomposes/case-folds */
781                     cp1=U16_GET_SUPPLEMENTARY(c1, c);
782                 }
783             } else /* isTrail(c1) */ {
784                 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
785                     cp1=U16_GET_SUPPLEMENTARY(c, c1);
786                 }
787             }
788         }
789
790         cp2=c2;
791         if(U_IS_SURROGATE(c2)) {
792             UChar c;
793
794             if(U_IS_SURROGATE_LEAD(c2)) {
795                 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
796                     /* advance ++s2; only below if cp2 decomposes/case-folds */
797                     cp2=U16_GET_SUPPLEMENTARY(c2, c);
798                 }
799             } else /* isTrail(c2) */ {
800                 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
801                     cp2=U16_GET_SUPPLEMENTARY(c, c2);
802                 }
803             }
804         }
805
806         /*
807          * go down one level for each string
808          * continue with the main loop as soon as there is a real change
809          */
810
811         if( level1==0 &&
812             (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
813         ) {
814             /* cp1 case-folds to the code point "length" or to p[length] */
815             if(U_IS_SURROGATE(c1)) {
816                 if(U_IS_SURROGATE_LEAD(c1)) {
817                     /* advance beyond source surrogate pair if it case-folds */
818                     ++s1;
819                 } else /* isTrail(c1) */ {
820                     /*
821                      * we got a supplementary code point when hitting its trail surrogate,
822                      * therefore the lead surrogate must have been the same as in the other string;
823                      * compare this decomposition with the lead surrogate in the other string
824                      * remember that this simulates bulk text replacement:
825                      * the decomposition would replace the entire code point
826                      */
827                     --s2;
828                     c2=*(s2-1);
829                 }
830             }
831
832             /* push current level pointers */
833             stack1[0].start=start1;
834             stack1[0].s=s1;
835             stack1[0].limit=limit1;
836             ++level1;
837
838             /* copy the folding result to fold1[] */
839             if(length<=UCASE_MAX_STRING_LENGTH) {
840                 u_memcpy(fold1, p, length);
841             } else {
842                 int32_t i=0;
843                 U16_APPEND_UNSAFE(fold1, i, length);
844                 length=i;
845             }
846
847             /* set next level pointers to case folding */
848             start1=s1=fold1;
849             limit1=fold1+length;
850
851             /* get ready to read from decomposition, continue with loop */
852             c1=-1;
853             continue;
854         }
855
856         if( level2==0 &&
857             (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
858         ) {
859             /* cp2 case-folds to the code point "length" or to p[length] */
860             if(U_IS_SURROGATE(c2)) {
861                 if(U_IS_SURROGATE_LEAD(c2)) {
862                     /* advance beyond source surrogate pair if it case-folds */
863                     ++s2;
864                 } else /* isTrail(c2) */ {
865                     /*
866                      * we got a supplementary code point when hitting its trail surrogate,
867                      * therefore the lead surrogate must have been the same as in the other string;
868                      * compare this decomposition with the lead surrogate in the other string
869                      * remember that this simulates bulk text replacement:
870                      * the decomposition would replace the entire code point
871                      */
872                     --s1;
873                     c1=*(s1-1);
874                 }
875             }
876
877             /* push current level pointers */
878             stack2[0].start=start2;
879             stack2[0].s=s2;
880             stack2[0].limit=limit2;
881             ++level2;
882
883             /* copy the folding result to fold2[] */
884             if(length<=UCASE_MAX_STRING_LENGTH) {
885                 u_memcpy(fold2, p, length);
886             } else {
887                 int32_t i=0;
888                 U16_APPEND_UNSAFE(fold2, i, length);
889                 length=i;
890             }
891
892             /* set next level pointers to case folding */
893             start2=s2=fold2;
894             limit2=fold2+length;
895
896             /* get ready to read from decomposition, continue with loop */
897             c2=-1;
898             continue;
899         }
900
901         /*
902          * no decomposition/case folding, max level for both sides:
903          * return difference result
904          *
905          * code point order comparison must not just return cp1-cp2
906          * because when single surrogates are present then the surrogate pairs
907          * that formed cp1 and cp2 may be from different string indexes
908          *
909          * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
910          * c1=d800 cp1=10001 c2=dc00 cp2=10000
911          * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
912          *
913          * therefore, use same fix-up as in ustring.c/uprv_strCompare()
914          * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
915          * so we have slightly different pointer/start/limit comparisons here
916          */
917
918         if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
919             /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
920             if(
921                 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
922                 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
923             ) {
924                 /* part of a surrogate pair, leave >=d800 */
925             } else {
926                 /* BMP code point - may be surrogate code point - make <d800 */
927                 c1-=0x2800;
928             }
929
930             if(
931                 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
932                 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
933             ) {
934                 /* part of a surrogate pair, leave >=d800 */
935             } else {
936                 /* BMP code point - may be surrogate code point - make <d800 */
937                 c2-=0x2800;
938             }
939         }
940
941         return c1-c2;
942     }
943 }
944
945 /* public API functions */
946
947 U_CAPI int32_t U_EXPORT2
948 u_strCaseCompare(const UChar *s1, int32_t length1,
949                  const UChar *s2, int32_t length2,
950                  uint32_t options,
951                  UErrorCode *pErrorCode) {
952     /* argument checking */
953     if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
954         return 0;
955     }
956     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
957         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
958         return 0;
959     }
960     return u_strcmpFold(s1, length1, s2, length2,
961                         options|U_COMPARE_IGNORE_CASE,
962                         pErrorCode);
963 }
964
965 U_CAPI int32_t U_EXPORT2
966 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
967     UErrorCode errorCode=U_ZERO_ERROR;
968     return u_strcmpFold(s1, -1, s2, -1,
969                         options|U_COMPARE_IGNORE_CASE,
970                         &errorCode);
971 }
972
973 U_CAPI int32_t U_EXPORT2
974 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
975     UErrorCode errorCode=U_ZERO_ERROR;
976     return u_strcmpFold(s1, length, s2, length,
977                         options|U_COMPARE_IGNORE_CASE,
978                         &errorCode);
979 }
980
981 U_CAPI int32_t U_EXPORT2
982 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
983     UErrorCode errorCode=U_ZERO_ERROR;
984     return u_strcmpFold(s1, n, s2, n,
985                         options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
986                         &errorCode);
987 }