Imported Upstream version 58.1
[platform/upstream/icu.git] / source / common / ucasemap.cpp
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  ucasemap.cpp
11 *   encoding:   US-ASCII
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005may06
16 *   created by: Markus W. Scherer
17 *
18 *   Case mapping service object and functions using it.
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/ubrk.h"
24 #include "unicode/uloc.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucasemap.h"
27 #if !UCONFIG_NO_BREAK_ITERATION
28 #include "unicode/utext.h"
29 #endif
30 #include "unicode/utf.h"
31 #include "unicode/utf8.h"
32 #include "unicode/utf16.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "ucase.h"
36 #include "ustr_imp.h"
37
38 U_NAMESPACE_USE
39
40 /* UCaseMap service object -------------------------------------------------- */
41
42 U_CAPI UCaseMap * U_EXPORT2
43 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
44     UCaseMap *csm;
45
46     if(U_FAILURE(*pErrorCode)) {
47         return NULL;
48     }
49
50     csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap));
51     if(csm==NULL) {
52         return NULL;
53     }
54     uprv_memset(csm, 0, sizeof(UCaseMap));
55
56     csm->csp=ucase_getSingleton();
57     ucasemap_setLocale(csm, locale, pErrorCode);
58     if(U_FAILURE(*pErrorCode)) {
59         uprv_free(csm);
60         return NULL;
61     }
62
63     csm->options=options;
64     return csm;
65 }
66
67 U_CAPI void U_EXPORT2
68 ucasemap_close(UCaseMap *csm) {
69     if(csm!=NULL) {
70 #if !UCONFIG_NO_BREAK_ITERATION
71         // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
72         delete reinterpret_cast<BreakIterator *>(csm->iter);
73 #endif
74         uprv_free(csm);
75     }
76 }
77
78 U_CAPI const char * U_EXPORT2
79 ucasemap_getLocale(const UCaseMap *csm) {
80     return csm->locale;
81 }
82
83 U_CAPI uint32_t U_EXPORT2
84 ucasemap_getOptions(const UCaseMap *csm) {
85     return csm->options;
86 }
87
88 U_CAPI void U_EXPORT2
89 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
90     int32_t length;
91
92     if(U_FAILURE(*pErrorCode)) {
93         return;
94     }
95
96     length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
97     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
98         *pErrorCode=U_ZERO_ERROR;
99         /* we only really need the language code for case mappings */
100         length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
101     }
102     if(length==sizeof(csm->locale)) {
103         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
104     }
105     csm->locCache=0;
106     if(U_SUCCESS(*pErrorCode)) {
107         ucase_getCaseLocale(csm->locale, &csm->locCache);
108     } else {
109         csm->locale[0]=0;
110     }
111 }
112
113 U_CAPI void U_EXPORT2
114 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) {
115     csm->options=options;
116 }
117
118 /* UTF-8 string case mappings ----------------------------------------------- */
119
120 /* TODO(markus): Move to a new, separate utf8case.c file. */
121
122 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
123 static inline int32_t
124 appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
125              int32_t result, const UChar *s) {
126     UChar32 c;
127     int32_t length;
128     UErrorCode errorCode;
129
130     /* decode the result */
131     if(result<0) {
132         /* (not) original code point */
133         c=~result;
134         length=U8_LENGTH(c);
135     } else if(result<=UCASE_MAX_STRING_LENGTH) {
136         c=U_SENTINEL;
137         length=result;
138     } else {
139         c=result;
140         length=U8_LENGTH(c);
141     }
142     if(length>(INT32_MAX-destIndex)) {
143         return -1;  // integer overflow
144     }
145
146     if(destIndex<destCapacity) {
147         /* append the result */
148         if(c>=0) {
149             /* code point */
150             UBool isError=FALSE;
151             U8_APPEND(dest, destIndex, destCapacity, c, isError);
152             if(isError) {
153                 /* overflow, nothing written */
154                 destIndex+=length;
155             }
156         } else {
157             /* string */
158             int32_t destLength;
159             errorCode=U_ZERO_ERROR;
160             u_strToUTF8(
161                 (char *)(dest+destIndex), destCapacity-destIndex, &destLength,
162                 s, length,
163                 &errorCode);
164             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
165                 return -1;
166             }
167             if(destLength>(INT32_MAX-destIndex)) {
168                 return -1;  // integer overflow
169             }
170             destIndex+=destLength;
171             /* we might have an overflow, but we know the actual length */
172         }
173     } else {
174         /* preflight */
175         if(c>=0) {
176             destIndex+=length;
177         } else {
178             int32_t destLength;
179             errorCode=U_ZERO_ERROR;
180             u_strToUTF8(
181                 NULL, 0, &destLength,
182                 s, length,
183                 &errorCode);
184             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
185                 return -1;
186             }
187             if(destLength>(INT32_MAX-destIndex)) {
188                 return -1;  // integer overflow
189             }
190             destIndex+=destLength;
191         }
192     }
193     return destIndex;
194 }
195
196 static inline int32_t
197 appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
198     int32_t length=U8_LENGTH(c);
199     if(length>(INT32_MAX-destIndex)) {
200         return -1;  // integer overflow
201     }
202     int32_t limit=destIndex+length;
203     if(limit<destCapacity) {
204         U8_APPEND_UNSAFE(dest, destIndex, c);
205     }
206     return limit;
207 }
208
209 static inline int32_t
210 appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
211              const uint8_t *s, int32_t length) {
212     if(length>0) {
213         if(length>(INT32_MAX-destIndex)) {
214             return -1;  // integer overflow
215         }
216         if((destIndex+length)<=destCapacity) {
217             uprv_memcpy(dest+destIndex, s, length);
218         }
219         destIndex+=length;
220     }
221     return destIndex;
222 }
223
224 static UChar32 U_CALLCONV
225 utf8_caseContextIterator(void *context, int8_t dir) {
226     UCaseContext *csc=(UCaseContext *)context;
227     UChar32 c;
228
229     if(dir<0) {
230         /* reset for backward iteration */
231         csc->index=csc->cpStart;
232         csc->dir=dir;
233     } else if(dir>0) {
234         /* reset for forward iteration */
235         csc->index=csc->cpLimit;
236         csc->dir=dir;
237     } else {
238         /* continue current iteration direction */
239         dir=csc->dir;
240     }
241
242     if(dir<0) {
243         if(csc->start<csc->index) {
244             U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
245             return c;
246         }
247     } else {
248         if(csc->index<csc->limit) {
249             U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
250             return c;
251         }
252     }
253     return U_SENTINEL;
254 }
255
256 /*
257  * Case-maps [srcStart..srcLimit[ but takes
258  * context [0..srcLength[ into account.
259  */
260 static int32_t
261 _caseMap(const UCaseMap *csm, UCaseMapFull *map,
262          uint8_t *dest, int32_t destCapacity,
263          const uint8_t *src, UCaseContext *csc,
264          int32_t srcStart, int32_t srcLimit,
265          UErrorCode *pErrorCode) {
266     const UChar *s = NULL;
267     UChar32 c, c2 = 0;
268     int32_t srcIndex, destIndex;
269     int32_t locCache;
270
271     locCache=csm->locCache;
272
273     /* case mapping loop */
274     srcIndex=srcStart;
275     destIndex=0;
276     while(srcIndex<srcLimit) {
277         csc->cpStart=srcIndex;
278         U8_NEXT(src, srcIndex, srcLimit, c);
279         csc->cpLimit=srcIndex;
280         if(c<0) {
281             // Malformed UTF-8.
282             destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart);
283             if(destIndex<0) {
284                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
285                 return 0;
286             }
287             continue;
288         }
289         c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache);
290         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
291             /* fast path version of appendResult() for ASCII results */
292             dest[destIndex++]=(uint8_t)c2;
293         } else {
294             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
295             if(destIndex<0) {
296                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
297                 return 0;
298             }
299         }
300     }
301
302     if(destIndex>destCapacity) {
303         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
304     }
305     return destIndex;
306 }
307
308 #if !UCONFIG_NO_BREAK_ITERATION
309
310 U_CFUNC int32_t U_CALLCONV
311 ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
312          uint8_t *dest, int32_t destCapacity,
313          const uint8_t *src, int32_t srcLength,
314          UErrorCode *pErrorCode) {
315     const UChar *s;
316     UChar32 c;
317     int32_t prev, titleStart, titleLimit, idx, destIndex;
318     UBool isFirstIndex;
319
320     if(U_FAILURE(*pErrorCode)) {
321         return 0;
322     }
323
324     // Use the C++ abstract base class to minimize dependencies.
325     // TODO: Change UCaseMap.iter to store a BreakIterator directly.
326     BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
327
328     /* set up local variables */
329     int32_t locCache=csm->locCache;
330     UCaseContext csc=UCASECONTEXT_INITIALIZER;
331     csc.p=(void *)src;
332     csc.limit=srcLength;
333     destIndex=0;
334     prev=0;
335     isFirstIndex=TRUE;
336
337     /* titlecasing loop */
338     while(prev<srcLength) {
339         /* find next index where to titlecase */
340         if(isFirstIndex) {
341             isFirstIndex=FALSE;
342             idx=bi->first();
343         } else {
344             idx=bi->next();
345         }
346         if(idx==UBRK_DONE || idx>srcLength) {
347             idx=srcLength;
348         }
349
350         /*
351          * Unicode 4 & 5 section 3.13 Default Case Operations:
352          *
353          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
354          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
355          * cased character F. If F exists, map F to default_title(F); then map each
356          * subsequent character C to default_lower(C).
357          *
358          * In this implementation, segment [prev..index[ into 3 parts:
359          * a) uncased characters (copy as-is) [prev..titleStart[
360          * b) first case letter (titlecase)         [titleStart..titleLimit[
361          * c) subsequent characters (lowercase)                 [titleLimit..index[
362          */
363         if(prev<idx) {
364             /* find and copy uncased characters [prev..titleStart[ */
365             titleStart=titleLimit=prev;
366             U8_NEXT(src, titleLimit, idx, c);
367             if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
368                 /* Adjust the titlecasing index (titleStart) to the next cased character. */
369                 for(;;) {
370                     titleStart=titleLimit;
371                     if(titleLimit==idx) {
372                         /*
373                          * only uncased characters in [prev..index[
374                          * stop with titleStart==titleLimit==index
375                          */
376                         break;
377                     }
378                     U8_NEXT(src, titleLimit, idx, c);
379                     if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
380                         break; /* cased letter at [titleStart..titleLimit[ */
381                     }
382                 }
383                 destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
384                 if(destIndex<0) {
385                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
386                     return 0;
387                 }
388             }
389
390             if(titleStart<titleLimit) {
391                 /* titlecase c which is from [titleStart..titleLimit[ */
392                 if(c>=0) {
393                     csc.cpStart=titleStart;
394                     csc.cpLimit=titleLimit;
395                     c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
396                     destIndex=appendResult(dest, destIndex, destCapacity, c, s);
397                 } else {
398                     // Malformed UTF-8.
399                     destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart);
400                 }
401                 if(destIndex<0) {
402                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
403                     return 0;
404                 }
405
406                 /* Special case Dutch IJ titlecasing */
407                 if (titleStart+1 < idx &&
408                         ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
409                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
410                         (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
411                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
412                     titleLimit++;
413                 }
414                 /* lowercase [titleLimit..index[ */
415                 if(titleLimit<idx) {
416                     if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
417                         /* Normal operation: Lowercase the rest of the word. */
418                         destIndex+=
419                             _caseMap(
420                                 csm, ucase_toFullLower,
421                                 dest+destIndex, destCapacity-destIndex,
422                                 src, &csc,
423                                 titleLimit, idx,
424                                 pErrorCode);
425                         if(U_FAILURE(*pErrorCode)) {
426                             return destIndex;
427                         }
428                     } else {
429                         /* Optionally just copy the rest of the word unchanged. */
430                         destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
431                         if(destIndex<0) {
432                             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
433                             return 0;
434                         }
435                     }
436                 }
437             }
438         }
439
440         prev=idx;
441     }
442
443     if(destIndex>destCapacity) {
444         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
445     }
446     return destIndex;
447 }
448
449 #endif
450
451 U_NAMESPACE_BEGIN
452 namespace GreekUpper {
453
454 UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i, int32_t length) {
455     while (i < length) {
456         UChar32 c;
457         U8_NEXT(s, i, length, c);
458         int32_t type = ucase_getTypeOrIgnorable(csp, c);
459         if ((type & UCASE_IGNORABLE) != 0) {
460             // Case-ignorable, continue with the loop.
461         } else if (type != UCASE_NONE) {
462             return TRUE;  // Followed by cased letter.
463         } else {
464             return FALSE;  // Uncased and not case-ignorable.
465         }
466     }
467     return FALSE;  // Not followed by cased letter.
468 }
469
470 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
471 int32_t toUpper(const UCaseMap *csm,
472                 uint8_t *dest, int32_t destCapacity,
473                 const uint8_t *src, int32_t srcLength,
474                 UErrorCode *pErrorCode) {
475     int32_t locCache = UCASE_LOC_GREEK;
476     int32_t destIndex=0;
477     uint32_t state = 0;
478     for (int32_t i = 0; i < srcLength;) {
479         int32_t nextIndex = i;
480         UChar32 c;
481         U8_NEXT(src, nextIndex, srcLength, c);
482         uint32_t nextState = 0;
483         int32_t type = ucase_getTypeOrIgnorable(csm->csp, c);
484         if ((type & UCASE_IGNORABLE) != 0) {
485             // c is case-ignorable
486             nextState |= (state & AFTER_CASED);
487         } else if (type != UCASE_NONE) {
488             // c is cased
489             nextState |= AFTER_CASED;
490         }
491         uint32_t data = getLetterData(c);
492         if (data > 0) {
493             uint32_t upper = data & UPPER_MASK;
494             // Add a dialytika to this iota or ypsilon vowel
495             // if we removed a tonos from the previous vowel,
496             // and that previous vowel did not also have (or gain) a dialytika.
497             // Adding one only to the final vowel in a longer sequence
498             // (which does not occur in normal writing) would require lookahead.
499             // Set the same flag as for preserving an existing dialytika.
500             if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
501                     (upper == 0x399 || upper == 0x3A5)) {
502                 data |= HAS_DIALYTIKA;
503             }
504             int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
505             if ((data & HAS_YPOGEGRAMMENI) != 0) {
506                 numYpogegrammeni = 1;
507             }
508             // Skip combining diacritics after this Greek letter.
509             int32_t nextNextIndex = nextIndex;
510             while (nextIndex < srcLength) {
511                 UChar32 c2;
512                 U8_NEXT(src, nextNextIndex, srcLength, c2);
513                 uint32_t diacriticData = getDiacriticData(c2);
514                 if (diacriticData != 0) {
515                     data |= diacriticData;
516                     if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
517                         ++numYpogegrammeni;
518                     }
519                     nextIndex = nextNextIndex;
520                 } else {
521                     break;  // not a Greek diacritic
522                 }
523             }
524             if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
525                 nextState |= AFTER_VOWEL_WITH_ACCENT;
526             }
527             // Map according to Greek rules.
528             UBool addTonos = FALSE;
529             if (upper == 0x397 &&
530                     (data & HAS_ACCENT) != 0 &&
531                     numYpogegrammeni == 0 &&
532                     (state & AFTER_CASED) == 0 &&
533                     !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) {
534                 // Keep disjunctive "or" with (only) a tonos.
535                 // We use the same "word boundary" conditions as for the Final_Sigma test.
536                 if (i == nextIndex) {
537                     upper = 0x389;  // Preserve the precomposed form.
538                 } else {
539                     addTonos = TRUE;
540                 }
541             } else if ((data & HAS_DIALYTIKA) != 0) {
542                 // Preserve a vowel with dialytika in precomposed form if it exists.
543                 if (upper == 0x399) {
544                     upper = 0x3AA;
545                     data &= ~HAS_EITHER_DIALYTIKA;
546                 } else if (upper == 0x3A5) {
547                     upper = 0x3AB;
548                     data &= ~HAS_EITHER_DIALYTIKA;
549                 }
550             }
551             destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
552             if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
553                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
554             }
555             if (destIndex >= 0 && addTonos) {
556                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
557             }
558             while (destIndex >= 0 && numYpogegrammeni > 0) {
559                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
560                 --numYpogegrammeni;
561             }
562             if(destIndex<0) {
563                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
564                 return 0;
565             }
566         } else if(c>=0) {
567             const UChar *s;
568             UChar32 c2 = 0;
569             c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
570             if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
571                 /* fast path version of appendResult() for ASCII results */
572                 dest[destIndex++]=(uint8_t)c2;
573             } else {
574                 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
575                 if(destIndex<0) {
576                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
577                     return 0;
578                 }
579             }
580         } else {
581             // Malformed UTF-8.
582             destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
583             if(destIndex<0) {
584                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
585                 return 0;
586             }
587         }
588         i = nextIndex;
589         state = nextState;
590     }
591
592     if(destIndex>destCapacity) {
593         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
594     }
595     return destIndex;
596 }
597
598 }  // namespace GreekUpper
599 U_NAMESPACE_END
600
601 static int32_t U_CALLCONV
602 ucasemap_internalUTF8ToLower(const UCaseMap *csm,
603                              uint8_t *dest, int32_t destCapacity,
604                              const uint8_t *src, int32_t srcLength,
605                              UErrorCode *pErrorCode) {
606     UCaseContext csc=UCASECONTEXT_INITIALIZER;
607     csc.p=(void *)src;
608     csc.limit=srcLength;
609     return _caseMap(
610         csm, ucase_toFullLower,
611         dest, destCapacity,
612         src, &csc, 0, srcLength,
613         pErrorCode);
614 }
615
616 static int32_t U_CALLCONV
617 ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
618                              uint8_t *dest, int32_t destCapacity,
619                              const uint8_t *src, int32_t srcLength,
620                              UErrorCode *pErrorCode) {
621     int32_t locCache = csm->locCache;
622     if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
623         return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
624     }
625     UCaseContext csc=UCASECONTEXT_INITIALIZER;
626     csc.p=(void *)src;
627     csc.limit=srcLength;
628     return _caseMap(
629         csm, ucase_toFullUpper,
630         dest, destCapacity,
631         src, &csc, 0, srcLength,
632         pErrorCode);
633 }
634
635 static int32_t
636 utf8_foldCase(const UCaseProps *csp,
637               uint8_t *dest, int32_t destCapacity,
638               const uint8_t *src, int32_t srcLength,
639               uint32_t options,
640               UErrorCode *pErrorCode) {
641     int32_t srcIndex, destIndex;
642
643     const UChar *s;
644     UChar32 c, c2;
645     int32_t start;
646
647     /* case mapping loop */
648     srcIndex=destIndex=0;
649     while(srcIndex<srcLength) {
650         start=srcIndex;
651         U8_NEXT(src, srcIndex, srcLength, c);
652         if(c<0) {
653             // Malformed UTF-8.
654             destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
655             if(destIndex<0) {
656                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
657                 return 0;
658             }
659             continue;
660         }
661         c=ucase_toFullFolding(csp, c, &s, options);
662         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
663             /* fast path version of appendResult() for ASCII results */
664             dest[destIndex++]=(uint8_t)c2;
665         } else {
666             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
667             if(destIndex<0) {
668                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
669                 return 0;
670             }
671         }
672     }
673
674     if(destIndex>destCapacity) {
675         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
676     }
677     return destIndex;
678 }
679
680 static int32_t U_CALLCONV
681 ucasemap_internalUTF8Fold(const UCaseMap *csm,
682                           uint8_t *dest, int32_t destCapacity,
683                           const uint8_t *src, int32_t srcLength,
684                           UErrorCode *pErrorCode) {
685     return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
686 }
687
688 U_CFUNC int32_t
689 ucasemap_mapUTF8(const UCaseMap *csm,
690                  uint8_t *dest, int32_t destCapacity,
691                  const uint8_t *src, int32_t srcLength,
692                  UTF8CaseMapper *stringCaseMapper,
693                  UErrorCode *pErrorCode) {
694     int32_t destLength;
695
696     /* check argument values */
697     if(U_FAILURE(*pErrorCode)) {
698         return 0;
699     }
700     if( destCapacity<0 ||
701         (dest==NULL && destCapacity>0) ||
702         src==NULL ||
703         srcLength<-1
704     ) {
705         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
706         return 0;
707     }
708
709     /* get the string length */
710     if(srcLength==-1) {
711         srcLength=(int32_t)uprv_strlen((const char *)src);
712     }
713
714     /* check for overlapping source and destination */
715     if( dest!=NULL &&
716         ((src>=dest && src<(dest+destCapacity)) ||
717          (dest>=src && dest<(src+srcLength)))
718     ) {
719         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
720         return 0;
721     }
722
723     destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
724     return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
725 }
726
727 /* public API functions */
728
729 U_CAPI int32_t U_EXPORT2
730 ucasemap_utf8ToLower(const UCaseMap *csm,
731                      char *dest, int32_t destCapacity,
732                      const char *src, int32_t srcLength,
733                      UErrorCode *pErrorCode) {
734     return ucasemap_mapUTF8(csm,
735                    (uint8_t *)dest, destCapacity,
736                    (const uint8_t *)src, srcLength,
737                    ucasemap_internalUTF8ToLower, pErrorCode);
738 }
739
740 U_CAPI int32_t U_EXPORT2
741 ucasemap_utf8ToUpper(const UCaseMap *csm,
742                      char *dest, int32_t destCapacity,
743                      const char *src, int32_t srcLength,
744                      UErrorCode *pErrorCode) {
745     return ucasemap_mapUTF8(csm,
746                    (uint8_t *)dest, destCapacity,
747                    (const uint8_t *)src, srcLength,
748                    ucasemap_internalUTF8ToUpper, pErrorCode);
749 }
750
751 U_CAPI int32_t U_EXPORT2
752 ucasemap_utf8FoldCase(const UCaseMap *csm,
753                       char *dest, int32_t destCapacity,
754                       const char *src, int32_t srcLength,
755                       UErrorCode *pErrorCode) {
756     return ucasemap_mapUTF8(csm,
757                    (uint8_t *)dest, destCapacity,
758                    (const uint8_t *)src, srcLength,
759                    ucasemap_internalUTF8Fold, pErrorCode);
760 }