source/common/ucasemap.cpp

   1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 *******************************************************************************
   5 *
   6 *   Copyright (C) 2005-2016, International Business Machines
   7 *   Corporation and others.  All Rights Reserved.
   8 *
   9 *******************************************************************************
  10 *   file name:  ucasemap.cpp
  11 *   encoding:   US-ASCII
  12 *   tab size:   8 (not used)
  13 *   indentation:4
  14 *
  15 *   created on: 2005may06
  16 *   created by: Markus W. Scherer
  17 *
  18 *   Case mapping service object and functions using it.
  19 */
  20
  21 #include "unicode/utypes.h"
  22 #include "unicode/brkiter.h"
  23 #include "unicode/ubrk.h"
  24 #include "unicode/uloc.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/ucasemap.h"
  27 #if !UCONFIG_NO_BREAK_ITERATION
  28 #include "unicode/utext.h"
  29 #endif
  30 #include "unicode/utf.h"
  31 #include "unicode/utf8.h"
  32 #include "unicode/utf16.h"
  33 #include "cmemory.h"
  34 #include "cstring.h"
  35 #include "ucase.h"
  36 #include "ustr_imp.h"
  37
  38 U_NAMESPACE_USE
  39
  40 /* UCaseMap service object -------------------------------------------------- */
  41
  42 U_CAPI UCaseMap * U_EXPORT2
  43 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
  44     UCaseMap *csm;
  45
  46     if(U_FAILURE(*pErrorCode)) {
  47         return NULL;
  48     }
  49
  50     csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap));
  51     if(csm==NULL) {
  52         return NULL;
  53     }
  54     uprv_memset(csm, 0, sizeof(UCaseMap));
  55
  56     csm->csp=ucase_getSingleton();
  57     ucasemap_setLocale(csm, locale, pErrorCode);
  58     if(U_FAILURE(*pErrorCode)) {
  59         uprv_free(csm);
  60         return NULL;
  61     }
  62
  63     csm->options=options;
  64     return csm;
  65 }
  66
  67 U_CAPI void U_EXPORT2
  68 ucasemap_close(UCaseMap *csm) {
  69     if(csm!=NULL) {
  70 #if !UCONFIG_NO_BREAK_ITERATION
  71         // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
  72         delete reinterpret_cast<BreakIterator *>(csm->iter);
  73 #endif
  74         uprv_free(csm);
  75     }
  76 }
  77
  78 U_CAPI const char * U_EXPORT2
  79 ucasemap_getLocale(const UCaseMap *csm) {
  80     return csm->locale;
  81 }
  82
  83 U_CAPI uint32_t U_EXPORT2
  84 ucasemap_getOptions(const UCaseMap *csm) {
  85     return csm->options;
  86 }
  87
  88 U_CAPI void U_EXPORT2
  89 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
  90     int32_t length;
  91
  92     if(U_FAILURE(*pErrorCode)) {
  93         return;
  94     }
  95
  96     length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
  97     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
  98         *pErrorCode=U_ZERO_ERROR;
  99         /* we only really need the language code for case mappings */
 100         length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
 101     }
 102     if(length==sizeof(csm->locale)) {
 103         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 104     }
 105     csm->locCache=0;
 106     if(U_SUCCESS(*pErrorCode)) {
 107         ucase_getCaseLocale(csm->locale, &csm->locCache);
 108     } else {
 109         csm->locale[0]=0;
 110     }
 111 }
 112
 113 U_CAPI void U_EXPORT2
 114 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) {
 115     csm->options=options;
 116 }
 117
 118 /* UTF-8 string case mappings ----------------------------------------------- */
 119
 120 /* TODO(markus): Move to a new, separate utf8case.c file. */
 121
 122 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
 123 static inline int32_t
 124 appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
 125              int32_t result, const UChar *s) {
 126     UChar32 c;
 127     int32_t length;
 128     UErrorCode errorCode;
 129
 130     /* decode the result */
 131     if(result<0) {
 132         /* (not) original code point */
 133         c=~result;
 134         length=U8_LENGTH(c);
 135     } else if(result<=UCASE_MAX_STRING_LENGTH) {
 136         c=U_SENTINEL;
 137         length=result;
 138     } else {
 139         c=result;
 140         length=U8_LENGTH(c);
 141     }
 142     if(length>(INT32_MAX-destIndex)) {
 143         return -1;  // integer overflow
 144     }
 145
 146     if(destIndex<destCapacity) {
 147         /* append the result */
 148         if(c>=0) {
 149             /* code point */
 150             UBool isError=FALSE;
 151             U8_APPEND(dest, destIndex, destCapacity, c, isError);
 152             if(isError) {
 153                 /* overflow, nothing written */
 154                 destIndex+=length;
 155             }
 156         } else {
 157             /* string */
 158             int32_t destLength;
 159             errorCode=U_ZERO_ERROR;
 160             u_strToUTF8(
 161                 (char *)(dest+destIndex), destCapacity-destIndex, &destLength,
 162                 s, length,
 163                 &errorCode);
 164             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
 165                 return -1;
 166             }
 167             if(destLength>(INT32_MAX-destIndex)) {
 168                 return -1;  // integer overflow
 169             }
 170             destIndex+=destLength;
 171             /* we might have an overflow, but we know the actual length */
 172         }
 173     } else {
 174         /* preflight */
 175         if(c>=0) {
 176             destIndex+=length;
 177         } else {
 178             int32_t destLength;
 179             errorCode=U_ZERO_ERROR;
 180             u_strToUTF8(
 181                 NULL, 0, &destLength,
 182                 s, length,
 183                 &errorCode);
 184             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
 185                 return -1;
 186             }
 187             if(destLength>(INT32_MAX-destIndex)) {
 188                 return -1;  // integer overflow
 189             }
 190             destIndex+=destLength;
 191         }
 192     }
 193     return destIndex;
 194 }
 195
 196 static inline int32_t
 197 appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
 198     int32_t length=U8_LENGTH(c);
 199     if(length>(INT32_MAX-destIndex)) {
 200         return -1;  // integer overflow
 201     }
 202     int32_t limit=destIndex+length;
 203     if(limit<destCapacity) {
 204         U8_APPEND_UNSAFE(dest, destIndex, c);
 205     }
 206     return limit;
 207 }
 208
 209 static inline int32_t
 210 appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
 211              const uint8_t *s, int32_t length) {
 212     if(length>0) {
 213         if(length>(INT32_MAX-destIndex)) {
 214             return -1;  // integer overflow
 215         }
 216         if((destIndex+length)<=destCapacity) {
 217             uprv_memcpy(dest+destIndex, s, length);
 218         }
 219         destIndex+=length;
 220     }
 221     return destIndex;
 222 }
 223
 224 static UChar32 U_CALLCONV
 225 utf8_caseContextIterator(void *context, int8_t dir) {
 226     UCaseContext *csc=(UCaseContext *)context;
 227     UChar32 c;
 228
 229     if(dir<0) {
 230         /* reset for backward iteration */
 231         csc->index=csc->cpStart;
 232         csc->dir=dir;
 233     } else if(dir>0) {
 234         /* reset for forward iteration */
 235         csc->index=csc->cpLimit;
 236         csc->dir=dir;
 237     } else {
 238         /* continue current iteration direction */
 239         dir=csc->dir;
 240     }
 241
 242     if(dir<0) {
 243         if(csc->start<csc->index) {
 244             U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
 245             return c;
 246         }
 247     } else {
 248         if(csc->index<csc->limit) {
 249             U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
 250             return c;
 251         }
 252     }
 253     return U_SENTINEL;
 254 }
 255
 256 /*
 257  * Case-maps [srcStart..srcLimit[ but takes
 258  * context [0..srcLength[ into account.
 259  */
 260 static int32_t
 261 _caseMap(const UCaseMap *csm, UCaseMapFull *map,
 262          uint8_t *dest, int32_t destCapacity,
 263          const uint8_t *src, UCaseContext *csc,
 264          int32_t srcStart, int32_t srcLimit,
 265          UErrorCode *pErrorCode) {
 266     const UChar *s = NULL;
 267     UChar32 c, c2 = 0;
 268     int32_t srcIndex, destIndex;
 269     int32_t locCache;
 270
 271     locCache=csm->locCache;
 272
 273     /* case mapping loop */
 274     srcIndex=srcStart;
 275     destIndex=0;
 276     while(srcIndex<srcLimit) {
 277         csc->cpStart=srcIndex;
 278         U8_NEXT(src, srcIndex, srcLimit, c);
 279         csc->cpLimit=srcIndex;
 280         if(c<0) {
 281             // Malformed UTF-8.
 282             destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart);
 283             if(destIndex<0) {
 284                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 285                 return 0;
 286             }
 287             continue;
 288         }
 289         c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache);
 290         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
 291             /* fast path version of appendResult() for ASCII results */
 292             dest[destIndex++]=(uint8_t)c2;
 293         } else {
 294             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 295             if(destIndex<0) {
 296                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 297                 return 0;
 298             }
 299         }
 300     }
 301
 302     if(destIndex>destCapacity) {
 303         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 304     }
 305     return destIndex;
 306 }
 307
 308 #if !UCONFIG_NO_BREAK_ITERATION
 309
 310 U_CFUNC int32_t U_CALLCONV
 311 ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
 312          uint8_t *dest, int32_t destCapacity,
 313          const uint8_t *src, int32_t srcLength,
 314          UErrorCode *pErrorCode) {
 315     const UChar *s;
 316     UChar32 c;
 317     int32_t prev, titleStart, titleLimit, idx, destIndex;
 318     UBool isFirstIndex;
 319
 320     if(U_FAILURE(*pErrorCode)) {
 321         return 0;
 322     }
 323
 324     // Use the C++ abstract base class to minimize dependencies.
 325     // TODO: Change UCaseMap.iter to store a BreakIterator directly.
 326     BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
 327
 328     /* set up local variables */
 329     int32_t locCache=csm->locCache;
 330     UCaseContext csc=UCASECONTEXT_INITIALIZER;
 331     csc.p=(void *)src;
 332     csc.limit=srcLength;
 333     destIndex=0;
 334     prev=0;
 335     isFirstIndex=TRUE;
 336
 337     /* titlecasing loop */
 338     while(prev<srcLength) {
 339         /* find next index where to titlecase */
 340         if(isFirstIndex) {
 341             isFirstIndex=FALSE;
 342             idx=bi->first();
 343         } else {
 344             idx=bi->next();
 345         }
 346         if(idx==UBRK_DONE || idx>srcLength) {
 347             idx=srcLength;
 348         }
 349
 350         /*
 351          * Unicode 4 & 5 section 3.13 Default Case Operations:
 352          *
 353          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
 354          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
 355          * cased character F. If F exists, map F to default_title(F); then map each
 356          * subsequent character C to default_lower(C).
 357          *
 358          * In this implementation, segment [prev..index[ into 3 parts:
 359          * a) uncased characters (copy as-is) [prev..titleStart[
 360          * b) first case letter (titlecase)         [titleStart..titleLimit[
 361          * c) subsequent characters (lowercase)                 [titleLimit..index[
 362          */
 363         if(prev<idx) {
 364             /* find and copy uncased characters [prev..titleStart[ */
 365             titleStart=titleLimit=prev;
 366             U8_NEXT(src, titleLimit, idx, c);
 367             if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
 368                 /* Adjust the titlecasing index (titleStart) to the next cased character. */
 369                 for(;;) {
 370                     titleStart=titleLimit;
 371                     if(titleLimit==idx) {
 372                         /*
 373                          * only uncased characters in [prev..index[
 374                          * stop with titleStart==titleLimit==index
 375                          */
 376                         break;
 377                     }
 378                     U8_NEXT(src, titleLimit, idx, c);
 379                     if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
 380                         break; /* cased letter at [titleStart..titleLimit[ */
 381                     }
 382                 }
 383                 destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
 384                 if(destIndex<0) {
 385                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 386                     return 0;
 387                 }
 388             }
 389
 390             if(titleStart<titleLimit) {
 391                 /* titlecase c which is from [titleStart..titleLimit[ */
 392                 if(c>=0) {
 393                     csc.cpStart=titleStart;
 394                     csc.cpLimit=titleLimit;
 395                     c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
 396                     destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 397                 } else {
 398                     // Malformed UTF-8.
 399                     destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart);
 400                 }
 401                 if(destIndex<0) {
 402                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 403                     return 0;
 404                 }
 405
 406                 /* Special case Dutch IJ titlecasing */
 407                 if (titleStart+1 < idx &&
 408                         ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
 409                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
 410                         (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
 411                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
 412                     titleLimit++;
 413                 }
 414                 /* lowercase [titleLimit..index[ */
 415                 if(titleLimit<idx) {
 416                     if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
 417                         /* Normal operation: Lowercase the rest of the word. */
 418                         destIndex+=
 419                             _caseMap(
 420                                 csm, ucase_toFullLower,
 421                                 dest+destIndex, destCapacity-destIndex,
 422                                 src, &csc,
 423                                 titleLimit, idx,
 424                                 pErrorCode);
 425                         if(U_FAILURE(*pErrorCode)) {
 426                             return destIndex;
 427                         }
 428                     } else {
 429                         /* Optionally just copy the rest of the word unchanged. */
 430                         destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
 431                         if(destIndex<0) {
 432                             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 433                             return 0;
 434                         }
 435                     }
 436                 }
 437             }
 438         }
 439
 440         prev=idx;
 441     }
 442
 443     if(destIndex>destCapacity) {
 444         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 445     }
 446     return destIndex;
 447 }
 448
 449 #endif
 450
 451 U_NAMESPACE_BEGIN
 452 namespace GreekUpper {
 453
 454 UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i, int32_t length) {
 455     while (i < length) {
 456         UChar32 c;
 457         U8_NEXT(s, i, length, c);
 458         int32_t type = ucase_getTypeOrIgnorable(csp, c);
 459         if ((type & UCASE_IGNORABLE) != 0) {
 460             // Case-ignorable, continue with the loop.
 461         } else if (type != UCASE_NONE) {
 462             return TRUE;  // Followed by cased letter.
 463         } else {
 464             return FALSE;  // Uncased and not case-ignorable.
 465         }
 466     }
 467     return FALSE;  // Not followed by cased letter.
 468 }
 469
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
/**
 * Greek-specific uppercasing of UTF-8 text.
 *
 * Walks src one code point at a time and handles three kinds of input:
 * - code points for which getLetterData() returns non-zero data: following code
 *   points recognized by getDiacriticData() are consumed together with the
 *   letter, and the output is the uppercase base letter, optionally followed by
 *   U+0308 (dialytika), U+0301 (tonos) and one U+0399 per ypogegrammeni;
 * - other well-formed code points: mapped with ucase_toFullUpper();
 * - malformed UTF-8: copied through unchanged.
 *
 * "state" carries AFTER_CASED / AFTER_VOWEL_WITH_ACCENT information from the
 * previous code point into the current iteration.
 *
 * @return The output length. U_BUFFER_OVERFLOW_ERROR is set if it exceeds
 *         destCapacity. Returns 0 with U_INDEX_OUTOFBOUNDS_ERROR if an append
 *         helper reports failure (-1).
 */
int32_t toUpper(const UCaseMap *csm,
                uint8_t *dest, int32_t destCapacity,
                const uint8_t *src, int32_t srcLength,
                UErrorCode *pErrorCode) {
    int32_t locCache = UCASE_LOC_GREEK;
    int32_t destIndex=0;
    uint32_t state = 0;
    for (int32_t i = 0; i < srcLength;) {
        // i stays at the start of the current code point; nextIndex moves past it
        // and later past any consumed diacritics.
        int32_t nextIndex = i;
        UChar32 c;
        U8_NEXT(src, nextIndex, srcLength, c);
        uint32_t nextState = 0;
        int32_t type = ucase_getTypeOrIgnorable(csm->csp, c);
        if ((type & UCASE_IGNORABLE) != 0) {
            // c is case-ignorable
            nextState |= (state & AFTER_CASED);
        } else if (type != UCASE_NONE) {
            // c is cased
            nextState |= AFTER_CASED;
        }
        uint32_t data = getLetterData(c);
        if (data > 0) {
            uint32_t upper = data & UPPER_MASK;
            // Add a dialytika to this iota or ypsilon vowel
            // if we removed a tonos from the previous vowel,
            // and that previous vowel did not also have (or gain) a dialytika.
            // Adding one only to the final vowel in a longer sequence
            // (which does not occur in normal writing) would require lookahead.
            // Set the same flag as for preserving an existing dialytika.
            if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
                    (upper == 0x399 || upper == 0x3A5)) {
                data |= HAS_DIALYTIKA;
            }
            int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
            if ((data & HAS_YPOGEGRAMMENI) != 0) {
                numYpogegrammeni = 1;
            }
            // Skip combining diacritics after this Greek letter.
            // nextNextIndex reads ahead; nextIndex is only advanced once the
            // code point read turns out to be a Greek diacritic.
            int32_t nextNextIndex = nextIndex;
            while (nextIndex < srcLength) {
                UChar32 c2;
                U8_NEXT(src, nextNextIndex, srcLength, c2);
                uint32_t diacriticData = getDiacriticData(c2);
                if (diacriticData != 0) {
                    data |= diacriticData;
                    if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
                        ++numYpogegrammeni;
                    }
                    nextIndex = nextNextIndex;
                } else {
                    break;  // not a Greek diacritic
                }
            }
            if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
                nextState |= AFTER_VOWEL_WITH_ACCENT;
            }
            // Map according to Greek rules.
            UBool addTonos = FALSE;
            if (upper == 0x397 &&
                    (data & HAS_ACCENT) != 0 &&
                    numYpogegrammeni == 0 &&
                    (state & AFTER_CASED) == 0 &&
                    !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) {
                // Keep disjunctive "or" with (only) a tonos.
                // We use the same "word boundary" conditions as for the Final_Sigma test.
                // NOTE(review): nextIndex has already been moved past c by the U8_NEXT
                // at the top of the loop, so i == nextIndex looks like it can never be
                // true here and the precomposed form would never be chosen - confirm
                // against the UTF-16 and Java versions before changing.
                if (i == nextIndex) {
                    upper = 0x389;  // Preserve the precomposed form.
                } else {
                    addTonos = TRUE;
                }
            } else if ((data & HAS_DIALYTIKA) != 0) {
                // Preserve a vowel with dialytika in precomposed form if it exists.
                if (upper == 0x399) {
                    upper = 0x3AA;
                    data &= ~HAS_EITHER_DIALYTIKA;
                } else if (upper == 0x3A5) {
                    upper = 0x3AB;
                    data &= ~HAS_EITHER_DIALYTIKA;
                }
            }
            // Each append is skipped once an earlier one has failed (destIndex < 0);
            // the failure is reported once, below.
            destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
            if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
                destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
            }
            if (destIndex >= 0 && addTonos) {
                destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
            }
            while (destIndex >= 0 && numYpogegrammeni > 0) {
                destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
                --numYpogegrammeni;
            }
            if(destIndex<0) {
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
        } else if(c>=0) {
            // Well-formed code point without Greek letter data: regular full uppercasing.
            // No context iterator is passed (NULL, NULL).
            const UChar *s;
            UChar32 c2 = 0;
            c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
            if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
                /* fast path version of appendResult() for ASCII results */
                dest[destIndex++]=(uint8_t)c2;
            } else {
                destIndex=appendResult(dest, destIndex, destCapacity, c, s);
                if(destIndex<0) {
                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }
            }
        } else {
            // Malformed UTF-8.
            destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
            if(destIndex<0) {
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
        }
        i = nextIndex;
        state = nextState;
    }

    if(destIndex>destCapacity) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
    return destIndex;
}
 597
 598 }  // namespace GreekUpper
 599 U_NAMESPACE_END
 600
 601 static int32_t U_CALLCONV
 602 ucasemap_internalUTF8ToLower(const UCaseMap *csm,
 603                              uint8_t *dest, int32_t destCapacity,
 604                              const uint8_t *src, int32_t srcLength,
 605                              UErrorCode *pErrorCode) {
 606     UCaseContext csc=UCASECONTEXT_INITIALIZER;
 607     csc.p=(void *)src;
 608     csc.limit=srcLength;
 609     return _caseMap(
 610         csm, ucase_toFullLower,
 611         dest, destCapacity,
 612         src, &csc, 0, srcLength,
 613         pErrorCode);
 614 }
 615
 616 static int32_t U_CALLCONV
 617 ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
 618                              uint8_t *dest, int32_t destCapacity,
 619                              const uint8_t *src, int32_t srcLength,
 620                              UErrorCode *pErrorCode) {
 621     int32_t locCache = csm->locCache;
 622     if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
 623         return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
 624     }
 625     UCaseContext csc=UCASECONTEXT_INITIALIZER;
 626     csc.p=(void *)src;
 627     csc.limit=srcLength;
 628     return _caseMap(
 629         csm, ucase_toFullUpper,
 630         dest, destCapacity,
 631         src, &csc, 0, srcLength,
 632         pErrorCode);
 633 }
 634
 635 static int32_t
 636 utf8_foldCase(const UCaseProps *csp,
 637               uint8_t *dest, int32_t destCapacity,
 638               const uint8_t *src, int32_t srcLength,
 639               uint32_t options,
 640               UErrorCode *pErrorCode) {
 641     int32_t srcIndex, destIndex;
 642
 643     const UChar *s;
 644     UChar32 c, c2;
 645     int32_t start;
 646
 647     /* case mapping loop */
 648     srcIndex=destIndex=0;
 649     while(srcIndex<srcLength) {
 650         start=srcIndex;
 651         U8_NEXT(src, srcIndex, srcLength, c);
 652         if(c<0) {
 653             // Malformed UTF-8.
 654             destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
 655             if(destIndex<0) {
 656                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 657                 return 0;
 658             }
 659             continue;
 660         }
 661         c=ucase_toFullFolding(csp, c, &s, options);
 662         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
 663             /* fast path version of appendResult() for ASCII results */
 664             dest[destIndex++]=(uint8_t)c2;
 665         } else {
 666             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 667             if(destIndex<0) {
 668                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 669                 return 0;
 670             }
 671         }
 672     }
 673
 674     if(destIndex>destCapacity) {
 675         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 676     }
 677     return destIndex;
 678 }
 679
 680 static int32_t U_CALLCONV
 681 ucasemap_internalUTF8Fold(const UCaseMap *csm,
 682                           uint8_t *dest, int32_t destCapacity,
 683                           const uint8_t *src, int32_t srcLength,
 684                           UErrorCode *pErrorCode) {
 685     return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
 686 }
 687
 688 U_CFUNC int32_t
 689 ucasemap_mapUTF8(const UCaseMap *csm,
 690                  uint8_t *dest, int32_t destCapacity,
 691                  const uint8_t *src, int32_t srcLength,
 692                  UTF8CaseMapper *stringCaseMapper,
 693                  UErrorCode *pErrorCode) {
 694     int32_t destLength;
 695
 696     /* check argument values */
 697     if(U_FAILURE(*pErrorCode)) {
 698         return 0;
 699     }
 700     if( destCapacity<0 ||
 701         (dest==NULL && destCapacity>0) ||
 702         src==NULL ||
 703         srcLength<-1
 704     ) {
 705         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
 706         return 0;
 707     }
 708
 709     /* get the string length */
 710     if(srcLength==-1) {
 711         srcLength=(int32_t)uprv_strlen((const char *)src);
 712     }
 713
 714     /* check for overlapping source and destination */
 715     if( dest!=NULL &&
 716         ((src>=dest && src<(dest+destCapacity)) ||
 717          (dest>=src && dest<(src+srcLength)))
 718     ) {
 719         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
 720         return 0;
 721     }
 722
 723     destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
 724     return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
 725 }
 726
 727 /* public API functions */
 728
 729 U_CAPI int32_t U_EXPORT2
 730 ucasemap_utf8ToLower(const UCaseMap *csm,
 731                      char *dest, int32_t destCapacity,
 732                      const char *src, int32_t srcLength,
 733                      UErrorCode *pErrorCode) {
 734     return ucasemap_mapUTF8(csm,
 735                    (uint8_t *)dest, destCapacity,
 736                    (const uint8_t *)src, srcLength,
 737                    ucasemap_internalUTF8ToLower, pErrorCode);
 738 }
 739
 740 U_CAPI int32_t U_EXPORT2
 741 ucasemap_utf8ToUpper(const UCaseMap *csm,
 742                      char *dest, int32_t destCapacity,
 743                      const char *src, int32_t srcLength,
 744                      UErrorCode *pErrorCode) {
 745     return ucasemap_mapUTF8(csm,
 746                    (uint8_t *)dest, destCapacity,
 747                    (const uint8_t *)src, srcLength,
 748                    ucasemap_internalUTF8ToUpper, pErrorCode);
 749 }
 750
 751 U_CAPI int32_t U_EXPORT2
 752 ucasemap_utf8FoldCase(const UCaseMap *csm,
 753                       char *dest, int32_t destCapacity,
 754                       const char *src, int32_t srcLength,
 755                       UErrorCode *pErrorCode) {
 756     return ucasemap_mapUTF8(csm,
 757                    (uint8_t *)dest, destCapacity,
 758                    (const uint8_t *)src, srcLength,
 759                    ucasemap_internalUTF8Fold, pErrorCode);
 760 }