src/third_party/icu/source/common/unames.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2009, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *   file name:  unames.c
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 1999oct04
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 #include "unicode/utypes.h"
  18 #include "unicode/putil.h"
  19 #include "unicode/uchar.h"
  20 #include "unicode/udata.h"
  21 #include "ustr_imp.h"
  22 #include "umutex.h"
  23 #include "cmemory.h"
  24 #include "cstring.h"
  25 #include "ucln_cmn.h"
  26 #include "udataswp.h"
  27 #include "uprops.h"
  28
  29 /* prototypes ------------------------------------------------------------- */
  30
  31 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
  32
  33 static const char DATA_NAME[] = "unames";
  34 static const char DATA_TYPE[] = "icu";
  35
  36 #define GROUP_SHIFT 5
  37 #define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
  38 #define GROUP_MASK (LINES_PER_GROUP-1)
  39
  40 /*
  41  * This struct was replaced by explicitly accessing equivalent
  42  * fields from triples of uint16_t.
  43  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
  44  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
  45  * would advance by 6 bytes (3 uint16_t).
  46  *
  47  * We can't just change the data structure because it's loaded from a data file,
  48  * and we don't want to make it less compact, so we changed the access code.
  49  *
  50  * For details see ICU tickets 6331 and 6008.
  51 typedef struct {
  52     uint16_t groupMSB,
  53              offsetHigh, offsetLow; / * avoid padding * /
  54 } Group;
  55  */
  56 enum {
  57     GROUP_MSB,
  58     GROUP_OFFSET_HIGH,
  59     GROUP_OFFSET_LOW,
  60     GROUP_LENGTH
  61 };
  62
  63 /*
  64  * Get the 32-bit group offset.
  65  * @param group (const uint16_t *) pointer to a Group triple of uint16_t
  66  * @return group offset (int32_t)
  67  */
  68 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
  69
  70 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
  71 #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
  72
  73 typedef struct {
  74     uint32_t start, end;
  75     uint8_t type, variant;
  76     uint16_t size;
  77 } AlgorithmicRange;
  78
  79 typedef struct {
  80     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
  81 } UCharNames;
  82
  83 /*
  84  * Get the groups table from a UCharNames struct.
  85  * The groups table consists of one uint16_t groupCount followed by
  86  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
  87  * and the comment for the old struct Group above.
  88  *
  89  * @param names (const UCharNames *) pointer to the UCharNames indexes
  90  * @return (const uint16_t *) pointer to the groups table
  91  */
  92 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
  93
  94 typedef struct {
  95     const char *otherName;
  96     UChar32 code;
  97 } FindName;
  98
  99 #define DO_FIND_NAME NULL
 100
 101 static UDataMemory *uCharNamesData=NULL;
 102 static UCharNames *uCharNames=NULL;
 103 static UErrorCode gLoadErrorCode=U_ZERO_ERROR;
 104
 105 /*
 106  * Maximum length of character names (regular & 1.0).
 107  */
 108 static int32_t gMaxNameLength=0;
 109
 110 /*
 111  * Set of chars used in character names (regular & 1.0).
 112  * Chars are platform-dependent (can be EBCDIC).
 113  */
 114 static uint32_t gNameSet[8]={ 0 };
 115
 116 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
 117 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
 118 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
 119
 120 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
 121
 122 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
 123     "unassigned",
 124     "uppercase letter",
 125     "lowercase letter",
 126     "titlecase letter",
 127     "modifier letter",
 128     "other letter",
 129     "non spacing mark",
 130     "enclosing mark",
 131     "combining spacing mark",
 132     "decimal digit number",
 133     "letter number",
 134     "other number",
 135     "space separator",
 136     "line separator",
 137     "paragraph separator",
 138     "control",
 139     "format",
 140     "private use area",
 141     "surrogate",
 142     "dash punctuation",
 143     "start punctuation",
 144     "end punctuation",
 145     "connector punctuation",
 146     "other punctuation",
 147     "math symbol",
 148     "currency symbol",
 149     "modifier symbol",
 150     "other symbol",
 151     "initial punctuation",
 152     "final punctuation",
 153     "noncharacter",
 154     "lead surrogate",
 155     "trail surrogate"
 156 };
 157
 158 /* implementation ----------------------------------------------------------- */
 159
 160 static UBool U_CALLCONV unames_cleanup(void)
 161 {
 162     if(uCharNamesData) {
 163         udata_close(uCharNamesData);
 164         uCharNamesData = NULL;
 165     }
 166     if(uCharNames) {
 167         uCharNames = NULL;
 168     }
 169     gMaxNameLength=0;
 170     return TRUE;
 171 }
 172
 173 static UBool U_CALLCONV
 174 isAcceptable(void *context,
 175              const char *type, const char *name,
 176              const UDataInfo *pInfo) {
 177     return (UBool)(
 178         pInfo->size>=20 &&
 179         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
 180         pInfo->charsetFamily==U_CHARSET_FAMILY &&
 181         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
 182         pInfo->dataFormat[1]==0x6e &&
 183         pInfo->dataFormat[2]==0x61 &&
 184         pInfo->dataFormat[3]==0x6d &&
 185         pInfo->formatVersion[0]==1);
 186 }
 187
 188 static UBool
 189 isDataLoaded(UErrorCode *pErrorCode) {
 190     /* load UCharNames from file if necessary */
 191     UBool isCached;
 192
 193     /* do this because double-checked locking is broken */
 194     UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);
 195
 196     if(!isCached) {
 197         UCharNames *names;
 198         UDataMemory *data;
 199
 200         /* check error code from previous attempt */
 201         if(U_FAILURE(gLoadErrorCode)) {
 202             *pErrorCode=gLoadErrorCode;
 203             return FALSE;
 204         }
 205
 206         /* open the data outside the mutex block */
 207         data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
 208         if(U_FAILURE(*pErrorCode)) {
 209             gLoadErrorCode=*pErrorCode;
 210             return FALSE;
 211         }
 212
 213         names=(UCharNames *)udata_getMemory(data);
 214
 215         /* in the mutex block, set the data for this process */
 216         {
 217             umtx_lock(NULL);
 218             if(uCharNames==NULL) {
 219                 uCharNamesData=data;
 220                 uCharNames=names;
 221                 data=NULL;
 222                 names=NULL;
 223                 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
 224             }
 225             umtx_unlock(NULL);
 226         }
 227
 228         /* if a different thread set it first, then close the extra data */
 229         if(data!=NULL) {
 230             udata_close(data); /* NULL if it was set correctly */
 231         }
 232     }
 233     return TRUE;
 234 }
 235
 236 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
 237     if((bufferLength)>0) { \
 238         *(buffer)++=c; \
 239         --(bufferLength); \
 240     } \
 241     ++(bufferPos); \
 242 }
 243
 244 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
 245
 246 /*
 247  * Important: expandName() and compareName() are almost the same -
 248  * apply fixes to both.
 249  *
 250  * UnicodeData.txt uses ';' as a field separator, so no
 251  * field can contain ';' as part of its contents.
 252  * In unames.dat, it is marked as token[';']==-1 only if the
 253  * semicolon is used in the data file - which is iff we
 254  * have Unicode 1.0 names or ISO comments or aliases.
 255  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
 256  * although we know that it will never be part of a name.
 257  */
 258 static uint16_t
 259 expandName(UCharNames *names,
 260            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 261            char *buffer, uint16_t bufferLength) {
 262     uint16_t *tokens=(uint16_t *)names+8;
 263     uint16_t token, tokenCount=*tokens++, bufferPos=0;
 264     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 265     uint8_t c;
 266
 267     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 268         /*
 269          * skip the modern name if it is not requested _and_
 270          * if the semicolon byte value is a character, not a token number
 271          */
 272         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 273             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
 274             do {
 275                 while(nameLength>0) {
 276                     --nameLength;
 277                     if(*name++==';') {
 278                         break;
 279                     }
 280                 }
 281             } while(--fieldIndex>0);
 282         } else {
 283             /*
 284              * the semicolon byte value is a token number, therefore
 285              * only modern names are stored in unames.dat and there is no
 286              * such requested alternate name here
 287              */
 288             nameLength=0;
 289         }
 290     }
 291
 292     /* write each letter directly, and write a token word per token */
 293     while(nameLength>0) {
 294         --nameLength;
 295         c=*name++;
 296
 297         if(c>=tokenCount) {
 298             if(c!=';') {
 299                 /* implicit letter */
 300                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 301             } else {
 302                 /* finished */
 303                 break;
 304             }
 305         } else {
 306             token=tokens[c];
 307             if(token==(uint16_t)(-2)) {
 308                 /* this is a lead byte for a double-byte token */
 309                 token=tokens[c<<8|*name++];
 310                 --nameLength;
 311             }
 312             if(token==(uint16_t)(-1)) {
 313                 if(c!=';') {
 314                     /* explicit letter */
 315                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 316                 } else {
 317                     /* stop, but skip the semicolon if we are seeking
 318                        extended names and there was no 2.0 name but there
 319                        is a 1.0 name. */
 320                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
 321                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 322                             continue;
 323                         }
 324                     }
 325                     /* finished */
 326                     break;
 327                 }
 328             } else {
 329                 /* write token word */
 330                 uint8_t *tokenString=tokenStrings+token;
 331                 while((c=*tokenString++)!=0) {
 332                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 333                 }
 334             }
 335         }
 336     }
 337
 338     /* zero-terminate */
 339     if(bufferLength>0) {
 340         *buffer=0;
 341     }
 342
 343     return bufferPos;
 344 }
 345
 346 /*
 347  * compareName() is almost the same as expandName() except that it compares
 348  * the currently expanded name to an input name.
 349  * It returns the match/no match result as soon as possible.
 350  */
 351 static UBool
 352 compareName(UCharNames *names,
 353             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 354             const char *otherName) {
 355     uint16_t *tokens=(uint16_t *)names+8;
 356     uint16_t token, tokenCount=*tokens++;
 357     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 358     uint8_t c;
 359     const char *origOtherName = otherName;
 360
 361     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 362         /*
 363          * skip the modern name if it is not requested _and_
 364          * if the semicolon byte value is a character, not a token number
 365          */
 366         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 367             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
 368             do {
 369                 while(nameLength>0) {
 370                     --nameLength;
 371                     if(*name++==';') {
 372                         break;
 373                     }
 374                 }
 375             } while(--fieldIndex>0);
 376         } else {
 377             /*
 378              * the semicolon byte value is a token number, therefore
 379              * only modern names are stored in unames.dat and there is no
 380              * such requested alternate name here
 381              */
 382             nameLength=0;
 383         }
 384     }
 385
 386     /* compare each letter directly, and compare a token word per token */
 387     while(nameLength>0) {
 388         --nameLength;
 389         c=*name++;
 390
 391         if(c>=tokenCount) {
 392             if(c!=';') {
 393                 /* implicit letter */
 394                 if((char)c!=*otherName++) {
 395                     return FALSE;
 396                 }
 397             } else {
 398                 /* finished */
 399                 break;
 400             }
 401         } else {
 402             token=tokens[c];
 403             if(token==(uint16_t)(-2)) {
 404                 /* this is a lead byte for a double-byte token */
 405                 token=tokens[c<<8|*name++];
 406                 --nameLength;
 407             }
 408             if(token==(uint16_t)(-1)) {
 409                 if(c!=';') {
 410                     /* explicit letter */
 411                     if((char)c!=*otherName++) {
 412                         return FALSE;
 413                     }
 414                 } else {
 415                     /* stop, but skip the semicolon if we are seeking
 416                        extended names and there was no 2.0 name but there
 417                        is a 1.0 name. */
 418                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
 419                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 420                             continue;
 421                         }
 422                     }
 423                     /* finished */
 424                     break;
 425                 }
 426             } else {
 427                 /* write token word */
 428                 uint8_t *tokenString=tokenStrings+token;
 429                 while((c=*tokenString++)!=0) {
 430                     if((char)c!=*otherName++) {
 431                         return FALSE;
 432                     }
 433                 }
 434             }
 435         }
 436     }
 437
 438     /* complete match? */
 439     return (UBool)(*otherName==0);
 440 }
 441
 442 static uint8_t getCharCat(UChar32 cp) {
 443     uint8_t cat;
 444
 445     if (UTF_IS_UNICODE_NONCHAR(cp)) {
 446         return U_NONCHARACTER_CODE_POINT;
 447     }
 448
 449     if ((cat = u_charType(cp)) == U_SURROGATE) {
 450         cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
 451     }
 452
 453     return cat;
 454 }
 455
 456 static const char *getCharCatName(UChar32 cp) {
 457     uint8_t cat = getCharCat(cp);
 458
 459     /* Return unknown if the table of names above is not up to
 460        date. */
 461
 462     if (cat >= LENGTHOF(charCatNames)) {
 463         return "unknown";
 464     } else {
 465         return charCatNames[cat];
 466     }
 467 }
 468
 469 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
 470     const char *catname = getCharCatName(code);
 471     uint16_t length = 0;
 472
 473     UChar32 cp;
 474     int ndigits, i;
 475
 476     WRITE_CHAR(buffer, bufferLength, length, '<');
 477     while (catname[length - 1]) {
 478         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
 479     }
 480     WRITE_CHAR(buffer, bufferLength, length, '-');
 481     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
 482         ;
 483     if (ndigits < 4)
 484         ndigits = 4;
 485     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
 486         uint8_t v = (uint8_t)(cp & 0xf);
 487         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
 488     }
 489     buffer += ndigits;
 490     length += ndigits;
 491     WRITE_CHAR(buffer, bufferLength, length, '>');
 492
 493     return length;
 494 }
 495
 496 /*
 497  * getGroup() does a binary search for the group that contains the
 498  * Unicode code point "code".
 499  * The return value is always a valid Group* that may contain "code"
 500  * or else is the highest group before "code".
 501  * If the lowest group is after "code", then that one is returned.
 502  */
 503 static const uint16_t *
 504 getGroup(UCharNames *names, uint32_t code) {
 505     const uint16_t *groups=GET_GROUPS(names);
 506     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
 507              start=0,
 508              limit=*groups++,
 509              number;
 510
 511     /* binary search for the group of names that contains the one for code */
 512     while(start<limit-1) {
 513         number=(uint16_t)((start+limit)/2);
 514         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
 515             limit=number;
 516         } else {
 517             start=number;
 518         }
 519     }
 520
 521     /* return this regardless of whether it is an exact match */
 522     return groups+start*GROUP_LENGTH;
 523 }
 524
 525 /*
 526  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
 527  * expands them into offsets and lengths for each string.
 528  * Lengths are stored with a variable-width encoding in consecutive nibbles:
 529  * If a nibble<0xc, then it is the length itself (0=empty string).
 530  * If a nibble>=0xc, then it forms a length value with the following nibble.
 531  * Calculation see below.
 532  * The offsets and lengths arrays must be at least 33 (one more) long because
 533  * there is no check here at the end if the last nibble is still used.
 534  */
 535 static const uint8_t *
 536 expandGroupLengths(const uint8_t *s,
 537                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
 538     /* read the lengths of the 32 strings in this group and get each string's offset */
 539     uint16_t i=0, offset=0, length=0;
 540     uint8_t lengthByte;
 541
 542     /* all 32 lengths must be read to get the offset of the first group string */
 543     while(i<LINES_PER_GROUP) {
 544         lengthByte=*s++;
 545
 546         /* read even nibble - MSBs of lengthByte */
 547         if(length>=12) {
 548             /* double-nibble length spread across two bytes */
 549             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
 550             lengthByte&=0xf;
 551         } else if((lengthByte /* &0xf0 */)>=0xc0) {
 552             /* double-nibble length spread across this one byte */
 553             length=(uint16_t)((lengthByte&0x3f)+12);
 554         } else {
 555             /* single-nibble length in MSBs */
 556             length=(uint16_t)(lengthByte>>4);
 557             lengthByte&=0xf;
 558         }
 559
 560         *offsets++=offset;
 561         *lengths++=length;
 562
 563         offset+=length;
 564         ++i;
 565
 566         /* read odd nibble - LSBs of lengthByte */
 567         if((lengthByte&0xf0)==0) {
 568             /* this nibble was not consumed for a double-nibble length above */
 569             length=lengthByte;
 570             if(length<12) {
 571                 /* single-nibble length in LSBs */
 572                 *offsets++=offset;
 573                 *lengths++=length;
 574
 575                 offset+=length;
 576                 ++i;
 577             }
 578         } else {
 579             length=0;   /* prevent double-nibble detection in the next iteration */
 580         }
 581     }
 582
 583     /* now, s is at the first group string */
 584     return s;
 585 }
 586
 587 static uint16_t
 588 expandGroupName(UCharNames *names, const uint16_t *group,
 589                 uint16_t lineNumber, UCharNameChoice nameChoice,
 590                 char *buffer, uint16_t bufferLength) {
 591     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 592     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
 593     s=expandGroupLengths(s, offsets, lengths);
 594     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
 595                       buffer, bufferLength);
 596 }
 597
 598 static uint16_t
 599 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
 600         char *buffer, uint16_t bufferLength) {
 601     const uint16_t *group=getGroup(names, code);
 602     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
 603         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
 604                                buffer, bufferLength);
 605     } else {
 606         /* group not found */
 607         /* zero-terminate */
 608         if(bufferLength>0) {
 609             *buffer=0;
 610         }
 611         return 0;
 612     }
 613 }
 614
 615 /*
 616  * enumGroupNames() enumerates all the names in a 32-group
 617  * and either calls the enumerator function or finds a given input name.
 618  */
 619 static UBool
 620 enumGroupNames(UCharNames *names, const uint16_t *group,
 621                UChar32 start, UChar32 end,
 622                UEnumCharNamesFn *fn, void *context,
 623                UCharNameChoice nameChoice) {
 624     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 625     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
 626
 627     s=expandGroupLengths(s, offsets, lengths);
 628     if(fn!=DO_FIND_NAME) {
 629         char buffer[200];
 630         uint16_t length;
 631
 632         while(start<=end) {
 633             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
 634             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
 635                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 636             }
 637             /* here, we assume that the buffer is large enough */
 638             if(length>0) {
 639                 if(!fn(context, start, nameChoice, buffer, length)) {
 640                     return FALSE;
 641                 }
 642             }
 643             ++start;
 644         }
 645     } else {
 646         const char *otherName=((FindName *)context)->otherName;
 647         while(start<=end) {
 648             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
 649                 ((FindName *)context)->code=start;
 650                 return FALSE;
 651             }
 652             ++start;
 653         }
 654     }
 655     return TRUE;
 656 }
 657
 658 /*
 659  * enumExtNames enumerate extended names.
 660  * It only needs to do it if it is called with a real function and not
 661  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
 662  * for extended names by itself.
 663  */
 664 static UBool
 665 enumExtNames(UChar32 start, UChar32 end,
 666              UEnumCharNamesFn *fn, void *context)
 667 {
 668     if(fn!=DO_FIND_NAME) {
 669         char buffer[200];
 670         uint16_t length;
 671
 672         while(start<=end) {
 673             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 674             /* here, we assume that the buffer is large enough */
 675             if(length>0) {
 676                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
 677                     return FALSE;
 678                 }
 679             }
 680             ++start;
 681         }
 682     }
 683
 684     return TRUE;
 685 }
 686
 687 static UBool
 688 enumNames(UCharNames *names,
 689           UChar32 start, UChar32 limit,
 690           UEnumCharNamesFn *fn, void *context,
 691           UCharNameChoice nameChoice) {
 692     uint16_t startGroupMSB, endGroupMSB, groupCount;
 693     const uint16_t *group, *groupLimit;
 694
 695     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
 696     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
 697
 698     /* find the group that contains start, or the highest before it */
 699     group=getGroup(names, start);
 700
 701     if(startGroupMSB==endGroupMSB) {
 702         if(startGroupMSB==group[GROUP_MSB]) {
 703             /* if start and limit-1 are in the same group, then enumerate only in that one */
 704             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
 705         }
 706     } else {
 707         const uint16_t *groups=GET_GROUPS(names);
 708         groupCount=*groups++;
 709         groupLimit=groups+groupCount*GROUP_LENGTH;
 710
 711         if(startGroupMSB==group[GROUP_MSB]) {
 712             /* enumerate characters in the partial start group */
 713             if((start&GROUP_MASK)!=0) {
 714                 if(!enumGroupNames(names, group,
 715                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
 716                                    fn, context, nameChoice)) {
 717                     return FALSE;
 718                 }
 719                 group=NEXT_GROUP(group); /* continue with the next group */
 720             }
 721         } else if(startGroupMSB>group[GROUP_MSB]) {
 722             /* make sure that we start enumerating with the first group after start */
 723             const uint16_t *nextGroup=NEXT_GROUP(group);
 724             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
 725                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
 726                 if (end > limit) {
 727                     end = limit;
 728                 }
 729                 if (!enumExtNames(start, end - 1, fn, context)) {
 730                     return FALSE;
 731                 }
 732             }
 733             group=nextGroup;
 734         }
 735
 736         /* enumerate entire groups between the start- and end-groups */
 737         while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
 738             const uint16_t *nextGroup;
 739             start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
 740             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
 741                 return FALSE;
 742             }
 743             nextGroup=NEXT_GROUP(group);
 744             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
 745                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
 746                 if (end > limit) {
 747                     end = limit;
 748                 }
 749                 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
 750                     return FALSE;
 751                 }
 752             }
 753             group=nextGroup;
 754         }
 755
 756         /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
 757         if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
 758             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
 759         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
 760             UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
 761             if (next > start) {
 762                 start = next;
 763             }
 764         } else {
 765             return TRUE;
 766         }
 767     }
 768
 769     /* we have not found a group, which means everything is made of
 770        extended names. */
 771     if (nameChoice == U_EXTENDED_CHAR_NAME) {
 772         if (limit > UCHAR_MAX_VALUE + 1) {
 773             limit = UCHAR_MAX_VALUE + 1;
 774         }
 775         return enumExtNames(start, limit - 1, fn, context);
 776     }
 777
 778     return TRUE;
 779 }
 780
 781 static uint16_t
 782 writeFactorSuffix(const uint16_t *factors, uint16_t count,
 783                   const char *s, /* suffix elements */
 784                   uint32_t code,
 785                   uint16_t indexes[8], /* output fields from here */
 786                   const char *elementBases[8], const char *elements[8],
 787                   char *buffer, uint16_t bufferLength) {
 788     uint16_t i, factor, bufferPos=0;
 789     char c;
 790
 791     /* write elements according to the factors */
 792
 793     /*
 794      * the factorized elements are determined by modulo arithmetic
 795      * with the factors of this algorithm
 796      *
 797      * note that for fewer operations, count is decremented here
 798      */
 799     --count;
 800     for(i=count; i>0; --i) {
 801         factor=factors[i];
 802         indexes[i]=(uint16_t)(code%factor);
 803         code/=factor;
 804     }
 805     /*
 806      * we don't need to calculate the last modulus because start<=code<=end
 807      * guarantees here that code<=factors[0]
 808      */
 809     indexes[0]=(uint16_t)code;
 810
 811     /* write each element */
 812     for(;;) {
 813         if(elementBases!=NULL) {
 814             *elementBases++=s;
 815         }
 816
 817         /* skip indexes[i] strings */
 818         factor=indexes[i];
 819         while(factor>0) {
 820             while(*s++!=0) {}
 821             --factor;
 822         }
 823         if(elements!=NULL) {
 824             *elements++=s;
 825         }
 826
 827         /* write element */
 828         while((c=*s++)!=0) {
 829             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 830         }
 831
 832         /* we do not need to perform the rest of this loop for i==count - break here */
 833         if(i>=count) {
 834             break;
 835         }
 836
 837         /* skip the rest of the strings for this factors[i] */
 838         factor=(uint16_t)(factors[i]-indexes[i]-1);
 839         while(factor>0) {
 840             while(*s++!=0) {}
 841             --factor;
 842         }
 843
 844         ++i;
 845     }
 846
 847     /* zero-terminate */
 848     if(bufferLength>0) {
 849         *buffer=0;
 850     }
 851
 852     return bufferPos;
 853 }
 854
 855 /*
 856  * Important:
 857  * Parts of findAlgName() are almost the same as some of getAlgName().
 858  * Fixes must be applied to both.
 859  */
 860 static uint16_t
 861 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
 862         char *buffer, uint16_t bufferLength) {
 863     uint16_t bufferPos=0;
 864
 865     /* Only the normative character name can be algorithmic. */
 866     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 867         /* zero-terminate */
 868         if(bufferLength>0) {
 869             *buffer=0;
 870         }
 871         return 0;
 872     }
 873
 874     switch(range->type) {
 875     case 0: {
 876         /* name = prefix hex-digits */
 877         const char *s=(const char *)(range+1);
 878         char c;
 879
 880         uint16_t i, count;
 881
 882         /* copy prefix */
 883         while((c=*s++)!=0) {
 884             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 885         }
 886
 887         /* write hexadecimal code point value */
 888         count=range->variant;
 889
 890         /* zero-terminate */
 891         if(count<bufferLength) {
 892             buffer[count]=0;
 893         }
 894
 895         for(i=count; i>0;) {
 896             if(--i<bufferLength) {
 897                 c=(char)(code&0xf);
 898                 if(c<10) {
 899                     c+='0';
 900                 } else {
 901                     c+='A'-10;
 902                 }
 903                 buffer[i]=c;
 904             }
 905             code>>=4;
 906         }
 907
 908         bufferPos+=count;
 909         break;
 910     }
 911     case 1: {
 912         /* name = prefix factorized-elements */
 913         uint16_t indexes[8];
 914         const uint16_t *factors=(const uint16_t *)(range+1);
 915         uint16_t count=range->variant;
 916         const char *s=(const char *)(factors+count);
 917         char c;
 918
 919         /* copy prefix */
 920         while((c=*s++)!=0) {
 921             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 922         }
 923
 924         bufferPos+=writeFactorSuffix(factors, count,
 925                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
 926         break;
 927     }
 928     default:
 929         /* undefined type */
 930         /* zero-terminate */
 931         if(bufferLength>0) {
 932             *buffer=0;
 933         }
 934         break;
 935     }
 936
 937     return bufferPos;
 938 }
 939
/*
 * Enumerates the algorithmic names of the code points start..limit-1 of one
 * algorithmic range by calling fn once per code point, in ascending order.
 * u_enumCharNames() clips start/limit to the range before calling this.
 *
 * Only U_UNICODE_CHAR_NAME and U_EXTENDED_CHAR_NAME have algorithmic names;
 * for any other nameChoice nothing is enumerated and TRUE is returned.
 *
 * The first name is built from scratch; every following name is derived
 * from the previous one by modifying the local buffer in place.
 *
 * Returns FALSE as soon as fn returns false, TRUE otherwise.
 *
 * Important: enumAlgNames() and findAlgName() are almost the same.
 * Any fix must be applied to both.
 */
static UBool
enumAlgNames(AlgorithmicRange *range,
             UChar32 start, UChar32 limit,
             UEnumCharNamesFn *fn, void *context,
             UCharNameChoice nameChoice) {
    char buffer[200];
    uint16_t length;

    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
        return TRUE;
    }

    switch(range->type) {
    case 0: {
        /* name = prefix hex-digits */
        char *s, *end;
        char c;

        /* get the full name of the start character */
        length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
        if(length<=0) {
            return TRUE;
        }

        /* call the enumerator function with this first character */
        if(!fn(context, start, nameChoice, buffer, length)) {
            return FALSE;
        }

        /* go to the end of the name; all these names have the same length */
        end=buffer;
        while(*end!=0) {
            ++end;
        }

        /* enumerate the rest of the names */
        while(++start<limit) {
            /* increment the hexadecimal number on a character-basis */
            s=end;
            for (;;) {
                /* walk backwards from the last digit, carrying while a digit is 'F' */
                c=*--s;
                if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
                    *s=(char)(c+1);
                    break;
                } else if(c=='9') {
                    *s='A';
                    break;
                } else if(c=='F') {
                    /* wrap this digit and continue with the next more significant one */
                    *s='0';
                }
            }

            if(!fn(context, start, nameChoice, buffer, length)) {
                return FALSE;
            }
        }
        break;
    }
    case 1: {
        uint16_t indexes[8];
        const char *elementBases[8], *elements[8];
        const uint16_t *factors=(const uint16_t *)(range+1);
        uint16_t count=range->variant;
        const char *s=(const char *)(factors+count);
        char *suffix, *t;
        uint16_t prefixLength, i, idx;

        char c;

        /* name = prefix factorized-elements */

        /* copy prefix */
        suffix=buffer;
        prefixLength=0;
        while((c=*s++)!=0) {
            *suffix++=c;
            ++prefixLength;
        }

        /*
         * append the suffix of the start character;
         * this also fills indexes[], elementBases[] and elements[],
         * which the incremental enumeration below relies on
         */
        length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
                                              s, (uint32_t)start-range->start,
                                              indexes, elementBases, elements,
                                              suffix, (uint16_t)(sizeof(buffer)-prefixLength)));

        /* call the enumerator function with this first character */
        if(!fn(context, start, nameChoice, buffer, length)) {
            return FALSE;
        }

        /* enumerate the rest of the names */
        while(++start<limit) {
            /* increment the indexes in lexical order bound by the factors */
            i=count;
            for (;;) {
                /* start with the last factor and carry into earlier ones on overflow */
                idx=(uint16_t)(indexes[--i]+1);
                if(idx<factors[i]) {
                    /* skip one index and its element string */
                    indexes[i]=idx;
                    s=elements[i];
                    while(*s++!=0) {
                    }
                    elements[i]=s;
                    break;
                } else {
                    /* reset this index to 0 and its element string to the first one */
                    indexes[i]=0;
                    elements[i]=elementBases[i];
                }
            }

            /* to make matters a little easier, just append all elements to the suffix */
            t=suffix;
            length=prefixLength;
            for(i=0; i<count; ++i) {
                s=elements[i];
                while((c=*s++)!=0) {
                    *t++=c;
                    ++length;
                }
            }
            /* zero-terminate */
            *t=0;

            if(!fn(context, start, nameChoice, buffer, length)) {
                return FALSE;
            }
        }
        break;
    }
    default:
        /* undefined type */
        break;
    }

    return TRUE;
}
1080
/*
 * Looks up otherName in one algorithmic range.
 * Returns the code point whose algorithmic name equals otherName,
 * or 0xffff if the name does not belong to this range.
 * Only U_UNICODE_CHAR_NAME and U_EXTENDED_CHAR_NAME are searched;
 * any other nameChoice returns 0xffff immediately.
 *
 * Type 0 ranges: otherName must be the range prefix followed by exactly
 * range->variant hex digits (0-9, A-F) whose value lies in start..end.
 * Type 1 ranges: after the prefix matches, the suffixes of all code points
 * of the range are generated in ascending order and compared with the
 * remainder of otherName.
 *
 * findAlgName() is almost the same as enumAlgNames() except that it
 * returns the code point for a name if it fits into the range.
 * It returns 0xffff otherwise.
 * Any fix must be applied to both.
 */
static UChar32
findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
    UChar32 code;

    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
        return 0xffff;
    }

    switch(range->type) {
    case 0: {
        /* name = prefix hex-digits */
        const char *s=(const char *)(range+1);
        char c;

        uint16_t i, count;

        /* compare prefix */
        while((c=*s++)!=0) {
            if((char)c!=*otherName++) {
                return 0xffff;
            }
        }

        /* read hexadecimal code point value */
        count=range->variant;
        code=0;
        for(i=0; i<count; ++i) {
            /* a premature end of otherName (c==0) falls into the else branch */
            c=*otherName++;
            if('0'<=c && c<='9') {
                code=(code<<4)|(c-'0');
            } else if('A'<=c && c<='F') {
                code=(code<<4)|(c-'A'+10);
            } else {
                return 0xffff;
            }
        }

        /* does it fit into the range? (and nothing may follow the digits) */
        if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
            return code;
        }
        break;
    }
    case 1: {
        char buffer[64];
        uint16_t indexes[8];
        const char *elementBases[8], *elements[8];
        const uint16_t *factors=(const uint16_t *)(range+1);
        uint16_t count=range->variant;
        const char *s=(const char *)(factors+count), *t;
        UChar32 start, limit;
        uint16_t i, idx;

        char c;

        /* name = prefix factorized-elements */

        /* compare prefix */
        while((c=*s++)!=0) {
            if((char)c!=*otherName++) {
                return 0xffff;
            }
        }

        start=(UChar32)range->start;
        limit=(UChar32)(range->end+1);

        /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
        writeFactorSuffix(factors, count, s, 0,
                          indexes, elementBases, elements, buffer, sizeof(buffer));

        /* compare the first suffix */
        if(0==uprv_strcmp(otherName, buffer)) {
            return start;
        }

        /* enumerate and compare the rest of the suffixes */
        while(++start<limit) {
            /* increment the indexes in lexical order bound by the factors */
            i=count;
            for (;;) {
                /* start with the last factor and carry into earlier ones on overflow */
                idx=(uint16_t)(indexes[--i]+1);
                if(idx<factors[i]) {
                    /* skip one index and its element string */
                    indexes[i]=idx;
                    s=elements[i];
                    while(*s++!=0) {}
                    elements[i]=s;
                    break;
                } else {
                    /* reset this index to 0 and its element string to the first one */
                    indexes[i]=0;
                    elements[i]=elementBases[i];
                }
            }

            /* to make matters a little easier, just compare all elements of the suffix */
            t=otherName;
            for(i=0; i<count; ++i) {
                s=elements[i];
                while((c=*s++)!=0) {
                    if(c!=*t++) {
                        /*
                         * mismatch: point s at an empty string so that the
                         * while loop ends, and use i=99 as a marker that
                         * also ends the for loop
                         */
                        s=""; /* does not match */
                        i=99;
                    }
                }
            }
            /* all elements matched and otherName has no trailing characters */
            if(i<99 && *t==0) {
                return start;
            }
        }
        break;
    }
    default:
        /* undefined type */
        break;
    }

    return 0xffff;
}
1206
1207 /* sets of name characters, maximum name lengths ---------------------------- */
1208
/*
 * Bit sets over all 256 byte values, stored as eight uint32_t words:
 * the upper 3 bits of the byte select the word, the lower 5 bits the bit.
 * The argument c is parenthesized so that an expression argument is
 * converted to uint8_t as a whole before it is shifted or masked.
 * Note that c is evaluated twice; do not pass expressions with side effects.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1211
/*
 * Adds every char of the NUL-terminated string s to the 256-bit set
 * and returns the number of chars in front of the terminator.
 */
static int32_t
calcStringSetLength(uint32_t set[8], const char *s) {
    int32_t count;
    char ch;

    for(count=0; (ch=s[count])!=0; ++count) {
        SET_ADD(set, ch);
    }
    return count;
}
1223
1224 static int32_t
1225 calcAlgNameSetsLengths(int32_t maxNameLength) {
1226     AlgorithmicRange *range;
1227     uint32_t *p;
1228     uint32_t rangeCount;
1229     int32_t length;
1230
1231     /* enumerate algorithmic ranges */
1232     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1233     rangeCount=*p;
1234     range=(AlgorithmicRange *)(p+1);
1235     while(rangeCount>0) {
1236         switch(range->type) {
1237         case 0:
1238             /* name = prefix + (range->variant times) hex-digits */
1239             /* prefix */
1240             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
1241             if(length>maxNameLength) {
1242                 maxNameLength=length;
1243             }
1244             break;
1245         case 1: {
1246             /* name = prefix factorized-elements */
1247             const uint16_t *factors=(const uint16_t *)(range+1);
1248             const char *s;
1249             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1250
1251             /* prefix length */
1252             s=(const char *)(factors+count);
1253             length=calcStringSetLength(gNameSet, s);
1254             s+=length+1; /* start of factor suffixes */
1255
1256             /* get the set and maximum factor suffix length for each factor */
1257             for(i=0; i<count; ++i) {
1258                 maxFactorLength=0;
1259                 for(factor=factors[i]; factor>0; --factor) {
1260                     factorLength=calcStringSetLength(gNameSet, s);
1261                     s+=factorLength+1;
1262                     if(factorLength>maxFactorLength) {
1263                         maxFactorLength=factorLength;
1264                     }
1265                 }
1266                 length+=maxFactorLength;
1267             }
1268
1269             if(length>maxNameLength) {
1270                 maxNameLength=length;
1271             }
1272             break;
1273         }
1274         default:
1275             /* unknown type */
1276             break;
1277         }
1278
1279         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
1280         --rangeCount;
1281     }
1282     return maxNameLength;
1283 }
1284
1285 static int32_t
1286 calcExtNameSetsLengths(int32_t maxNameLength) {
1287     int32_t i, length;
1288
1289     for(i=0; i<LENGTHOF(charCatNames); ++i) {
1290         /*
1291          * for each category, count the length of the category name
1292          * plus 9=
1293          * 2 for <>
1294          * 1 for -
1295          * 6 for most hex digits per code point
1296          */
1297         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
1298         if(length>maxNameLength) {
1299             maxNameLength=length;
1300         }
1301     }
1302     return maxNameLength;
1303 }
1304
/*
 * Measures one ';'-delimited field of a compressed name line and adds the
 * characters of the expanded name to set.
 *
 * Reading starts at *pLine and stops at lineLimit or behind the first ';'.
 * On return *pLine points to where reading stopped.
 * tokenLengths may be NULL; otherwise it caches the expanded length per
 * token, with 0 meaning "not computed yet".
 *
 * Returns the length of the expanded name.
 */
static int32_t
calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
                  uint32_t set[8],
                  const uint8_t **pLine, const uint8_t *lineLimit) {
    const uint8_t *cursor=*pLine;
    int32_t total=0;

    while(cursor!=lineLimit) {
        uint16_t unit=*cursor++;
        uint16_t token;
        int32_t wordLength;

        if(unit==(uint8_t)';') {
            /* end of this field; the separator has been consumed */
            break;
        }

        if(unit>=tokenCount) {
            /* implicit letter */
            SET_ADD(set, unit);
            ++total;
            continue;
        }

        token=tokens[unit];
        if(token==(uint16_t)(-2)) {
            /* this is a lead byte for a double-byte token */
            unit=(uint16_t)(unit<<8|*cursor++);
            token=tokens[unit];
        }

        if(token==(uint16_t)(-1)) {
            /* explicit letter */
            SET_ADD(set, unit);
            ++total;
            continue;
        }

        /* token word: measure it, using the cache when there is one */
        if(tokenLengths==NULL) {
            wordLength=calcStringSetLength(set, (const char *)tokenStrings+token);
        } else {
            wordLength=tokenLengths[unit];
            if(wordLength==0) {
                wordLength=calcStringSetLength(set, (const char *)tokenStrings+token);
                tokenLengths[unit]=(int8_t)wordLength;
            }
        }
        total+=wordLength;
    }

    *pLine=cursor;
    return total;
}
1349
1350 static void
1351 calcGroupNameSetsLengths(int32_t maxNameLength) {
1352     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
1353
1354     uint16_t *tokens=(uint16_t *)uCharNames+8;
1355     uint16_t tokenCount=*tokens++;
1356     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
1357
1358     int8_t *tokenLengths;
1359
1360     const uint16_t *group;
1361     const uint8_t *s, *line, *lineLimit;
1362
1363     int32_t groupCount, lineNumber, length;
1364
1365     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1366     if(tokenLengths!=NULL) {
1367         uprv_memset(tokenLengths, 0, tokenCount);
1368     }
1369
1370     group=GET_GROUPS(uCharNames);
1371     groupCount=*group++;
1372
1373     /* enumerate all groups */
1374     while(groupCount>0) {
1375         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
1376         s=expandGroupLengths(s, offsets, lengths);
1377
1378         /* enumerate all lines in each group */
1379         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1380             line=s+offsets[lineNumber];
1381             length=lengths[lineNumber];
1382             if(length==0) {
1383                 continue;
1384             }
1385
1386             lineLimit=line+length;
1387
1388             /* read regular name */
1389             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1390             if(length>maxNameLength) {
1391                 maxNameLength=length;
1392             }
1393             if(line==lineLimit) {
1394                 continue;
1395             }
1396
1397             /* read Unicode 1.0 name */
1398             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1399             if(length>maxNameLength) {
1400                 maxNameLength=length;
1401             }
1402             if(line==lineLimit) {
1403                 continue;
1404             }
1405
1406             /* read ISO comment */
1407             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
1408         }
1409
1410         group=NEXT_GROUP(group);
1411         --groupCount;
1412     }
1413
1414     if(tokenLengths!=NULL) {
1415         uprv_free(tokenLengths);
1416     }
1417
1418     /* set gMax... - name length last for threading */
1419     gMaxNameLength=maxNameLength;
1420 }
1421
1422 static UBool
1423 calcNameSetsLengths(UErrorCode *pErrorCode) {
1424     static const char extChars[]="0123456789ABCDEF<>-";
1425     int32_t i, maxNameLength;
1426
1427     if(gMaxNameLength!=0) {
1428         return TRUE;
1429     }
1430
1431     if(!isDataLoaded(pErrorCode)) {
1432         return FALSE;
1433     }
1434
1435     /* set hex digits, used in various names, and <>-, used in extended names */
1436     for(i=0; i<sizeof(extChars)-1; ++i) {
1437         SET_ADD(gNameSet, extChars[i]);
1438     }
1439
1440     /* set sets and lengths from algorithmic names */
1441     maxNameLength=calcAlgNameSetsLengths(0);
1442
1443     /* set sets and lengths from extended names */
1444     maxNameLength=calcExtNameSetsLengths(maxNameLength);
1445
1446     /* set sets and lengths from group names, set global maximum values */
1447     calcGroupNameSetsLengths(maxNameLength);
1448
1449     return TRUE;
1450 }
1451
1452 /* public API --------------------------------------------------------------- */
1453
1454 U_CAPI int32_t U_EXPORT2
1455 u_charName(UChar32 code, UCharNameChoice nameChoice,
1456            char *buffer, int32_t bufferLength,
1457            UErrorCode *pErrorCode) {
1458     AlgorithmicRange *algRange;
1459     uint32_t *p;
1460     uint32_t i;
1461     int32_t length;
1462
1463     /* check the argument values */
1464     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1465         return 0;
1466     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
1467               bufferLength<0 || (bufferLength>0 && buffer==NULL)
1468     ) {
1469         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1470         return 0;
1471     }
1472
1473     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1474         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
1475     }
1476
1477     length=0;
1478
1479     /* try algorithmic names first */
1480     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1481     i=*p;
1482     algRange=(AlgorithmicRange *)(p+1);
1483     while(i>0) {
1484         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1485             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1486             break;
1487         }
1488         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1489         --i;
1490     }
1491
1492     if(i==0) {
1493         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1494             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
1495             if (!length) {
1496                 /* extended character name */
1497                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1498             }
1499         } else {
1500             /* normal character name */
1501             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1502         }
1503     }
1504
1505     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1506 }
1507
1508 U_CAPI int32_t U_EXPORT2
1509 u_getISOComment(UChar32 c,
1510                 char *dest, int32_t destCapacity,
1511                 UErrorCode *pErrorCode) {
1512     int32_t length;
1513
1514     /* check the argument values */
1515     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1516         return 0;
1517     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1518         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1519         return 0;
1520     }
1521
1522     if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1523         return u_terminateChars(dest, destCapacity, 0, pErrorCode);
1524     }
1525
1526     /* the ISO comment is stored like a normal character name */
1527     length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
1528     return u_terminateChars(dest, destCapacity, length, pErrorCode);
1529 }
1530
1531 U_CAPI UChar32 U_EXPORT2
1532 u_charFromName(UCharNameChoice nameChoice,
1533                const char *name,
1534                UErrorCode *pErrorCode) {
1535     char upper[120], lower[120];
1536     FindName findName;
1537     AlgorithmicRange *algRange;
1538     uint32_t *p;
1539     uint32_t i;
1540     UChar32 cp = 0;
1541     char c0;
1542     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
1543
1544     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1545         return error;
1546     }
1547
1548     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
1549         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1550         return error;
1551     }
1552
1553     if(!isDataLoaded(pErrorCode)) {
1554         return error;
1555     }
1556
1557     /* construct the uppercase and lowercase of the name first */
1558     for(i=0; i<sizeof(upper); ++i) {
1559         if((c0=*name++)!=0) {
1560             upper[i]=uprv_toupper(c0);
1561             lower[i]=uprv_tolower(c0);
1562         } else {
1563             upper[i]=lower[i]=0;
1564             break;
1565         }
1566     }
1567     if(i==sizeof(upper)) {
1568         /* name too long, there is no such character */
1569         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1570         return error;
1571     }
1572
1573     /* try extended names first */
1574     if (lower[0] == '<') {
1575         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1576             if (lower[--i] == '>') {
1577                 for (--i; lower[i] && lower[i] != '-'; --i) {
1578                 }
1579
1580                 if (lower[i] == '-') { /* We've got a category. */
1581                     uint32_t cIdx;
1582
1583                     lower[i] = 0;
1584
1585                     for (++i; lower[i] != '>'; ++i) {
1586                         if (lower[i] >= '0' && lower[i] <= '9') {
1587                             cp = (cp << 4) + lower[i] - '0';
1588                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
1589                             cp = (cp << 4) + lower[i] - 'a' + 10;
1590                         } else {
1591                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1592                             return error;
1593                         }
1594                     }
1595
1596                     /* Now validate the category name.
1597                        We could use a binary search, or a trie, if
1598                        we really wanted to. */
1599
1600                     for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
1601
1602                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
1603                             if (getCharCat(cp) == cIdx) {
1604                                 return cp;
1605                             }
1606                             break;
1607                         }
1608                     }
1609                 }
1610             }
1611         }
1612
1613         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1614         return error;
1615     }
1616
1617     /* try algorithmic names now */
1618     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1619     i=*p;
1620     algRange=(AlgorithmicRange *)(p+1);
1621     while(i>0) {
1622         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
1623             return cp;
1624         }
1625         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1626         --i;
1627     }
1628
1629     /* normal character name */
1630     findName.otherName=upper;
1631     findName.code=error;
1632     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
1633     if (findName.code == error) {
1634          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1635     }
1636     return findName.code;
1637 }
1638
1639 U_CAPI void U_EXPORT2
1640 u_enumCharNames(UChar32 start, UChar32 limit,
1641                 UEnumCharNamesFn *fn,
1642                 void *context,
1643                 UCharNameChoice nameChoice,
1644                 UErrorCode *pErrorCode) {
1645     AlgorithmicRange *algRange;
1646     uint32_t *p;
1647     uint32_t i;
1648
1649     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1650         return;
1651     }
1652
1653     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
1654         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1655         return;
1656     }
1657
1658     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
1659         limit = UCHAR_MAX_VALUE + 1;
1660     }
1661     if((uint32_t)start>=(uint32_t)limit) {
1662         return;
1663     }
1664
1665     if(!isDataLoaded(pErrorCode)) {
1666         return;
1667     }
1668
1669     /* interleave the data-driven ones with the algorithmic ones */
1670     /* iterate over all algorithmic ranges; assume that they are in ascending order */
1671     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1672     i=*p;
1673     algRange=(AlgorithmicRange *)(p+1);
1674     while(i>0) {
1675         /* enumerate the character names before the current algorithmic range */
1676         /* here: start<limit */
1677         if((uint32_t)start<algRange->start) {
1678             if((uint32_t)limit<=algRange->start) {
1679                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
1680                 return;
1681             }
1682             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1683                 return;
1684             }
1685             start=(UChar32)algRange->start;
1686         }
1687         /* enumerate the character names in the current algorithmic range */
1688         /* here: algRange->start<=start<limit */
1689         if((uint32_t)start<=algRange->end) {
1690             if((uint32_t)limit<=(algRange->end+1)) {
1691                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1692                 return;
1693             }
1694             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
1695                 return;
1696             }
1697             start=(UChar32)algRange->end+1;
1698         }
1699         /* continue to the next algorithmic range (here: start<limit) */
1700         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1701         --i;
1702     }
1703     /* enumerate the character names after the last algorithmic range */
1704     enumNames(uCharNames, start, limit, fn, context, nameChoice);
1705 }
1706
1707 U_CAPI int32_t U_EXPORT2
1708 uprv_getMaxCharNameLength() {
1709     UErrorCode errorCode=U_ZERO_ERROR;
1710     if(calcNameSetsLengths(&errorCode)) {
1711         return gMaxNameLength;
1712     } else {
1713         return 0;
1714     }
1715 }
1716
1717 /**
1718  * Converts the char set cset into a Unicode set uset.
1719  * @param cset Set of 256 bit flags corresponding to a set of chars.
1720  * @param uset USet to receive characters. Existing contents are deleted.
1721  */
1722 static void
1723 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
1724     UChar us[256];
1725     char cs[256];
1726
1727     int32_t i, length;
1728     UErrorCode errorCode;
1729
1730     errorCode=U_ZERO_ERROR;
1731
1732     if(!calcNameSetsLengths(&errorCode)) {
1733         return;
1734     }
1735
1736     /* build a char string with all chars that are used in character names */
1737     length=0;
1738     for(i=0; i<256; ++i) {
1739         if(SET_CONTAINS(cset, i)) {
1740             cs[length++]=(char)i;
1741         }
1742     }
1743
1744     /* convert the char string to a UChar string */
1745     u_charsToUChars(cs, us, length);
1746
1747     /* add each UChar to the USet */
1748     for(i=0; i<length; ++i) {
1749         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
1750             sa->add(sa->set, us[i]);
1751         }
1752     }
1753 }
1754
/**
 * Passes the characters that are used in Unicode character names to a set
 * adder, by handing the file-level gNameSet to charSetToUSet().
 * @param sa USetAdder to receive the characters.
 */
U_CAPI void U_EXPORT2
uprv_getCharNameCharacters(const USetAdder *sa) {
    charSetToUSet(gNameSet, sa);
}
1763
1764 /* data swapping ------------------------------------------------------------ */
1765
1766 /*
1767  * The token table contains non-negative entries for token bytes,
1768  * and -1 for bytes that represent themselves in the data file's charset.
1769  * -2 entries are used for lead bytes.
1770  *
1771  * Direct bytes (-1 entries) must be translated from the input charset family
1772  * to the output charset family.
1773  * makeTokenMap() writes a permutation mapping for this.
1774  * Use it once for single-/lead-byte tokens and once more for all trail byte
1775  * tokens. (';' is an unused trail byte marked with -1.)
1776  */
1777 static void
1778 makeTokenMap(const UDataSwapper *ds,
1779              int16_t tokens[], uint16_t tokenCount,
1780              uint8_t map[256],
1781              UErrorCode *pErrorCode) {
1782     UBool usedOutChar[256];
1783     uint16_t i, j;
1784     uint8_t c1, c2;
1785
1786     if(U_FAILURE(*pErrorCode)) {
1787         return;
1788     }
1789
1790     if(ds->inCharset==ds->outCharset) {
1791         /* Same charset family: identity permutation */
1792         for(i=0; i<256; ++i) {
1793             map[i]=(uint8_t)i;
1794         }
1795     } else {
1796         uprv_memset(map, 0, 256);
1797         uprv_memset(usedOutChar, 0, 256);
1798
1799         if(tokenCount>256) {
1800             tokenCount=256;
1801         }
1802
1803         /* set the direct bytes (byte 0 always maps to itself) */
1804         for(i=1; i<tokenCount; ++i) {
1805             if(tokens[i]==-1) {
1806                 /* convert the direct byte character */
1807                 c1=(uint8_t)i;
1808                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
1809                 if(U_FAILURE(*pErrorCode)) {
1810                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
1811                                      i, ds->inCharset);
1812                     return;
1813                 }
1814
1815                 /* enter the converted character into the map and mark it used */
1816                 map[c1]=c2;
1817                 usedOutChar[c2]=TRUE;
1818             }
1819         }
1820
1821         /* set the mappings for the rest of the permutation */
1822         for(i=j=1; i<tokenCount; ++i) {
1823             /* set mappings that were not set for direct bytes */
1824             if(map[i]==0) {
1825                 /* set an output byte value that was not used as an output byte above */
1826                 while(usedOutChar[j]) {
1827                     ++j;
1828                 }
1829                 map[i]=(uint8_t)j++;
1830             }
1831         }
1832
1833         /*
1834          * leave mappings at tokenCount and above unset if tokenCount<256
1835          * because they won't be used
1836          */
1837     }
1838 }
1839
/**
 * Swaps the contents of a unames.icu data item between platform types
 * (byte order and charset family, as described by ds).
 *
 * The item body, as accessed by this function, consists of four uint32_t
 * offsets, the token table (uint16_t count plus uint16_t tokens), the token
 * strings, the group table, the group strings, and the algorithmic ranges.
 *
 * Input and output may be the same buffer (in-place swapping); for that
 * reason every field that steers the control flow is read from the input
 * before the corresponding bytes are swapped.
 *
 * @param ds         swapper with the read/swap functions and charset families
 * @param inData     start of the input data item, including its header
 * @param length     number of input bytes, or a negative value for
 *                   preflighting: then only the header swap function is
 *                   called and the size is computed, and this function
 *                   itself writes nothing to outData
 * @param outData    start of the output buffer, may be the same as inData
 * @param pErrorCode in/out ICU error code; set here to
 *                   U_UNSUPPORTED_ERROR (unrecognized format/version or
 *                   unknown algorithmic range type),
 *                   U_INDEX_OUTOFBOUNDS_ERROR (too few input bytes) or
 *                   U_MEMORY_ALLOCATION_ERROR (temporary token array)
 * @return header size plus the offset just past the last algorithmic range,
 *         or 0 if an error occurred
 */
U_CAPI int32_t U_EXPORT2
uchar_swapNames(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
             offset, i, count, stringsCount;

    const AlgorithmicRange *inRange;
    AlgorithmicRange *outRange;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    /* the UDataInfo is read at byte offset 4 of the input data */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* from here on, all offsets are relative to the end of the header */
    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;
    if(length<0) {
        /* preflighting: no length to check against, just read the 4th offset */
        algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
    } else {
        /*
         * length now counts only the bytes after the header;
         * require at least 20 of them and that the start of the
         * algorithmic ranges is not beyond the end of the input
         */
        length-=headerSize;
        if( length<20 ||
            (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
        ) {
            udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length<0) {
        /* preflighting: iterate through algorithmic ranges */
        /* the ranges are the last part of the data, so the end of the last range gives the total size */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        offset+=4;

        for(i=0; i<count; ++i) {
            inRange=(const AlgorithmicRange *)(inBytes+offset);
            offset+=ds->readUInt16(inRange->size);
        }
    } else {
        /* swap data */
        const uint16_t *p;
        uint16_t *q, *temp;

        /* tokens[0..255]: single-/lead-byte tokens, tokens[256..511]: trail-byte tokens */
        int16_t tokens[512];
        uint16_t tokenCount;

        /* byte permutations for single-/lead bytes and for trail bytes */
        uint8_t map[256], trailMap[256];

        /* copy the data for inaccessible bytes */
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes, inBytes, length);
        }

        /* the initial 4 offsets first */
        /* read them before swapping because the output may overwrite the input */
        /*
         * NOTE(review): tokenStringOffset, groupsOffset and groupStringOffset
         * are not compared against length or against each other in this
         * function; only algNamesOffset was checked above.
         */
        tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
        groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
        groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
        ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);

        /*
         * now the tokens table
         * it needs to be permuted along with the compressed name strings
         */
        p=(const uint16_t *)(inBytes+16);
        q=(uint16_t *)(outBytes+16);

        /* read and swap the tokenCount */
        tokenCount=ds->readUInt16(*p);
        ds->swapArray16(ds, p, 2, q, pErrorCode);
        ++p;
        ++q;

        /* read the first 512 tokens and make the token maps */
        if(tokenCount<=512) {
            count=tokenCount;
        } else {
            count=512;
        }
        for(i=0; i<count; ++i) {
            tokens[i]=udata_readInt16(ds, p[i]);
        }
        for(; i<512; ++i) {
            tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
        }
        /* the second call returns immediately if the first one failed */
        makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
        makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return 0;
        }

        /*
         * swap and permute the tokens
         * go through a temporary array to support in-place swapping
         */
        temp=(uint16_t *)uprv_malloc(tokenCount*2);
        if(temp==NULL) {
            udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
                             tokenCount);
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }

        /* swap and permute single-/lead-byte tokens */
        /* token i moves to position map[i] */
        for(i=0; i<tokenCount && i<256; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
        }

        /* swap and permute trail-byte tokens */
        /* the upper bits of i (the block of 256) stay, the low byte is permuted via trailMap */
        for(; i<tokenCount; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
        }

        /* copy the result into the output and free the temporary array */
        uprv_memcpy(q, temp, tokenCount*2);
        uprv_free(temp);

        /*
         * swap the token strings but not a possible padding byte after
         * the terminating NUL of the last string
         */
        udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
                                    outBytes+tokenStringOffset, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "uchar_swapNames(token strings) failed\n");
            return 0;
        }

        /* swap the group table */
        /* one uint16_t group count, followed by 3 uint16_t per group */
        count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
        ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
                           outBytes+groupsOffset, pErrorCode);

        /*
         * swap the group strings
         * swap the string bytes but not the nibble-encoded string lengths
         */
        /* only needed when the charset family changes; otherwise the bytes copied above are already final */
        if(ds->inCharset!=ds->outCharset) {
            uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];

            const uint8_t *inStrings, *nextInStrings;
            uint8_t *outStrings;

            uint8_t c;

            inStrings=inBytes+groupStringOffset;
            outStrings=outBytes+groupStringOffset;

            /* the group strings extend up to the start of the algorithmic ranges */
            stringsCount=algNamesOffset-groupStringOffset;

            /* iterate through string groups until only a few padding bytes are left */
            while(stringsCount>32) {
                nextInStrings=expandGroupLengths(inStrings, offsets, lengths);

                /* move past the length bytes */
                stringsCount-=(uint32_t)(nextInStrings-inStrings);
                outStrings+=nextInStrings-inStrings;
                inStrings=nextInStrings;

                count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
                stringsCount-=count;

                /* swap the string bytes using map[] and trailMap[] */
                /*
                 * NOTE(review): count is unsigned; this loop relies on every
                 * lead byte being followed by its trail byte within the
                 * group, otherwise count-=2 would wrap around.
                 */
                while(count>0) {
                    c=*inStrings++;
                    *outStrings++=map[c];
                    if(tokens[c]!=-2) {
                        --count;
                    } else {
                        /* token lead byte: swap the trail byte, too */
                        *outStrings++=trailMap[*inStrings++];
                        count-=2;
                    }
                }
            }
        }

        /* swap the algorithmic ranges */
        /* a uint32_t range count, followed by the variable-size ranges */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
        offset+=4;

        for(i=0; i<count; ++i) {
            if(offset>(uint32_t)length) {
                udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
                                 length, i);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            inRange=(const AlgorithmicRange *)(inBytes+offset);
            outRange=(AlgorithmicRange *)(outBytes+offset);
            /* advance to the next range before this range's size field gets swapped */
            offset+=ds->readUInt16(inRange->size);

            /* swap the first 8 bytes of the range as 32-bit units and the size field as a 16-bit unit */
            ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
            ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
            switch(inRange->type) {
            case 0:
                /* swap prefix string */
                /* the NUL-terminated string starts directly after the range struct */
                ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
                                    outRange+1, pErrorCode);
                if(U_FAILURE(*pErrorCode)) {
                    udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
                                     i);
                    return 0;
                }
                break;
            case 1:
                {
                    /* swap factors and the prefix and factor strings */
                    uint32_t factorsCount;

                    /* variant holds the number of uint16_t factors that follow the range struct */
                    factorsCount=inRange->variant;
                    p=(const uint16_t *)(inRange+1);
                    q=(uint16_t *)(outRange+1);
                    ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);

                    /* swap the strings, up to the last terminating NUL */
                    p+=factorsCount;
                    q+=factorsCount;
                    /* bytes from the end of the factors to the end of this range */
                    stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
                    /* drop trailing non-NUL bytes after the last string */
                    while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
                        --stringsCount;
                    }
                    ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
                }
                break;
            default:
                udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
                                 inRange->type, i);
                *pErrorCode=U_UNSUPPORTED_ERROR;
                return 0;
            }
        }
    }

    /* total size: header plus everything up to the end of the last algorithmic range */
    return headerSize+(int32_t)offset;
}

2105
2106 /*
2107  * Hey, Emacs, please set the following:
2108  *
2109  * Local Variables:
2110  * indent-tabs-mode: nil
2111  * End:
2112  *
2113  */