src/third_party/icu/source/i18n/choicfmt.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2013, International Business Machines Corporation and    *
   4 * others. All Rights Reserved.                                                *
   5 *******************************************************************************
   6 *
   7 * File CHOICFMT.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   02/19/97    aliu        Converted from java.
  13 *   03/20/97    helena      Finished first cut of implementation and got rid
  14 *                           of nextDouble/previousDouble and replaced with
  15 *                           boolean array.
  16 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
  17 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
  18 *                           wchar.h.
  19 *   07/09/97    helena      Made ParsePosition into a class.
  20 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
  21 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
  22 *   02/22/99    stephen     Removed character literals for EBCDIC safety
  23 ********************************************************************************
  24 */
  25
  26 #include "unicode/utypes.h"
  27
  28 #if !UCONFIG_NO_FORMATTING
  29
  30 #include "unicode/choicfmt.h"
  31 #include "unicode/numfmt.h"
  32 #include "unicode/locid.h"
  33 #include "cpputils.h"
  34 #include "cstring.h"
  35 #include "messageimpl.h"
  36 #include "putilimp.h"
  37 #include "uassert.h"
  38 #include <stdio.h>
  39 #include <float.h>
  40
  41 // *****************************************************************************
  42 // class ChoiceFormat
  43 // *****************************************************************************
  44
  45 U_NAMESPACE_BEGIN
  46
  47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
  48
  49 // Special characters used by ChoiceFormat.  There are two characters
  50 // used interchangeably to indicate <=.  Either is parsed, but only
  51 // LESS_EQUAL is generated by toPattern().
  52 #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
  53 #define LESS_THAN    ((UChar)0x003C)   /*<*/
  54 #define LESS_EQUAL   ((UChar)0x0023)   /*#*/
  55 #define LESS_EQUAL2  ((UChar)0x2264)
  56 #define VERTICAL_BAR ((UChar)0x007C)   /*|*/
  57 #define MINUS        ((UChar)0x002D)   /*-*/
  58
  59 static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
  60 static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
  61
  62 #ifdef INFINITY
  63 #undef INFINITY
  64 #endif
  65 #define INFINITY     ((UChar)0x221E)
  66
  67 //static const UChar gPositiveInfinity[] = {INFINITY, 0};
  68 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
  69 #define POSITIVE_INF_STRLEN 1
  70 #define NEGATIVE_INF_STRLEN 2
  71
  72 // -------------------------------------
  73 // Creates a ChoiceFormat instance based on the pattern.
  74
  75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
  76                            UErrorCode& status)
  77 : constructorErrorCode(status),
  78   msgPattern(status)
  79 {
  80     applyPattern(newPattern, status);
  81 }
  82
  83 // -------------------------------------
  84 // Creates a ChoiceFormat instance with the limit array and
  85 // format strings for each limit.
  86
  87 ChoiceFormat::ChoiceFormat(const double* limits,
  88                            const UnicodeString* formats,
  89                            int32_t cnt )
  90 : constructorErrorCode(U_ZERO_ERROR),
  91   msgPattern(constructorErrorCode)
  92 {
  93     setChoices(limits, NULL, formats, cnt, constructorErrorCode);
  94 }
  95
  96 // -------------------------------------
  97
  98 ChoiceFormat::ChoiceFormat(const double* limits,
  99                            const UBool* closures,
 100                            const UnicodeString* formats,
 101                            int32_t cnt )
 102 : constructorErrorCode(U_ZERO_ERROR),
 103   msgPattern(constructorErrorCode)
 104 {
 105     setChoices(limits, closures, formats, cnt, constructorErrorCode);
 106 }
 107
 108 // -------------------------------------
 109 // copy constructor
 110
 111 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)
 112 : NumberFormat(that),
 113   constructorErrorCode(that.constructorErrorCode),
 114   msgPattern(that.msgPattern)
 115 {
 116 }
 117
 118 // -------------------------------------
 119 // Private constructor that creates a
 120 // ChoiceFormat instance based on the
 121 // pattern and populates UParseError
 122
 123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
 124                            UParseError& parseError,
 125                            UErrorCode& status)
 126 : constructorErrorCode(status),
 127   msgPattern(status)
 128 {
 129     applyPattern(newPattern,parseError, status);
 130 }
 131 // -------------------------------------
 132
 133 UBool
 134 ChoiceFormat::operator==(const Format& that) const
 135 {
 136     if (this == &that) return TRUE;
 137     if (!NumberFormat::operator==(that)) return FALSE;
 138     ChoiceFormat& thatAlias = (ChoiceFormat&)that;
 139     return msgPattern == thatAlias.msgPattern;
 140 }
 141
 142 // -------------------------------------
 143 // copy constructor
 144
 145 const ChoiceFormat&
 146 ChoiceFormat::operator=(const   ChoiceFormat& that)
 147 {
 148     if (this != &that) {
 149         NumberFormat::operator=(that);
 150         constructorErrorCode = that.constructorErrorCode;
 151         msgPattern = that.msgPattern;
 152     }
 153     return *this;
 154 }
 155
 156 // -------------------------------------
 157
 158 ChoiceFormat::~ChoiceFormat()
 159 {
 160 }
 161
 162 // -------------------------------------
 163
 164 /**
 165  * Convert a double value to a string without the overhead of NumberFormat.
 166  */
 167 UnicodeString&
 168 ChoiceFormat::dtos(double value,
 169                    UnicodeString& string)
 170 {
 171     /* Buffer to contain the digits and any extra formatting stuff. */
 172     char temp[DBL_DIG + 16];
 173     char *itrPtr = temp;
 174     char *expPtr;
 175
 176     sprintf(temp, "%.*g", DBL_DIG, value);
 177
 178     /* Find and convert the decimal point.
 179        Using setlocale on some machines will cause sprintf to use a comma for certain locales.
 180     */
 181     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
 182         itrPtr++;
 183     }
 184     if (*itrPtr != 0 && *itrPtr != 'e') {
 185         /* We reached something that looks like a decimal point.
 186         In case someone used setlocale(), which changes the decimal point. */
 187         *itrPtr = '.';
 188         itrPtr++;
 189     }
 190     /* Search for the exponent */
 191     while (*itrPtr && *itrPtr != 'e') {
 192         itrPtr++;
 193     }
 194     if (*itrPtr == 'e') {
 195         itrPtr++;
 196         /* Verify the exponent sign */
 197         if (*itrPtr == '+' || *itrPtr == '-') {
 198             itrPtr++;
 199         }
 200         /* Remove leading zeros. You will see this on Windows machines. */
 201         expPtr = itrPtr;
 202         while (*itrPtr == '0') {
 203             itrPtr++;
 204         }
 205         if (*itrPtr && expPtr != itrPtr) {
 206             /* Shift the exponent without zeros. */
 207             while (*itrPtr) {
 208                 *(expPtr++)  = *(itrPtr++);
 209             }
 210             // NULL terminate
 211             *expPtr = 0;
 212         }
 213     }
 214
 215     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
 216     return string;
 217 }
 218
 219 // -------------------------------------
 220 // calls the overloaded applyPattern method.
 221
 222 void
 223 ChoiceFormat::applyPattern(const UnicodeString& pattern,
 224                            UErrorCode& status)
 225 {
 226     msgPattern.parseChoiceStyle(pattern, NULL, status);
 227     constructorErrorCode = status;
 228 }
 229
 230 // -------------------------------------
 231 // Applies the pattern to this ChoiceFormat instance.
 232
 233 void
 234 ChoiceFormat::applyPattern(const UnicodeString& pattern,
 235                            UParseError& parseError,
 236                            UErrorCode& status)
 237 {
 238     msgPattern.parseChoiceStyle(pattern, &parseError, status);
 239     constructorErrorCode = status;
 240 }
 241 // -------------------------------------
 242 // Returns the input pattern string.
 243
 244 UnicodeString&
 245 ChoiceFormat::toPattern(UnicodeString& result) const
 246 {
 247     return result = msgPattern.getPatternString();
 248 }
 249
 250 // -------------------------------------
 251 // Sets the limit and format arrays.
 252 void
 253 ChoiceFormat::setChoices(  const double* limits,
 254                            const UnicodeString* formats,
 255                            int32_t cnt )
 256 {
 257     UErrorCode errorCode = U_ZERO_ERROR;
 258     setChoices(limits, NULL, formats, cnt, errorCode);
 259 }
 260
 261 // -------------------------------------
 262 // Sets the limit and format arrays.
 263 void
 264 ChoiceFormat::setChoices(  const double* limits,
 265                            const UBool* closures,
 266                            const UnicodeString* formats,
 267                            int32_t cnt )
 268 {
 269     UErrorCode errorCode = U_ZERO_ERROR;
 270     setChoices(limits, closures, formats, cnt, errorCode);
 271 }
 272
 273 void
 274 ChoiceFormat::setChoices(const double* limits,
 275                          const UBool* closures,
 276                          const UnicodeString* formats,
 277                          int32_t count,
 278                          UErrorCode &errorCode) {
 279     if (U_FAILURE(errorCode)) {
 280         return;
 281     }
 282     if (limits == NULL || formats == NULL) {
 283         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 284         return;
 285     }
 286     // Reconstruct the original input pattern.
 287     // Modified version of the pre-ICU 4.8 toPattern() implementation.
 288     UnicodeString result;
 289     for (int32_t i = 0; i < count; ++i) {
 290         if (i != 0) {
 291             result += VERTICAL_BAR;
 292         }
 293         UnicodeString buf;
 294         if (uprv_isPositiveInfinity(limits[i])) {
 295             result += INFINITY;
 296         } else if (uprv_isNegativeInfinity(limits[i])) {
 297             result += MINUS;
 298             result += INFINITY;
 299         } else {
 300             result += dtos(limits[i], buf);
 301         }
 302         if (closures != NULL && closures[i]) {
 303             result += LESS_THAN;
 304         } else {
 305             result += LESS_EQUAL;
 306         }
 307         // Append formats[i], using quotes if there are special
 308         // characters.  Single quotes themselves must be escaped in
 309         // either case.
 310         const UnicodeString& text = formats[i];
 311         int32_t textLength = text.length();
 312         int32_t nestingLevel = 0;
 313         for (int32_t j = 0; j < textLength; ++j) {
 314             UChar c = text[j];
 315             if (c == SINGLE_QUOTE && nestingLevel == 0) {
 316                 // Double each top-level apostrophe.
 317                 result.append(c);
 318             } else if (c == VERTICAL_BAR && nestingLevel == 0) {
 319                 // Surround each pipe symbol with apostrophes for quoting.
 320                 // If the next character is an apostrophe, then that will be doubled,
 321                 // and although the parser will see the apostrophe pairs beginning
 322                 // and ending one character earlier than our doubling, the result
 323                 // is as desired.
 324                 //   | -> '|'
 325                 //   |' -> '|'''
 326                 //   |'' -> '|''''' etc.
 327                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
 328                 continue;  // Skip the append(c) at the end of the loop body.
 329             } else if (c == LEFT_CURLY_BRACE) {
 330                 ++nestingLevel;
 331             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
 332                 --nestingLevel;
 333             }
 334             result.append(c);
 335         }
 336     }
 337     // Apply the reconstructed pattern.
 338     applyPattern(result, errorCode);
 339 }
 340
 341 // -------------------------------------
 342 // Gets the limit array.
 343
 344 const double*
 345 ChoiceFormat::getLimits(int32_t& cnt) const
 346 {
 347     cnt = 0;
 348     return NULL;
 349 }
 350
 351 // -------------------------------------
 352 // Gets the closures array.
 353
 354 const UBool*
 355 ChoiceFormat::getClosures(int32_t& cnt) const
 356 {
 357     cnt = 0;
 358     return NULL;
 359 }
 360
 361 // -------------------------------------
 362 // Gets the format array.
 363
 364 const UnicodeString*
 365 ChoiceFormat::getFormats(int32_t& cnt) const
 366 {
 367     cnt = 0;
 368     return NULL;
 369 }
 370
 371 // -------------------------------------
 372 // Formats an int64 number, it's actually formatted as
 373 // a double.  The returned format string may differ
 374 // from the input number because of this.
 375
 376 UnicodeString&
 377 ChoiceFormat::format(int64_t number,
 378                      UnicodeString& appendTo,
 379                      FieldPosition& status) const
 380 {
 381     return format((double) number, appendTo, status);
 382 }
 383
 384 // -------------------------------------
 385 // Formats an int32_t number, it's actually formatted as
 386 // a double.
 387
 388 UnicodeString&
 389 ChoiceFormat::format(int32_t number,
 390                      UnicodeString& appendTo,
 391                      FieldPosition& status) const
 392 {
 393     return format((double) number, appendTo, status);
 394 }
 395
 396 // -------------------------------------
 397 // Formats a double number.
 398
 399 UnicodeString&
 400 ChoiceFormat::format(double number,
 401                      UnicodeString& appendTo,
 402                      FieldPosition& /*pos*/) const
 403 {
 404     if (msgPattern.countParts() == 0) {
 405         // No pattern was applied, or it failed.
 406         return appendTo;
 407     }
 408     // Get the appropriate sub-message.
 409     int32_t msgStart = findSubMessage(msgPattern, 0, number);
 410     if (!MessageImpl::jdkAposMode(msgPattern)) {
 411         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
 412         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
 413         appendTo.append(msgPattern.getPatternString(),
 414                         patternStart,
 415                         msgPattern.getPatternIndex(msgLimit) - patternStart);
 416         return appendTo;
 417     }
 418     // JDK compatibility mode: Remove SKIP_SYNTAX.
 419     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
 420 }
 421
 422 int32_t
 423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
 424     int32_t count = pattern.countParts();
 425     int32_t msgStart;
 426     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
 427     // until ARG_LIMIT or end of choice-only pattern.
 428     // Ignore the first number and selector and start the loop on the first message.
 429     partIndex += 2;
 430     for (;;) {
 431         // Skip but remember the current sub-message.
 432         msgStart = partIndex;
 433         partIndex = pattern.getLimitPartIndex(partIndex);
 434         if (++partIndex >= count) {
 435             // Reached the end of the choice-only pattern.
 436             // Return with the last sub-message.
 437             break;
 438         }
 439         const MessagePattern::Part &part = pattern.getPart(partIndex++);
 440         UMessagePatternPartType type = part.getType();
 441         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
 442             // Reached the end of the ChoiceFormat style.
 443             // Return with the last sub-message.
 444             break;
 445         }
 446         // part is an ARG_INT or ARG_DOUBLE
 447         U_ASSERT(MessagePattern::Part::hasNumericValue(type));
 448         double boundary = pattern.getNumericValue(part);
 449         // Fetch the ARG_SELECTOR character.
 450         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
 451         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
 452         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
 453             // The number is in the interval between the previous boundary and the current one.
 454             // Return with the sub-message between them.
 455             // The !(a>b) and !(a>=b) comparisons are equivalent to
 456             // (a<=b) and (a<b) except they "catch" NaN.
 457             break;
 458         }
 459     }
 460     return msgStart;
 461 }
 462
 463 // -------------------------------------
 464 // Formats an array of objects. Checks if the data type of the objects
 465 // to get the right value for formatting.
 466
 467 UnicodeString&
 468 ChoiceFormat::format(const Formattable* objs,
 469                      int32_t cnt,
 470                      UnicodeString& appendTo,
 471                      FieldPosition& pos,
 472                      UErrorCode& status) const
 473 {
 474     if(cnt < 0) {
 475         status = U_ILLEGAL_ARGUMENT_ERROR;
 476         return appendTo;
 477     }
 478     if (msgPattern.countParts() == 0) {
 479         status = U_INVALID_STATE_ERROR;
 480         return appendTo;
 481     }
 482
 483     for (int32_t i = 0; i < cnt; i++) {
 484         double objDouble = objs[i].getDouble(status);
 485         if (U_SUCCESS(status)) {
 486             format(objDouble, appendTo, pos);
 487         }
 488     }
 489
 490     return appendTo;
 491 }
 492
 493 // -------------------------------------
 494
 495 void
 496 ChoiceFormat::parse(const UnicodeString& text,
 497                     Formattable& result,
 498                     ParsePosition& pos) const
 499 {
 500     result.setDouble(parseArgument(msgPattern, 0, text, pos));
 501 }
 502
 503 double
 504 ChoiceFormat::parseArgument(
 505         const MessagePattern &pattern, int32_t partIndex,
 506         const UnicodeString &source, ParsePosition &pos) {
 507     // find the best number (defined as the one with the longest parse)
 508     int32_t start = pos.getIndex();
 509     int32_t furthest = start;
 510     double bestNumber = uprv_getNaN();
 511     double tempNumber = 0.0;
 512     int32_t count = pattern.countParts();
 513     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
 514         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
 515         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
 516         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
 517         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
 518         if (len >= 0) {
 519             int32_t newIndex = start + len;
 520             if (newIndex > furthest) {
 521                 furthest = newIndex;
 522                 bestNumber = tempNumber;
 523                 if (furthest == source.length()) {
 524                     break;
 525                 }
 526             }
 527         }
 528         partIndex = msgLimit + 1;
 529     }
 530     if (furthest == start) {
 531         pos.setErrorIndex(start);
 532     } else {
 533         pos.setIndex(furthest);
 534     }
 535     return bestNumber;
 536 }
 537
 538 int32_t
 539 ChoiceFormat::matchStringUntilLimitPart(
 540         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
 541         const UnicodeString &source, int32_t sourceOffset) {
 542     int32_t matchingSourceLength = 0;
 543     const UnicodeString &msgString = pattern.getPatternString();
 544     int32_t prevIndex = pattern.getPart(partIndex).getLimit();
 545     for (;;) {
 546         const MessagePattern::Part &part = pattern.getPart(++partIndex);
 547         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
 548             int32_t index = part.getIndex();
 549             int32_t length = index - prevIndex;
 550             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
 551                 return -1;  // mismatch
 552             }
 553             matchingSourceLength += length;
 554             if (partIndex == limitPartIndex) {
 555                 return matchingSourceLength;
 556             }
 557             prevIndex = part.getLimit();  // SKIP_SYNTAX
 558         }
 559     }
 560 }
 561
 562 // -------------------------------------
 563
 564 Format*
 565 ChoiceFormat::clone() const
 566 {
 567     ChoiceFormat *aCopy = new ChoiceFormat(*this);
 568     return aCopy;
 569 }
 570
 571 U_NAMESPACE_END
 572
 573 #endif /* #if !UCONFIG_NO_FORMATTING */
 574
 575 //eof