src/third_party/icu/source/i18n/nfrs.cpp

   1 /*
   2 ******************************************************************************
   3 *   Copyright (C) 1997-2012, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 ******************************************************************************
   6 *   file name:  nfrs.cpp
   7 *   encoding:   US-ASCII
   8 *   tab size:   8 (not used)
   9 *   indentation:4
  10 *
  11 * Modification history
  12 * Date        Name      Comments
  13 * 10/11/2001  Doug      Ported from ICU4J
  14 */
  15
  16 #include "nfrs.h"
  17
  18 #if U_HAVE_RBNF
  19
  20 #include "unicode/uchar.h"
  21 #include "nfrule.h"
  22 #include "nfrlist.h"
  23 #include "patternprops.h"
  24
  25 #ifdef RBNF_DEBUG
  26 #include "cmemory.h"
  27 #endif
  28
  29 U_NAMESPACE_BEGIN
  30
  31 #if 0
// NOTE: this variant sits in the "#if 0" branch and is never compiled; the
// binary-GCD implementation in the "#else" branch is the one in use.
// As written it could not be enabled: "x.abs()" is not valid on a built-in
// int64_t, and the value returned after the loop is the greatest common
// divisor of x and y, not their least common multiple.
// euclid's algorithm works with doubles
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs.  We probably still
// want either 64-bit math, or BigInteger.

static int64_t
util_lcm(int64_t x, int64_t y)
{
    x.abs();
    y.abs();

    if (x == 0 || y == 0) {
        return 0;
    } else {
        // Euclid: repeatedly replace the larger value by its remainder
        // modulo the smaller one until the remainder is zero
        do {
            if (x < y) {
                int64_t t = x; x = y; y = t;
            }
            x -= y * (x/y);
        } while (x != 0);

        return y;
    }
}
  56
  57 #else
  58 /**
  59  * Calculates the least common multiple of x and y.
  60  */
  61 static int64_t
  62 util_lcm(int64_t x, int64_t y)
  63 {
  64     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
  65     // vol. 2, 1st ed., pp. 298-299
  66     int64_t x1 = x;
  67     int64_t y1 = y;
  68
  69     int p2 = 0;
  70     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
  71         ++p2;
  72         x1 >>= 1;
  73         y1 >>= 1;
  74     }
  75
  76     int64_t t;
  77     if ((x1 & 1) == 1) {
  78         t = -y1;
  79     } else {
  80         t = x1;
  81     }
  82
  83     while (t != 0) {
  84         while ((t & 1) == 0) {
  85             t = t >> 1;
  86         }
  87         if (t > 0) {
  88             x1 = t;
  89         } else {
  90             y1 = -t;
  91         }
  92         t = x1 - y1;
  93     }
  94
  95     int64_t gcd = x1 << p2;
  96
  97     // x * y == gcd(x, y) * lcm(x, y)
  98     return x / gcd * y;
  99 }
 100 #endif
 101
// Single code units used while reading and writing rule set descriptions.
static const UChar gPercent = 0x0025;   /* '%'  - first character of a rule set name */
static const UChar gColon = 0x003a;     /* ':'  - terminates a rule set name */
static const UChar gSemicolon = 0x003b; /* ';'  - separates the rules of a rule set */
static const UChar gLineFeed = 0x000a;  /* '\n' - line separator written by appendRules() */

// Indentation written in front of every rule by appendRules().
static const UChar gFourSpaces[] =
{
    0x20, 0x20, 0x20, 0x20, 0
}; /* "    " */
// Name prefix tested by the constructor when it computes fIsPublic.
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// Name suffix that makes the constructor clear fIsParseable; the suffix is
// stripped from the stored name.
static const UChar gNoparse[] =
{
    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
}; /* "@noparse" */
 120
 121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
 122   : name()
 123   , rules(0)
 124   , negativeNumberRule(NULL)
 125   , fIsFractionRuleSet(FALSE)
 126   , fIsPublic(FALSE)
 127   , fIsParseable(TRUE)
 128   , fRecursionCount(0)
 129 {
 130     for (int i = 0; i < 3; ++i) {
 131         fractionRules[i] = NULL;
 132     }
 133
 134     if (U_FAILURE(status)) {
 135         return;
 136     }
 137
 138     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
 139
 140     if (description.length() == 0) {
 141         // throw new IllegalArgumentException("Empty rule set description");
 142         status = U_PARSE_ERROR;
 143         return;
 144     }
 145
 146     // if the description begins with a rule set name (the rule set
 147     // name can be omitted in formatter descriptions that consist
 148     // of only one rule set), copy it out into our "name" member
 149     // and delete it from the description
 150     if (description.charAt(0) == gPercent) {
 151         int32_t pos = description.indexOf(gColon);
 152         if (pos == -1) {
 153             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
 154             status = U_PARSE_ERROR;
 155         } else {
 156             name.setTo(description, 0, pos);
 157             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
 158             }
 159             description.remove(0, pos);
 160         }
 161     } else {
 162         name.setTo(UNICODE_STRING_SIMPLE("%default"));
 163     }
 164
 165     if (description.length() == 0) {
 166         // throw new IllegalArgumentException("Empty rule set description");
 167         status = U_PARSE_ERROR;
 168     }
 169
 170     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
 171
 172     if ( name.endsWith(gNoparse,8) ) {
 173         fIsParseable = FALSE;
 174         name.truncate(name.length()-8); // remove the @noparse from the name
 175     }
 176
 177     // all of the other members of NFRuleSet are initialized
 178     // by parseRules()
 179 }
 180
 181 void
 182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
 183 {
 184     // start by creating a Vector whose elements are Strings containing
 185     // the descriptions of the rules (one rule per element).  The rules
 186     // are separated by semicolons (there's no escape facility: ALL
 187     // semicolons are rule delimiters)
 188
 189     if (U_FAILURE(status)) {
 190         return;
 191     }
 192
 193     // ensure we are starting with an empty rule list
 194     rules.deleteAll();
 195
 196     // dlf - the original code kept a separate description array for no reason,
 197     // so I got rid of it.  The loop was too complex so I simplified it.
 198
 199     UnicodeString currentDescription;
 200     int32_t oldP = 0;
 201     while (oldP < description.length()) {
 202         int32_t p = description.indexOf(gSemicolon, oldP);
 203         if (p == -1) {
 204             p = description.length();
 205         }
 206         currentDescription.setTo(description, oldP, p - oldP);
 207         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
 208         oldP = p + 1;
 209     }
 210
 211     // for rules that didn't specify a base value, their base values
 212     // were initialized to 0.  Make another pass through the list and
 213     // set all those rules' base values.  We also remove any special
 214     // rules from the list and put them into their own member variables
 215     int64_t defaultBaseValue = 0;
 216
 217     // (this isn't a for loop because we might be deleting items from
 218     // the vector-- we want to make sure we only increment i when
 219     // we _didn't_ delete aything from the vector)
 220     uint32_t i = 0;
 221     while (i < rules.size()) {
 222         NFRule* rule = rules[i];
 223
 224         switch (rule->getType()) {
 225             // if the rule's base value is 0, fill in a default
 226             // base value (this will be 1 plus the preceding
 227             // rule's base value for regular rule sets, and the
 228             // same as the preceding rule's base value in fraction
 229             // rule sets)
 230         case NFRule::kNoBase:
 231             rule->setBaseValue(defaultBaseValue, status);
 232             if (!isFractionRuleSet()) {
 233                 ++defaultBaseValue;
 234             }
 235             ++i;
 236             break;
 237
 238             // if it's the negative-number rule, copy it into its own
 239             // data member and delete it from the list
 240         case NFRule::kNegativeNumberRule:
 241             if (negativeNumberRule) {
 242                 delete negativeNumberRule;
 243             }
 244             negativeNumberRule = rules.remove(i);
 245             break;
 246
 247             // if it's the improper fraction rule, copy it into the
 248             // correct element of fractionRules
 249         case NFRule::kImproperFractionRule:
 250             if (fractionRules[0]) {
 251                 delete fractionRules[0];
 252             }
 253             fractionRules[0] = rules.remove(i);
 254             break;
 255
 256             // if it's the proper fraction rule, copy it into the
 257             // correct element of fractionRules
 258         case NFRule::kProperFractionRule:
 259             if (fractionRules[1]) {
 260                 delete fractionRules[1];
 261             }
 262             fractionRules[1] = rules.remove(i);
 263             break;
 264
 265             // if it's the master rule, copy it into the
 266             // correct element of fractionRules
 267         case NFRule::kMasterRule:
 268             if (fractionRules[2]) {
 269                 delete fractionRules[2];
 270             }
 271             fractionRules[2] = rules.remove(i);
 272             break;
 273
 274             // if it's a regular rule that already knows its base value,
 275             // check to make sure the rules are in order, and update
 276             // the default base value for the next rule
 277         default:
 278             if (rule->getBaseValue() < defaultBaseValue) {
 279                 // throw new IllegalArgumentException("Rules are not in order");
 280                 status = U_PARSE_ERROR;
 281                 return;
 282             }
 283             defaultBaseValue = rule->getBaseValue();
 284             if (!isFractionRuleSet()) {
 285                 ++defaultBaseValue;
 286             }
 287             ++i;
 288             break;
 289         }
 290     }
 291 }
 292
 293 NFRuleSet::~NFRuleSet()
 294 {
 295     delete negativeNumberRule;
 296     delete fractionRules[0];
 297     delete fractionRules[1];
 298     delete fractionRules[2];
 299 }
 300
 301 static UBool
 302 util_equalRules(const NFRule* rule1, const NFRule* rule2)
 303 {
 304     if (rule1) {
 305         if (rule2) {
 306             return *rule1 == *rule2;
 307         }
 308     } else if (!rule2) {
 309         return TRUE;
 310     }
 311     return FALSE;
 312 }
 313
 314 UBool
 315 NFRuleSet::operator==(const NFRuleSet& rhs) const
 316 {
 317     if (rules.size() == rhs.rules.size() &&
 318         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
 319         name == rhs.name &&
 320         util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
 321         util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
 322         util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
 323         util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
 324
 325         for (uint32_t i = 0; i < rules.size(); ++i) {
 326             if (*rules[i] != *rhs.rules[i]) {
 327                 return FALSE;
 328             }
 329         }
 330         return TRUE;
 331     }
 332     return FALSE;
 333 }
 334
 335 #define RECURSION_LIMIT 50
 336
 337 void
 338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
 339 {
 340     NFRule *rule = findNormalRule(number);
 341     if (rule) { // else error, but can't report it
 342         NFRuleSet* ncThis = (NFRuleSet*)this;
 343         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
 344             // stop recursion
 345             ncThis->fRecursionCount = 0;
 346         } else {
 347             rule->doFormat(number, toAppendTo, pos);
 348             ncThis->fRecursionCount--;
 349         }
 350     }
 351 }
 352
 353 void
 354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
 355 {
 356     NFRule *rule = findDoubleRule(number);
 357     if (rule) { // else error, but can't report it
 358         NFRuleSet* ncThis = (NFRuleSet*)this;
 359         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
 360             // stop recursion
 361             ncThis->fRecursionCount = 0;
 362         } else {
 363             rule->doFormat(number, toAppendTo, pos);
 364             ncThis->fRecursionCount--;
 365         }
 366     }
 367 }
 368
 369 NFRule*
 370 NFRuleSet::findDoubleRule(double number) const
 371 {
 372     // if this is a fraction rule set, use findFractionRuleSetRule()
 373     if (isFractionRuleSet()) {
 374         return findFractionRuleSetRule(number);
 375     }
 376
 377     // if the number is negative, return the negative number rule
 378     // (if there isn't a negative-number rule, we pretend it's a
 379     // positive number)
 380     if (number < 0) {
 381         if (negativeNumberRule) {
 382             return  negativeNumberRule;
 383         } else {
 384             number = -number;
 385         }
 386     }
 387
 388     // if the number isn't an integer, we use one of the fraction rules...
 389     if (number != uprv_floor(number)) {
 390         // if the number is between 0 and 1, return the proper
 391         // fraction rule
 392         if (number < 1 && fractionRules[1]) {
 393             return fractionRules[1];
 394         }
 395         // otherwise, return the improper fraction rule
 396         else if (fractionRules[0]) {
 397             return fractionRules[0];
 398         }
 399     }
 400
 401     // if there's a master rule, use it to format the number
 402     if (fractionRules[2]) {
 403         return fractionRules[2];
 404     }
 405
 406     // and if we haven't yet returned a rule, use findNormalRule()
 407     // to find the applicable rule
 408     int64_t r = util64_fromDouble(number + 0.5);
 409     return findNormalRule(r);
 410 }
 411
 412 NFRule *
 413 NFRuleSet::findNormalRule(int64_t number) const
 414 {
 415     // if this is a fraction rule set, use findFractionRuleSetRule()
 416     // to find the rule (we should only go into this clause if the
 417     // value is 0)
 418     if (fIsFractionRuleSet) {
 419         return findFractionRuleSetRule((double)number);
 420     }
 421
 422     // if the number is negative, return the negative-number rule
 423     // (if there isn't one, pretend the number is positive)
 424     if (number < 0) {
 425         if (negativeNumberRule) {
 426             return negativeNumberRule;
 427         } else {
 428             number = -number;
 429         }
 430     }
 431
 432     // we have to repeat the preceding two checks, even though we
 433     // do them in findRule(), because the version of format() that
 434     // takes a long bypasses findRule() and goes straight to this
 435     // function.  This function does skip the fraction rules since
 436     // we know the value is an integer (it also skips the master
 437     // rule, since it's considered a fraction rule.  Skipping the
 438     // master rule in this function is also how we avoid infinite
 439     // recursion)
 440
 441     // {dlf} unfortunately this fails if there are no rules except
 442     // special rules.  If there are no rules, use the master rule.
 443
 444     // binary-search the rule list for the applicable rule
 445     // (a rule is used for all values from its base value to
 446     // the next rule's base value)
 447     int32_t hi = rules.size();
 448     if (hi > 0) {
 449         int32_t lo = 0;
 450
 451         while (lo < hi) {
 452             int32_t mid = (lo + hi) / 2;
 453             if (rules[mid]->getBaseValue() == number) {
 454                 return rules[mid];
 455             }
 456             else if (rules[mid]->getBaseValue() > number) {
 457                 hi = mid;
 458             }
 459             else {
 460                 lo = mid + 1;
 461             }
 462         }
 463         if (hi == 0) { // bad rule set, minimum base > 0
 464             return NULL; // want to throw exception here
 465         }
 466
 467         NFRule *result = rules[hi - 1];
 468
 469         // use shouldRollBack() to see whether we need to invoke the
 470         // rollback rule (see shouldRollBack()'s documentation for
 471         // an explanation of the rollback rule).  If we do, roll back
 472         // one rule and return that one instead of the one we'd normally
 473         // return
 474         if (result->shouldRollBack((double)number)) {
 475             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
 476                 return NULL;
 477             }
 478             result = rules[hi - 2];
 479         }
 480         return result;
 481     }
 482     // else use the master rule
 483     return fractionRules[2];
 484 }
 485
 486 /**
 487  * If this rule is a fraction rule set, this function is used by
 488  * findRule() to select the most appropriate rule for formatting
 489  * the number.  Basically, the base value of each rule in the rule
 490  * set is treated as the denominator of a fraction.  Whichever
 491  * denominator can produce the fraction closest in value to the
 492  * number passed in is the result.  If there's a tie, the earlier
 493  * one in the list wins.  (If there are two rules in a row with the
 494  * same base value, the first one is used when the numerator of the
 495  * fraction would be 1, and the second rule is used the rest of the
 496  * time.
 497  * @param number The number being formatted (which will always be
 498  * a number between 0 and 1)
 499  * @return The rule to use to format this number
 500  */
 501 NFRule*
 502 NFRuleSet::findFractionRuleSetRule(double number) const
 503 {
 504     // the obvious way to do this (multiply the value being formatted
 505     // by each rule's base value until you get an integral result)
 506     // doesn't work because of rounding error.  This method is more
 507     // accurate
 508
 509     // find the least common multiple of the rules' base values
 510     // and multiply this by the number being formatted.  This is
 511     // all the precision we need, and we can do all of the rest
 512     // of the math using integer arithmetic
 513     int64_t leastCommonMultiple = rules[0]->getBaseValue();
 514     int64_t numerator;
 515     {
 516         for (uint32_t i = 1; i < rules.size(); ++i) {
 517             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
 518         }
 519         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
 520     }
 521     // for each rule, do the following...
 522     int64_t tempDifference;
 523     int64_t difference = util64_fromDouble(uprv_maxMantissa());
 524     int32_t winner = 0;
 525     for (uint32_t i = 0; i < rules.size(); ++i) {
 526         // "numerator" is the numerator of the fraction if the
 527         // denominator is the LCD.  The numerator if the rule's
 528         // base value is the denominator is "numerator" times the
 529         // base value divided bythe LCD.  Here we check to see if
 530         // that's an integer, and if not, how close it is to being
 531         // an integer.
 532         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
 533
 534
 535         // normalize the result of the above calculation: we want
 536         // the numerator's distance from the CLOSEST multiple
 537         // of the LCD
 538         if (leastCommonMultiple - tempDifference < tempDifference) {
 539             tempDifference = leastCommonMultiple - tempDifference;
 540         }
 541
 542         // if this is as close as we've come, keep track of how close
 543         // that is, and the line number of the rule that did it.  If
 544         // we've scored a direct hit, we don't have to look at any more
 545         // rules
 546         if (tempDifference < difference) {
 547             difference = tempDifference;
 548             winner = i;
 549             if (difference == 0) {
 550                 break;
 551             }
 552         }
 553     }
 554
 555     // if we have two successive rules that both have the winning base
 556     // value, then the first one (the one we found above) is used if
 557     // the numerator of the fraction is 1 and the second one is used if
 558     // the numerator of the fraction is anything else (this lets us
 559     // do things like "one third"/"two thirds" without haveing to define
 560     // a whole bunch of extra rule sets)
 561     if ((unsigned)(winner + 1) < rules.size() &&
 562         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
 563         double n = ((double)rules[winner]->getBaseValue()) * number;
 564         if (n < 0.5 || n >= 2) {
 565             ++winner;
 566         }
 567     }
 568
 569     // finally, return the winning rule
 570     return rules[winner];
 571 }
 572
 573 /**
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and returns
 * the value produced by the rule that matched the most characters
 * in the source string.
 578  * @param text The string to parse
 579  * @param parsePosition The initial position is ignored and assumed
 580  * to be 0.  On exit, this object has been updated to point to the
 581  * first character position this rule set didn't consume.
 582  * @param upperBound Limits the rules that can be allowed to match.
 583  * Only rules whose base values are strictly less than upperBound
 584  * are considered.
 585  * @return The numerical result of parsing this string.  This will
 586  * be the matching rule's base value, composed appropriately with
 587  * the results of matching any of its substitutions.  The object
 588  * will be an instance of Long if it's an integral value; otherwise,
 589  * it will be an instance of Double.  This function always returns
 590  * a valid object: If nothing matched the input string at all,
 591  * this function returns new Long(0), and the parse position is
 592  * left unchanged.
 593  */
 594 #ifdef RBNF_DEBUG
 595 #include <stdio.h>
 596
 597 static void dumpUS(FILE* f, const UnicodeString& us) {
 598   int len = us.length();
 599   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
 600   if (buf != NULL) {
 601           us.extract(0, len, buf);
 602           buf[len] = 0;
 603           fprintf(f, "%s", buf);
 604           uprv_free(buf); //delete[] buf;
 605   }
 606 }
 607 #endif
 608
 609 UBool
 610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
 611 {
 612     // try matching each rule in the rule set against the text being
 613     // parsed.  Whichever one matches the most characters is the one
 614     // that determines the value we return.
 615
 616     result.setLong(0);
 617
 618     // dump out if there's no text to parse
 619     if (text.length() == 0) {
 620         return 0;
 621     }
 622
 623     ParsePosition highWaterMark;
 624     ParsePosition workingPos = pos;
 625
 626 #ifdef RBNF_DEBUG
 627     fprintf(stderr, "<nfrs> %x '", this);
 628     dumpUS(stderr, name);
 629     fprintf(stderr, "' text '");
 630     dumpUS(stderr, text);
 631     fprintf(stderr, "'\n");
 632     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
 633 #endif
 634
 635     // start by trying the negative number rule (if there is one)
 636     if (negativeNumberRule) {
 637         Formattable tempResult;
 638 #ifdef RBNF_DEBUG
 639         fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
 640 #endif
 641         UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
 642 #ifdef RBNF_DEBUG
 643         fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
 644 #endif
 645         if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
 646             result = tempResult;
 647             highWaterMark = workingPos;
 648         }
 649         workingPos = pos;
 650     }
 651 #ifdef RBNF_DEBUG
 652     fprintf(stderr, "<nfrs> continue fractional with text '");
 653     dumpUS(stderr, text);
 654     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
 655 #endif
 656     // then try each of the fraction rules
 657     {
 658         for (int i = 0; i < 3; i++) {
 659             if (fractionRules[i]) {
 660                 Formattable tempResult;
 661                 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
 662                 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
 663                     result = tempResult;
 664                     highWaterMark = workingPos;
 665                 }
 666                 workingPos = pos;
 667             }
 668         }
 669     }
 670 #ifdef RBNF_DEBUG
 671     fprintf(stderr, "<nfrs> continue other with text '");
 672     dumpUS(stderr, text);
 673     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
 674 #endif
 675
 676     // finally, go through the regular rules one at a time.  We start
 677     // at the end of the list because we want to try matching the most
 678     // sigificant rule first (this helps ensure that we parse
 679     // "five thousand three hundred six" as
 680     // "(five thousand) (three hundred) (six)" rather than
 681     // "((five thousand three) hundred) (six)").  Skip rules whose
 682     // base values are higher than the upper bound (again, this helps
 683     // limit ambiguity by making sure the rules that match a rule's
 684     // are less significant than the rule containing the substitutions)/
 685     {
 686         int64_t ub = util64_fromDouble(upperBound);
 687 #ifdef RBNF_DEBUG
 688         {
 689             char ubstr[64];
 690             util64_toa(ub, ubstr, 64);
 691             char ubstrhex[64];
 692             util64_toa(ub, ubstrhex, 64, 16);
 693             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
 694         }
 695 #endif
 696         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
 697             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
 698                 continue;
 699             }
 700             Formattable tempResult;
 701             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
 702             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
 703                 result = tempResult;
 704                 highWaterMark = workingPos;
 705             }
 706             workingPos = pos;
 707         }
 708     }
 709 #ifdef RBNF_DEBUG
 710     fprintf(stderr, "<nfrs> exit\n");
 711 #endif
 712     // finally, update the parse postion we were passed to point to the
 713     // first character we didn't use, and return the result that
 714     // corresponds to that string of characters
 715     pos = highWaterMark;
 716
 717     return 1;
 718 }
 719
 720 void
 721 NFRuleSet::appendRules(UnicodeString& result) const
 722 {
 723     // the rule set name goes first...
 724     result.append(name);
 725     result.append(gColon);
 726     result.append(gLineFeed);
 727
 728     // followed by the regular rules...
 729     for (uint32_t i = 0; i < rules.size(); i++) {
 730         result.append(gFourSpaces, 4);
 731         rules[i]->_appendRuleText(result);
 732         result.append(gLineFeed);
 733     }
 734
 735     // followed by the special rules (if they exist)
 736     if (negativeNumberRule) {
 737         result.append(gFourSpaces, 4);
 738         negativeNumberRule->_appendRuleText(result);
 739         result.append(gLineFeed);
 740     }
 741
 742     {
 743         for (uint32_t i = 0; i < 3; ++i) {
 744             if (fractionRules[i]) {
 745                 result.append(gFourSpaces, 4);
 746                 fractionRules[i]->_appendRuleText(result);
 747                 result.append(gLineFeed);
 748             }
 749         }
 750     }
 751 }
 752
 753 // utility functions
 754
 755 int64_t util64_fromDouble(double d) {
 756     int64_t result = 0;
 757     if (!uprv_isNaN(d)) {
 758         double mant = uprv_maxMantissa();
 759         if (d < -mant) {
 760             d = -mant;
 761         } else if (d > mant) {
 762             d = mant;
 763         }
 764         UBool neg = d < 0;
 765         if (neg) {
 766             d = -d;
 767         }
 768         result = (int64_t)uprv_floor(d);
 769         if (neg) {
 770             result = -result;
 771         }
 772     }
 773     return result;
 774 }
 775
 776 int64_t util64_pow(int32_t r, uint32_t e)  {
 777     if (r == 0) {
 778         return 0;
 779     } else if (e == 0) {
 780         return 1;
 781     } else {
 782         int64_t n = r;
 783         while (--e > 0) {
 784             n *= r;
 785         }
 786         return n;
 787     }
 788 }
 789
// ASCII codes of the 36 digit characters '0'-'9' and 'a'-'z', indexed by
// digit value; used by the util64_toa()/util64_tou() conversions.
static const uint8_t asciiDigits[] = {
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    0x77u, 0x78u, 0x79u, 0x7au,
};

// '-' as a UChar: the sign written by util64_tou() and recognised by
// util64_utoi().
static const UChar kUMinus = (UChar)0x002d;
 799
 800 #ifdef RBNF_DEBUG
 801 static const char kMinus = '-';
 802
 803 static const uint8_t digitInfo[] = {
 804         0,     0,     0,     0,     0,     0,     0,     0,
 805         0,     0,     0,     0,     0,     0,     0,     0,
 806         0,     0,     0,     0,     0,     0,     0,     0,
 807         0,     0,     0,     0,     0,     0,     0,     0,
 808         0,     0,     0,     0,     0,     0,     0,     0,
 809         0,     0,     0,     0,     0,     0,     0,     0,
 810     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
 811     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
 812         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
 813     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
 814     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
 815     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
 816         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
 817     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
 818     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
 819     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
 820 };
 821
 822 int64_t util64_atoi(const char* str, uint32_t radix)
 823 {
 824     if (radix > 36) {
 825         radix = 36;
 826     } else if (radix < 2) {
 827         radix = 2;
 828     }
 829     int64_t lradix = radix;
 830
 831     int neg = 0;
 832     if (*str == kMinus) {
 833         ++str;
 834         neg = 1;
 835     }
 836     int64_t result = 0;
 837     uint8_t b;
 838     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
 839         result *= lradix;
 840         result += (int32_t)b;
 841     }
 842     if (neg) {
 843         result = -result;
 844     }
 845     return result;
 846 }
 847
 848 int64_t util64_utoi(const UChar* str, uint32_t radix)
 849 {
 850     if (radix > 36) {
 851         radix = 36;
 852     } else if (radix < 2) {
 853         radix = 2;
 854     }
 855     int64_t lradix = radix;
 856
 857     int neg = 0;
 858     if (*str == kUMinus) {
 859         ++str;
 860         neg = 1;
 861     }
 862     int64_t result = 0;
 863     UChar c;
 864     uint8_t b;
 865     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
 866         result *= lradix;
 867         result += (int32_t)b;
 868     }
 869     if (neg) {
 870         result = -result;
 871     }
 872     return result;
 873 }
 874
 875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
 876 {
 877     if (radix > 36) {
 878         radix = 36;
 879     } else if (radix < 2) {
 880         radix = 2;
 881     }
 882     int64_t base = radix;
 883
 884     char* p = buf;
 885     if (len && (w < 0) && (radix == 10) && !raw) {
 886         w = -w;
 887         *p++ = kMinus;
 888         --len;
 889     } else if (len && (w == 0)) {
 890         *p++ = (char)raw ? 0 : asciiDigits[0];
 891         --len;
 892     }
 893
 894     while (len && w != 0) {
 895         int64_t n = w / base;
 896         int64_t m = n * base;
 897         int32_t d = (int32_t)(w-m);
 898         *p++ = raw ? (char)d : asciiDigits[d];
 899         w = n;
 900         --len;
 901     }
 902     if (len) {
 903         *p = 0; // null terminate if room for caller convenience
 904     }
 905
 906     len = p - buf;
 907     if (*buf == kMinus) {
 908         ++buf;
 909     }
 910     while (--p > buf) {
 911         char c = *p;
 912         *p = *buf;
 913         *buf = c;
 914         ++buf;
 915     }
 916
 917     return len;
 918 }
 919 #endif
 920
 921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
 922 {
 923     if (radix > 36) {
 924         radix = 36;
 925     } else if (radix < 2) {
 926         radix = 2;
 927     }
 928     int64_t base = radix;
 929
 930     UChar* p = buf;
 931     if (len && (w < 0) && (radix == 10) && !raw) {
 932         w = -w;
 933         *p++ = kUMinus;
 934         --len;
 935     } else if (len && (w == 0)) {
 936         *p++ = (UChar)raw ? 0 : asciiDigits[0];
 937         --len;
 938     }
 939
 940     while (len && (w != 0)) {
 941         int64_t n = w / base;
 942         int64_t m = n * base;
 943         int32_t d = (int32_t)(w-m);
 944         *p++ = (UChar)(raw ? d : asciiDigits[d]);
 945         w = n;
 946         --len;
 947     }
 948     if (len) {
 949         *p = 0; // null terminate if room for caller convenience
 950     }
 951
 952     len = (uint32_t)(p - buf);
 953     if (*buf == kUMinus) {
 954         ++buf;
 955     }
 956     while (--p > buf) {
 957         UChar c = *p;
 958         *p = *buf;
 959         *buf = c;
 960         ++buf;
 961     }
 962
 963     return len;
 964 }
 965
 966
 967 U_NAMESPACE_END
 968
 969 /* U_HAVE_RBNF */
 970 #endif
 971