2 ******************************************************************************
3 * Copyright (C) 1997-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
8 * tab size: 8 (not used)
11 * Modification history
13 * 10/11/2001 Doug Ported from ICU4J
20 #include "unicode/uchar.h"
23 #include "patternprops.h"
32 // euclid's algorithm works with doubles
33 // note, doubles only get us up to one quadrillion or so, which
34 // isn't as much range as we get with longs. We probably still
35 // want either 64-bit math, or BigInteger.
38 util_lcm(int64_t x, int64_t y)
43 if (x == 0 || y == 0) {
48 int64_t t = x; x = y; y = t;
59 * Calculates the least common multiple of x and y.
62 util_lcm(int64_t x, int64_t y)
64 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
65 // vol. 2, 1st ed., pp. 298-299
70 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
84 while ((t & 1) == 0) {
95 int64_t gcd = x1 << p2;
97 // x * y == gcd(x, y) * lcm(x, y)
102 static const UChar gPercent = 0x0025;
103 static const UChar gColon = 0x003a;
104 static const UChar gSemicolon = 0x003b;
105 static const UChar gLineFeed = 0x000a;
107 static const UChar gFourSpaces[] =
109 0x20, 0x20, 0x20, 0x20, 0
111 static const UChar gPercentPercent[] =
116 static const UChar gNoparse[] =
118 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
124 , negativeNumberRule(NULL)
125 , fIsFractionRuleSet(FALSE)
130 for (int i = 0; i < 3; ++i) {
131 fractionRules[i] = NULL;
134 if (U_FAILURE(status)) {
138 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
140 if (description.length() == 0) {
141 // throw new IllegalArgumentException("Empty rule set description");
142 status = U_PARSE_ERROR;
146 // if the description begins with a rule set name (the rule set
147 // name can be omitted in formatter descriptions that consist
148 // of only one rule set), copy it out into our "name" member
149 // and delete it from the description
150 if (description.charAt(0) == gPercent) {
151 int32_t pos = description.indexOf(gColon);
153 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
154 status = U_PARSE_ERROR;
156 name.setTo(description, 0, pos);
157 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
159 description.remove(0, pos);
162 name.setTo(UNICODE_STRING_SIMPLE("%default"));
165 if (description.length() == 0) {
166 // throw new IllegalArgumentException("Empty rule set description");
167 status = U_PARSE_ERROR;
170 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
172 if ( name.endsWith(gNoparse,8) ) {
173 fIsParseable = FALSE;
174 name.truncate(name.length()-8); // remove the @noparse from the name
177 // all of the other members of NFRuleSet are initialized
182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
184 // start by creating a Vector whose elements are Strings containing
185 // the descriptions of the rules (one rule per element). The rules
186 // are separated by semicolons (there's no escape facility: ALL
187 // semicolons are rule delimiters)
189 if (U_FAILURE(status)) {
193 // ensure we are starting with an empty rule list
196 // dlf - the original code kept a separate description array for no reason,
197 // so I got rid of it. The loop was too complex so I simplified it.
199 UnicodeString currentDescription;
201 while (oldP < description.length()) {
202 int32_t p = description.indexOf(gSemicolon, oldP);
204 p = description.length();
206 currentDescription.setTo(description, oldP, p - oldP);
207 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
211 // for rules that didn't specify a base value, their base values
212 // were initialized to 0. Make another pass through the list and
213 // set all those rules' base values. We also remove any special
214 // rules from the list and put them into their own member variables
215 int64_t defaultBaseValue = 0;
217 // (this isn't a for loop because we might be deleting items from
218 // the vector-- we want to make sure we only increment i when
219 // we _didn't_ delete aything from the vector)
221 while (i < rules.size()) {
222 NFRule* rule = rules[i];
224 switch (rule->getType()) {
225 // if the rule's base value is 0, fill in a default
226 // base value (this will be 1 plus the preceding
227 // rule's base value for regular rule sets, and the
228 // same as the preceding rule's base value in fraction
230 case NFRule::kNoBase:
231 rule->setBaseValue(defaultBaseValue, status);
232 if (!isFractionRuleSet()) {
238 // if it's the negative-number rule, copy it into its own
239 // data member and delete it from the list
240 case NFRule::kNegativeNumberRule:
241 if (negativeNumberRule) {
242 delete negativeNumberRule;
244 negativeNumberRule = rules.remove(i);
247 // if it's the improper fraction rule, copy it into the
248 // correct element of fractionRules
249 case NFRule::kImproperFractionRule:
250 if (fractionRules[0]) {
251 delete fractionRules[0];
253 fractionRules[0] = rules.remove(i);
256 // if it's the proper fraction rule, copy it into the
257 // correct element of fractionRules
258 case NFRule::kProperFractionRule:
259 if (fractionRules[1]) {
260 delete fractionRules[1];
262 fractionRules[1] = rules.remove(i);
265 // if it's the master rule, copy it into the
266 // correct element of fractionRules
267 case NFRule::kMasterRule:
268 if (fractionRules[2]) {
269 delete fractionRules[2];
271 fractionRules[2] = rules.remove(i);
274 // if it's a regular rule that already knows its base value,
275 // check to make sure the rules are in order, and update
276 // the default base value for the next rule
278 if (rule->getBaseValue() < defaultBaseValue) {
279 // throw new IllegalArgumentException("Rules are not in order");
280 status = U_PARSE_ERROR;
283 defaultBaseValue = rule->getBaseValue();
284 if (!isFractionRuleSet()) {
293 NFRuleSet::~NFRuleSet()
295 delete negativeNumberRule;
296 delete fractionRules[0];
297 delete fractionRules[1];
298 delete fractionRules[2];
302 util_equalRules(const NFRule* rule1, const NFRule* rule2)
306 return *rule1 == *rule2;
315 NFRuleSet::operator==(const NFRuleSet& rhs) const
317 if (rules.size() == rhs.rules.size() &&
318 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
320 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
321 util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
322 util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
323 util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
325 for (uint32_t i = 0; i < rules.size(); ++i) {
326 if (*rules[i] != *rhs.rules[i]) {
335 #define RECURSION_LIMIT 50
338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
340 NFRule *rule = findNormalRule(number);
341 if (rule) { // else error, but can't report it
342 NFRuleSet* ncThis = (NFRuleSet*)this;
343 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
345 ncThis->fRecursionCount = 0;
347 rule->doFormat(number, toAppendTo, pos);
348 ncThis->fRecursionCount--;
354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
356 NFRule *rule = findDoubleRule(number);
357 if (rule) { // else error, but can't report it
358 NFRuleSet* ncThis = (NFRuleSet*)this;
359 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
361 ncThis->fRecursionCount = 0;
363 rule->doFormat(number, toAppendTo, pos);
364 ncThis->fRecursionCount--;
370 NFRuleSet::findDoubleRule(double number) const
372 // if this is a fraction rule set, use findFractionRuleSetRule()
373 if (isFractionRuleSet()) {
374 return findFractionRuleSetRule(number);
377 // if the number is negative, return the negative number rule
378 // (if there isn't a negative-number rule, we pretend it's a
381 if (negativeNumberRule) {
382 return negativeNumberRule;
388 // if the number isn't an integer, we use one of the fraction rules...
389 if (number != uprv_floor(number)) {
390 // if the number is between 0 and 1, return the proper
392 if (number < 1 && fractionRules[1]) {
393 return fractionRules[1];
395 // otherwise, return the improper fraction rule
396 else if (fractionRules[0]) {
397 return fractionRules[0];
401 // if there's a master rule, use it to format the number
402 if (fractionRules[2]) {
403 return fractionRules[2];
406 // and if we haven't yet returned a rule, use findNormalRule()
407 // to find the applicable rule
408 int64_t r = util64_fromDouble(number + 0.5);
409 return findNormalRule(r);
413 NFRuleSet::findNormalRule(int64_t number) const
415 // if this is a fraction rule set, use findFractionRuleSetRule()
416 // to find the rule (we should only go into this clause if the
418 if (fIsFractionRuleSet) {
419 return findFractionRuleSetRule((double)number);
422 // if the number is negative, return the negative-number rule
423 // (if there isn't one, pretend the number is positive)
425 if (negativeNumberRule) {
426 return negativeNumberRule;
432 // we have to repeat the preceding two checks, even though we
433 // do them in findRule(), because the version of format() that
434 // takes a long bypasses findRule() and goes straight to this
435 // function. This function does skip the fraction rules since
436 // we know the value is an integer (it also skips the master
437 // rule, since it's considered a fraction rule. Skipping the
438 // master rule in this function is also how we avoid infinite
441 // {dlf} unfortunately this fails if there are no rules except
442 // special rules. If there are no rules, use the master rule.
444 // binary-search the rule list for the applicable rule
445 // (a rule is used for all values from its base value to
446 // the next rule's base value)
447 int32_t hi = rules.size();
452 int32_t mid = (lo + hi) / 2;
453 if (rules[mid]->getBaseValue() == number) {
456 else if (rules[mid]->getBaseValue() > number) {
463 if (hi == 0) { // bad rule set, minimum base > 0
464 return NULL; // want to throw exception here
467 NFRule *result = rules[hi - 1];
469 // use shouldRollBack() to see whether we need to invoke the
470 // rollback rule (see shouldRollBack()'s documentation for
471 // an explanation of the rollback rule). If we do, roll back
472 // one rule and return that one instead of the one we'd normally
474 if (result->shouldRollBack((double)number)) {
475 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
478 result = rules[hi - 2];
482 // else use the master rule
483 return fractionRules[2];
487 * If this rule is a fraction rule set, this function is used by
488 * findRule() to select the most appropriate rule for formatting
489 * the number. Basically, the base value of each rule in the rule
490 * set is treated as the denominator of a fraction. Whichever
491 * denominator can produce the fraction closest in value to the
492 * number passed in is the result. If there's a tie, the earlier
493 * one in the list wins. (If there are two rules in a row with the
494 * same base value, the first one is used when the numerator of the
495 * fraction would be 1, and the second rule is used the rest of the
497 * @param number The number being formatted (which will always be
498 * a number between 0 and 1)
499 * @return The rule to use to format this number
502 NFRuleSet::findFractionRuleSetRule(double number) const
504 // the obvious way to do this (multiply the value being formatted
505 // by each rule's base value until you get an integral result)
506 // doesn't work because of rounding error. This method is more
509 // find the least common multiple of the rules' base values
510 // and multiply this by the number being formatted. This is
511 // all the precision we need, and we can do all of the rest
512 // of the math using integer arithmetic
513 int64_t leastCommonMultiple = rules[0]->getBaseValue();
516 for (uint32_t i = 1; i < rules.size(); ++i) {
517 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
519 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
521 // for each rule, do the following...
522 int64_t tempDifference;
523 int64_t difference = util64_fromDouble(uprv_maxMantissa());
525 for (uint32_t i = 0; i < rules.size(); ++i) {
526 // "numerator" is the numerator of the fraction if the
527 // denominator is the LCD. The numerator if the rule's
528 // base value is the denominator is "numerator" times the
529 // base value divided bythe LCD. Here we check to see if
530 // that's an integer, and if not, how close it is to being
532 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
535 // normalize the result of the above calculation: we want
536 // the numerator's distance from the CLOSEST multiple
538 if (leastCommonMultiple - tempDifference < tempDifference) {
539 tempDifference = leastCommonMultiple - tempDifference;
542 // if this is as close as we've come, keep track of how close
543 // that is, and the line number of the rule that did it. If
544 // we've scored a direct hit, we don't have to look at any more
546 if (tempDifference < difference) {
547 difference = tempDifference;
549 if (difference == 0) {
555 // if we have two successive rules that both have the winning base
556 // value, then the first one (the one we found above) is used if
557 // the numerator of the fraction is 1 and the second one is used if
558 // the numerator of the fraction is anything else (this lets us
559 // do things like "one third"/"two thirds" without haveing to define
560 // a whole bunch of extra rule sets)
561 if ((unsigned)(winner + 1) < rules.size() &&
562 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
563 double n = ((double)rules[winner]->getBaseValue()) * number;
564 if (n < 0.5 || n >= 2) {
569 // finally, return the winning rule
570 return rules[winner];
574 * Parses a string. Matches the string to be parsed against each
575 * of its rules (with a base value less than upperBound) and returns
576 * the value produced by the rule that matched the most charcters
577 * in the source string.
578 * @param text The string to parse
579 * @param parsePosition The initial position is ignored and assumed
580 * to be 0. On exit, this object has been updated to point to the
581 * first character position this rule set didn't consume.
582 * @param upperBound Limits the rules that can be allowed to match.
583 * Only rules whose base values are strictly less than upperBound
585 * @return The numerical result of parsing this string. This will
586 * be the matching rule's base value, composed appropriately with
587 * the results of matching any of its substitutions. The object
588 * will be an instance of Long if it's an integral value; otherwise,
589 * it will be an instance of Double. This function always returns
590 * a valid object: If nothing matched the input string at all,
591 * this function returns new Long(0), and the parse position is
597 static void dumpUS(FILE* f, const UnicodeString& us) {
598 int len = us.length();
599 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
601 us.extract(0, len, buf);
603 fprintf(f, "%s", buf);
604 uprv_free(buf); //delete[] buf;
610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
612 // try matching each rule in the rule set against the text being
613 // parsed. Whichever one matches the most characters is the one
614 // that determines the value we return.
618 // dump out if there's no text to parse
619 if (text.length() == 0) {
623 ParsePosition highWaterMark;
624 ParsePosition workingPos = pos;
627 fprintf(stderr, "<nfrs> %x '", this);
628 dumpUS(stderr, name);
629 fprintf(stderr, "' text '");
630 dumpUS(stderr, text);
631 fprintf(stderr, "'\n");
632 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
635 // start by trying the negative number rule (if there is one)
636 if (negativeNumberRule) {
637 Formattable tempResult;
639 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
641 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
643 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
645 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
647 highWaterMark = workingPos;
652 fprintf(stderr, "<nfrs> continue fractional with text '");
653 dumpUS(stderr, text);
654 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
656 // then try each of the fraction rules
658 for (int i = 0; i < 3; i++) {
659 if (fractionRules[i]) {
660 Formattable tempResult;
661 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
662 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
664 highWaterMark = workingPos;
671 fprintf(stderr, "<nfrs> continue other with text '");
672 dumpUS(stderr, text);
673 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
676 // finally, go through the regular rules one at a time. We start
677 // at the end of the list because we want to try matching the most
678 // sigificant rule first (this helps ensure that we parse
679 // "five thousand three hundred six" as
680 // "(five thousand) (three hundred) (six)" rather than
681 // "((five thousand three) hundred) (six)"). Skip rules whose
682 // base values are higher than the upper bound (again, this helps
683 // limit ambiguity by making sure the rules that match a rule's
684 // are less significant than the rule containing the substitutions)/
686 int64_t ub = util64_fromDouble(upperBound);
690 util64_toa(ub, ubstr, 64);
692 util64_toa(ub, ubstrhex, 64, 16);
693 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
696 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
697 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
700 Formattable tempResult;
701 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
702 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
704 highWaterMark = workingPos;
710 fprintf(stderr, "<nfrs> exit\n");
712 // finally, update the parse postion we were passed to point to the
713 // first character we didn't use, and return the result that
714 // corresponds to that string of characters
721 NFRuleSet::appendRules(UnicodeString& result) const
723 // the rule set name goes first...
725 result.append(gColon);
726 result.append(gLineFeed);
728 // followed by the regular rules...
729 for (uint32_t i = 0; i < rules.size(); i++) {
730 result.append(gFourSpaces, 4);
731 rules[i]->_appendRuleText(result);
732 result.append(gLineFeed);
735 // followed by the special rules (if they exist)
736 if (negativeNumberRule) {
737 result.append(gFourSpaces, 4);
738 negativeNumberRule->_appendRuleText(result);
739 result.append(gLineFeed);
743 for (uint32_t i = 0; i < 3; ++i) {
744 if (fractionRules[i]) {
745 result.append(gFourSpaces, 4);
746 fractionRules[i]->_appendRuleText(result);
747 result.append(gLineFeed);
755 int64_t util64_fromDouble(double d) {
757 if (!uprv_isNaN(d)) {
758 double mant = uprv_maxMantissa();
761 } else if (d > mant) {
768 result = (int64_t)uprv_floor(d);
776 int64_t util64_pow(int32_t r, uint32_t e) {
790 static const uint8_t asciiDigits[] = {
791 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
792 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
793 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
794 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
795 0x77u, 0x78u, 0x79u, 0x7au,
798 static const UChar kUMinus = (UChar)0x002d;
801 static const char kMinus = '-';
803 static const uint8_t digitInfo[] = {
804 0, 0, 0, 0, 0, 0, 0, 0,
805 0, 0, 0, 0, 0, 0, 0, 0,
806 0, 0, 0, 0, 0, 0, 0, 0,
807 0, 0, 0, 0, 0, 0, 0, 0,
808 0, 0, 0, 0, 0, 0, 0, 0,
809 0, 0, 0, 0, 0, 0, 0, 0,
810 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
811 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
812 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
813 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
814 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
815 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
816 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
817 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
818 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
819 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
822 int64_t util64_atoi(const char* str, uint32_t radix)
826 } else if (radix < 2) {
829 int64_t lradix = radix;
832 if (*str == kMinus) {
838 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
840 result += (int32_t)b;
848 int64_t util64_utoi(const UChar* str, uint32_t radix)
852 } else if (radix < 2) {
855 int64_t lradix = radix;
858 if (*str == kUMinus) {
865 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
867 result += (int32_t)b;
875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
879 } else if (radix < 2) {
882 int64_t base = radix;
885 if (len && (w < 0) && (radix == 10) && !raw) {
889 } else if (len && (w == 0)) {
890 *p++ = (char)raw ? 0 : asciiDigits[0];
894 while (len && w != 0) {
895 int64_t n = w / base;
896 int64_t m = n * base;
897 int32_t d = (int32_t)(w-m);
898 *p++ = raw ? (char)d : asciiDigits[d];
903 *p = 0; // null terminate if room for caller convenience
907 if (*buf == kMinus) {
921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
925 } else if (radix < 2) {
928 int64_t base = radix;
931 if (len && (w < 0) && (radix == 10) && !raw) {
935 } else if (len && (w == 0)) {
936 *p++ = (UChar)raw ? 0 : asciiDigits[0];
940 while (len && (w != 0)) {
941 int64_t n = w / base;
942 int64_t m = n * base;
943 int32_t d = (int32_t)(w-m);
944 *p++ = (UChar)(raw ? d : asciiDigits[d]);
949 *p = 0; // null terminate if room for caller convenience
952 len = (uint32_t)(p - buf);
953 if (*buf == kUMinus) {