2 *******************************************************************************
3 * Copyright (C) 2009-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
8 *******************************************************************************
11 #include "unicode/decimfmt.h"
12 #include "unicode/messagepattern.h"
13 #include "unicode/plurfmt.h"
14 #include "unicode/plurrule.h"
15 #include "unicode/utypes.h"
17 #include "messageimpl.h"
19 #include "plurrule_impl.h"
22 #include "precision.h"
23 #include "visibledigits.h"
25 #if !UCONFIG_NO_FORMATTING
// UTF-16 code units for the plural keyword "other", followed by a terminating 0.
29 static const UChar OTHER_STRING[] = {
30 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
// Invokes ICU's RTTI implementation macro for the PluralFormat class.
33 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
// Constructs a PluralFormat for the default locale.
// Setup is delegated to init() with no caller-supplied rules (NULL) and the
// cardinal plural type; failures are reported through status.
35 PluralFormat::PluralFormat(UErrorCode& status)
36 : locale(Locale::getDefault()),
40 init(NULL, UPLURAL_TYPE_CARDINAL, status);
// Constructs a PluralFormat from a caller-supplied locale (loc).
// Setup is delegated to init() with no caller-supplied rules (NULL) and the
// cardinal plural type; failures are reported through status.
43 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
48 init(NULL, UPLURAL_TYPE_CARDINAL, status);
// Constructs a PluralFormat for the default locale using caller-supplied rules.
// init() receives a pointer to rules together with UPLURAL_TYPE_COUNT.
// NOTE(review): UPLURAL_TYPE_COUNT looks like a placeholder, since explicit
// rules are supplied -- confirm against init().
51 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
52 : locale(Locale::getDefault()),
56 init(&rules, UPLURAL_TYPE_COUNT, status);
// Constructs a PluralFormat from a caller-supplied locale (loc) and rules.
// init() receives a pointer to rules together with UPLURAL_TYPE_COUNT.
// NOTE(review): UPLURAL_TYPE_COUNT looks like a placeholder, since explicit
// rules are supplied -- confirm against init().
59 PluralFormat::PluralFormat(const Locale& loc,
60 const PluralRules& rules,
66 init(&rules, UPLURAL_TYPE_COUNT, status);
// Constructs a PluralFormat from a caller-supplied locale (loc) and plural type.
// init() is called with no explicit rules (NULL) and the given type.
69 PluralFormat::PluralFormat(const Locale& loc,
76 init(NULL, type, status);
// Constructs a PluralFormat for the default locale and applies the pattern pat.
// init() runs first (no explicit rules, cardinal type); applyPattern() then
// parses pat. Both report failures through status.
79 PluralFormat::PluralFormat(const UnicodeString& pat,
81 : locale(Locale::getDefault()),
85 init(NULL, UPLURAL_TYPE_CARDINAL, status);
86 applyPattern(pat, status);
// Constructs a PluralFormat from a caller-supplied locale (loc) and applies
// the pattern pat.
// init() runs first (no explicit rules, cardinal type); applyPattern() then
// parses pat. Both report failures through status.
89 PluralFormat::PluralFormat(const Locale& loc,
90 const UnicodeString& pat,
96 init(NULL, UPLURAL_TYPE_CARDINAL, status);
97 applyPattern(pat, status);
// Constructs a PluralFormat for the default locale from caller-supplied rules
// and applies the pattern pat.
// init() receives a pointer to rules together with UPLURAL_TYPE_COUNT
// (NOTE(review): apparently a placeholder when rules are given -- confirm),
// after which applyPattern() parses pat.
100 PluralFormat::PluralFormat(const PluralRules& rules,
101 const UnicodeString& pat,
103 : locale(Locale::getDefault()),
107 init(&rules, UPLURAL_TYPE_COUNT, status);
108 applyPattern(pat, status);
// Constructs a PluralFormat from a caller-supplied locale (loc) and rules,
// then applies the pattern pat.
// init() receives a pointer to rules together with UPLURAL_TYPE_COUNT
// (NOTE(review): apparently a placeholder when rules are given -- confirm),
// after which applyPattern() parses pat.
111 PluralFormat::PluralFormat(const Locale& loc,
112 const PluralRules& rules,
113 const UnicodeString& pat,
119 init(&rules, UPLURAL_TYPE_COUNT, status);
120 applyPattern(pat, status);
// Constructs a PluralFormat from a caller-supplied locale (loc) and plural
// type, then applies the pattern pat.
// init() is called with no explicit rules (NULL) and the given type before
// applyPattern() parses pat.
123 PluralFormat::PluralFormat(const Locale& loc,
125 const UnicodeString& pat,
131 init(NULL, type, status);
132 applyPattern(pat, status);
// Copy constructor.
// The initializer list copies locale, msgPattern and offset from other.
135 PluralFormat::PluralFormat(const PluralFormat& other)
137 locale(other.locale),
138 msgPattern(other.msgPattern),
140 offset(other.offset) {
// Replaces this object's numberFormat and plural rules with copies taken
// from other.
//
// For each of the two members: when other holds NULL, a fresh instance is
// created for this object's locale; otherwise other's instance is cloned.
//
// NOTE(review): errors are collected in a function-local status and do not
// appear to be propagated to the caller -- confirm this is intended.
145 PluralFormat::copyObjects(const PluralFormat& other) {
146 UErrorCode status = U_ZERO_ERROR;
// Handle any number format this object already holds.
147 if (numberFormat != NULL) {
// Release the plural rules currently owned by the wrapper.
150 if (pluralRulesWrapper.pluralRules != NULL) {
151 delete pluralRulesWrapper.pluralRules;
// Number format: create a default one for locale, or clone other's.
154 if (other.numberFormat == NULL) {
155 numberFormat = NumberFormat::createInstance(locale, status);
157 numberFormat = (NumberFormat*)other.numberFormat->clone();
// Plural rules: look them up for locale, or clone other's.
159 if (other.pluralRulesWrapper.pluralRules == NULL) {
160 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
162 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
// Destructor for PluralFormat.
167 PluralFormat::~PluralFormat() {
// Shared setup used by the constructors and by setLocale().
//
// rules   caller-supplied plural rules to clone, or NULL
// type    plural type passed to PluralRules::forLocale()
// status  in/out ICU error code; nothing is set up if it already indicates
//         failure on entry
//
// pluralRulesWrapper.pluralRules is assigned either from
// PluralRules::forLocale(locale, type, status) or from rules->clone(); a NULL
// clone is reported as U_MEMORY_ALLOCATION_ERROR. numberFormat is created
// with NumberFormat::createInstance() for this object's locale.
172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
173 if (U_FAILURE(status)) {
// Rules looked up from the locale and plural type.
178 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
// Rules cloned from the caller's object; clone() signals failure by returning NULL.
180 pluralRulesWrapper.pluralRules = rules->clone();
181 if (pluralRulesWrapper.pluralRules == NULL) {
182 status = U_MEMORY_ALLOCATION_ERROR;
187 numberFormat= NumberFormat::createInstance(locale, status);
// Parses newPattern as a plural-style pattern into msgPattern and caches the
// pattern's plural offset.
//
// newPattern  the pattern text to parse
// status      in/out ICU error code; checked after parsing
191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
192 msgPattern.parsePluralStyle(newPattern, NULL, status);
193 if (U_FAILURE(status)) {
// Cache the plural offset of the parsed pattern for use by format().
198 offset = msgPattern.getPluralOffset(0);
// Formats a Formattable value.
//
// obj       value to format; must be numeric
// appendTo  receives the output and is returned
// status    in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR on the
//           non-numeric path
//
// A numeric obj is forwarded, together with its double value, to the
// (Formattable, double, ...) overload.
202 PluralFormat::format(const Formattable& obj,
203 UnicodeString& appendTo,
205 UErrorCode& status) const
207 if (U_FAILURE(status)) return appendTo;
209 if (obj.isNumeric()) {
210 return format(obj, obj.getDouble(), appendTo, pos, status);
212 status = U_ILLEGAL_ARGUMENT_ERROR;
// Convenience overload: formats an int32_t into a fresh string.
// Uses a local FieldPosition(0) and an empty local result string, and
// forwards to the (Formattable, double, ...) overload.
218 PluralFormat::format(int32_t number, UErrorCode& status) const {
219 FieldPosition fpos(0);
220 UnicodeString result;
221 return format(Formattable(number), number, result, fpos, status);
// Convenience overload: formats a double into a fresh string.
// Uses a local FieldPosition(0) and an empty local result string, and
// forwards to the (Formattable, double, ...) overload.
225 PluralFormat::format(double number, UErrorCode& status) const {
226 FieldPosition fpos(0);
227 UnicodeString result;
228 return format(Formattable(number), number, result, fpos, status);
// Formats an int32_t, appending to appendTo.
// The value is wrapped in a Formattable and also passed as a double to the
// (Formattable, double, ...) overload.
233 PluralFormat::format(int32_t number,
234 UnicodeString& appendTo,
236 UErrorCode& status) const {
237 return format(Formattable(number), (double)number, appendTo, pos, status);
// Formats a double, appending to appendTo.
// The value is wrapped in a Formattable and also passed as a double to the
// (Formattable, double, ...) overload.
241 PluralFormat::format(double number,
242 UnicodeString& appendTo,
244 UErrorCode& status) const {
245 return format(Formattable(number), (double)number, appendTo, pos, status);
// Core formatting routine that the other format() overloads forward to.
//
// Formats the number into numberString, selects a sub-message through
// findSubMessage(), and appends that sub-message's text to appendTo, with
// each REPLACE_NUMBER part replaced by numberString.
//
// numberObject  the value as a Formattable, handed to the number formatter
// number        the value as a double; used to compute number - offset and
//               passed to findSubMessage()
// appendTo      receives the output
// pos           forwarded to numberFormat->format() when the pattern has no parts
// status        in/out ICU error code
249 PluralFormat::format(const Formattable& numberObject, double number,
250 UnicodeString& appendTo,
252 UErrorCode& status) const {
253 if (U_FAILURE(status)) {
// No parsed pattern parts: there is nothing to select, so only the number is formatted.
256 if (msgPattern.countParts() == 0) {
257 return numberFormat->format(numberObject, appendTo, pos, status);
259 // Get the appropriate sub-message.
260 // Select it based on the formatted number-offset.
261 double numberMinusOffset = number - offset;
262 UnicodeString numberString;
263 FieldPosition ignorePos;
// dec holds the visible-digits form of the number; its address is later
// passed to findSubMessage() as the selector context.
265 VisibleDigitsWithExponent dec;
266 fp.initVisibleDigitsWithExponent(numberMinusOffset, dec, status);
267 if (U_FAILURE(status)) {
// Path that works from the original numberObject: a DecimalFormat (per the
// dynamic_cast) recomputes dec and formats from it; the plain NumberFormat
// call formats numberObject directly.
271 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
273 decFmt->initVisibleDigitsWithExponent(
274 numberObject, dec, status);
275 if (U_FAILURE(status)) {
278 decFmt->format(dec, numberString, ignorePos, status);
280 numberFormat->format(
281 numberObject, numberString, ignorePos, status); // could be BigDecimal etc.
// Path that works from number - offset, with the same DecimalFormat /
// plain NumberFormat split as above.
284 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
286 decFmt->initVisibleDigitsWithExponent(
287 numberMinusOffset, dec, status);
288 if (U_FAILURE(status)) {
291 decFmt->format(dec, numberString, ignorePos, status);
293 numberFormat->format(
294 numberMinusOffset, numberString, ignorePos, status);
// Pick the sub-message; the selector receives &dec as its context.
297 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
298 if (U_FAILURE(status)) { return appendTo; }
299 // Replace syntactic # signs in the top level of this sub-message
300 // (not in nested arguments) with the formatted number-offset.
301 const UnicodeString& pattern = msgPattern.getPatternString();
// prevIndex marks where the not-yet-copied pattern text begins.
302 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
// Examine the next part of the selected sub-message.
304 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
305 const UMessagePatternPartType type = part.getType();
306 int32_t index = part.getIndex();
// MSG_LIMIT ends the sub-message: copy the remaining text and return.
307 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
308 return appendTo.append(pattern, prevIndex, index - prevIndex);
// REPLACE_NUMBER, or SKIP_SYNTAX in JDK apostrophe mode: copy the text before
// the part, insert numberString for REPLACE_NUMBER, and continue after the part.
309 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
310 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
311 appendTo.append(pattern, prevIndex, index - prevIndex);
312 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
313 appendTo.append(numberString);
315 prevIndex = part.getLimit();
// Nested argument: copy the text before it, jump to the argument's limit part,
// and append the argument's text via appendReducedApostrophes().
316 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
317 appendTo.append(pattern, prevIndex, index - prevIndex);
319 partIndex = msgPattern.getLimitPartIndex(partIndex);
320 index = msgPattern.getPart(partIndex).getLimit();
321 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
// Writes the current pattern text to appendTo.
// With no parsed pattern parts, appendTo is set to bogus; the pattern string
// held by msgPattern is appended to appendTo on the other path.
328 PluralFormat::toPattern(UnicodeString& appendTo) {
329 if (0 == msgPattern.countParts()) {
330 appendTo.setToBogus();
332 appendTo.append(msgPattern.getPatternString());
// Switches this format to a caller-supplied locale (loc).
// The plural-rules wrapper is reset and init() is re-run with no explicit
// rules. status is checked on entry.
//
// NOTE(review): re-initialization always passes UPLURAL_TYPE_CARDINAL, so a
// format built with a different plural type would not keep that type after
// this call -- confirm this is intended.
338 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
339 if (U_FAILURE(status)) {
347 pluralRulesWrapper.reset();
348 init(NULL, UPLURAL_TYPE_CARDINAL, status);
// Installs a clone of the given NumberFormat.
//
// format  formatter to clone; the caller's object itself is not stored
// status  in/out ICU error code; U_MEMORY_ALLOCATION_ERROR is set on the
//         clone-failure path
//
// NOTE(review): format is dereferenced without a visible null check --
// confirm callers never pass NULL.
352 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
353 if (U_FAILURE(status)) {
356 NumberFormat* nf = (NumberFormat*)format->clone();
361 status = U_MEMORY_ALLOCATION_ERROR;
// Returns a heap-allocated copy of this object made with the copy constructor.
366 PluralFormat::clone() const
368 return new PluralFormat(*this);
// Assignment operator.
// Self-assignment is skipped; locale, msgPattern and offset are copied
// from other.
373 PluralFormat::operator=(const PluralFormat& other) {
374 if (this != &other) {
375 locale = other.locale;
376 msgPattern = other.msgPattern;
377 offset = other.offset;
// Equality comparison against another Format.
//
// Checks identity first, then the base-class Format::operator==. The
// remaining comparison covers locale and msgPattern, and requires numberFormat
// and the plural rules to be either both NULL or both non-NULL and equal
// by value.
385 PluralFormat::operator==(const Format& other) const {
386 if (this == &other) {
389 if (!Format::operator==(other)) {
// NOTE(review): the cast assumes the base-class comparison has already
// established that other is a PluralFormat -- confirm.
392 const PluralFormat& o = (const PluralFormat&)other;
394 locale == o.locale &&
395 msgPattern == o.msgPattern && // implies same offset
396 (numberFormat == NULL) == (o.numberFormat == NULL) &&
397 (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
398 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
399 (pluralRulesWrapper.pluralRules == NULL ||
400 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
// Inequality comparison: the negation of operator==.
404 PluralFormat::operator!=(const Format& other) const {
405 return !operator==(other);
// Parsing is not supported by this class.
// The source and result arguments are ignored; the error index of pos is set
// to its current index.
409 PluralFormat::parseObject(const UnicodeString& /*source*/,
410 Formattable& /*result*/,
411 ParsePosition& pos) const
413 // Parsing not supported.
414 pos.setErrorIndex(pos.getIndex());
// Searches a plural-style message pattern for the sub-message that applies
// to number.
//
// pattern    the parsed message pattern to search
// partIndex  index of the part at which the search starts
// selector   maps (context, number - offset) to a plural keyword
// context    opaque pointer handed through to selector.select()
// number     the value to match; explicit values are compared against it
//            directly, keywords are selected using number - offset
// ec         in/out ICU error code, passed to selector.select()
//
// The matching rules (explicit value, then first matching keyword, then
// first "other") are described in the comments inside the body.
417 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
418 const PluralSelector& selector, void *context,
419 double number, UErrorCode& ec) {
423 int32_t count=pattern.countParts();
// A leading part with a numeric value supplies the plural offset.
425 const MessagePattern::Part* part=&pattern.getPart(partIndex);
426 if (MessagePattern::Part::hasNumericValue(part->getType())) {
427 offset=pattern.getNumericValue(*part);
432 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
433 // Then we get the keyword from the selector.
434 // (In other words, we never call the selector if we match against an explicit value,
435 // or if the only non-explicit keyword is "other".)
436 UnicodeString keyword;
// Read-only alias over the 5 code units of OTHER_STRING.
437 UnicodeString other(FALSE, OTHER_STRING, 5);
438 // When we find a match, we set msgStart>0 and also set this boolean to true
439 // to avoid matching the keyword again (duplicates are allowed)
440 // while we continue to look for an explicit-value match.
441 UBool haveKeywordMatch=FALSE;
442 // msgStart is 0 until we find any appropriate sub-message.
443 // We remember the first "other" sub-message if we have not seen any
444 // appropriate sub-message before.
445 // We remember the first matching-keyword sub-message if we have not seen
446 // one of those before.
447 // (The parser allows [does not check for] duplicate keywords.
448 // We just have to make sure to take the first one.)
449 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
450 // at the first keyword match.
451 // We keep going until we find an explicit-value match or reach the end of the plural style.
453 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
454 // until ARG_LIMIT or end of plural-only pattern.
456 part=&pattern.getPart(partIndex++);
457 const UMessagePatternPartType type = part->getType();
458 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
461 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
462 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
463 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
464 // explicit value like "=2"
465 part=&pattern.getPart(partIndex++);
// Explicit values are compared against the un-offset number.
466 if(number==pattern.getNumericValue(*part)) {
467 // matches explicit value
470 } else if(!haveKeywordMatch) {
471 // plural keyword like "few" or "other"
472 // Compare "other" first and call the selector if this is not "other".
473 if(pattern.partSubstringMatches(*part, other)) {
476 if(0 == keyword.compare(other)) {
477 // This is the first "other" sub-message,
478 // and the selected keyword is also "other".
479 // Do not match "other" again.
480 haveKeywordMatch=TRUE;
// Ask the selector for the keyword only once, on first need.
484 if(keyword.isEmpty()) {
485 keyword=selector.select(context, number-offset, ec);
486 if(msgStart!=0 && (0 == keyword.compare(other))) {
487 // We have already seen an "other" sub-message.
488 // Do not match "other" again.
489 haveKeywordMatch=TRUE;
490 // Skip keyword matching but do getLimitPartIndex().
493 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
496 // Do not match this keyword again.
497 haveKeywordMatch=TRUE;
// Skip over this sub-message to its limit part before the next tuple.
501 partIndex=pattern.getLimitPartIndex(partIndex);
502 } while(++partIndex<count);
// Looks for the text of one of this pattern's sub-messages inside source.
//
// source              text to search
// rbnfLenientScanner  when non-NULL, matching goes through its
//                     findTextLenient(); otherwise UnicodeString::indexOf()
//                     is used
// result              receives a string on a successful match
// pos                 in: begin index at which the search starts;
//                     out: begin/end of the match, or begin index -1 on the
//                     no-match paths
//
// Every sub-message is tried. A candidate replaces the current best match
// only if it is found at an index >= the current best index and its text is
// longer than the current best text.
506 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
507 // If no pattern was applied, return null.
508 if (msgPattern.countParts() == 0) {
509 pos.setBeginIndex(-1);
515 int count=msgPattern.countParts();
516 int startingAt = pos.getBeginIndex();
517 if (startingAt < 0) {
521 // The keyword is null until we need to match against a non-explicit, not-"other" value.
522 // Then we get the keyword from the selector.
523 // (In other words, we never call the selector if we match against an explicit value,
524 // or if the only non-explicit keyword is "other".)
525 UnicodeString keyword;
526 UnicodeString matchedWord;
527 const UnicodeString& pattern = msgPattern.getPatternString();
528 int matchedIndex = -1;
529 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
530 // until the end of the plural-only pattern.
531 while (partIndex < count) {
// Each tuple is expected to be ARG_SELECTOR, MSG_START, MSG_LIMIT, in that order.
532 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
533 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
538 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
539 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
544 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
545 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
// currArg is the sub-message text between the MSG_START and MSG_LIMIT parts.
550 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
551 if (rbnfLenientScanner != NULL) {
552 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
554 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
557 currMatchIndex = source.indexOf(currArg, startingAt);
559 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
560 matchedIndex = currMatchIndex;
561 matchedWord = currArg;
// NOTE(review): keyword is taken from the same range as currArg (the message
// text), not from the selector part -- confirm this is intended.
562 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
// Report the best match found, if any.
565 if (matchedIndex >= 0) {
566 pos.setBeginIndex(matchedIndex);
567 pos.setEndIndex(matchedIndex + matchedWord.length());
568 result.setString(keyword);
573 pos.setBeginIndex(-1);
// Out-of-line destructor for PluralSelector; the body is empty.
577 PluralFormat::PluralSelector::~PluralSelector() {}
// Destructor for PluralSelectorAdapter.
579 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
// Selects a plural keyword by delegating to pluralRules->select().
//
// context  must point to a VisibleDigitsWithExponent; it is cast and
//          dereferenced without a check
// number   not used for the selection itself
// The error-code parameter is unnamed and unused.
583 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
584 UErrorCode& /*ec*/) const {
585 (void)number; // silences the unused-parameter warning; selection relies on context instead
586 VisibleDigitsWithExponent *dec=static_cast<VisibleDigitsWithExponent *>(context);
587 return pluralRules->select(*dec);
// Resets the adapter; setLocale() calls this before re-running init().
// NOTE(review): presumably releases and clears pluralRules -- confirm.
590 void PluralFormat::PluralSelectorAdapter::reset() {
599 #endif /* #if !UCONFIG_NO_FORMATTING */