1 /********************************************************************
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation.
14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
15 * 06/11/97 helena Fixed addPattern to take the pattern correctly.
16 * 06/17/97 helena Fixed the getPattern to return the correct pattern.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 02/22/99 stephen Removed character literals for EBCDIC safety
19 * 11/01/09 kirtig Added SelectFormat
20 ********************************************************************/
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_FORMATTING
26 #include "unicode/appendable.h"
27 #include "unicode/choicfmt.h"
28 #include "unicode/datefmt.h"
29 #include "unicode/decimfmt.h"
30 #include "unicode/localpointer.h"
31 #include "unicode/msgfmt.h"
32 #include "unicode/plurfmt.h"
33 #include "unicode/rbnf.h"
34 #include "unicode/selfmt.h"
35 #include "unicode/smpdtfmt.h"
36 #include "unicode/umsg.h"
37 #include "unicode/ustring.h"
39 #include "patternprops.h"
40 #include "messageimpl.h"
41 #include "msgfmt_impl.h"
42 #include "plurrule_impl.h"
50 // *****************************************************************************
51 // class MessageFormat
52 // *****************************************************************************
// Pattern syntax characters, written as numeric UTF-16 code units rather than
// character literals (the modification history at the top of the file records
// that character literals were removed for EBCDIC safety).
54 #define SINGLE_QUOTE ((UChar)0x0027)
55 #define COMMA ((UChar)0x002C)
56 #define LEFT_CURLY_BRACE ((UChar)0x007B)
57 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
59 //---------------------------------------
// Argument-type keywords, each spelled out as a zero-terminated array of
// UTF-16 code units; the trailing comment on each row shows the text encoded.
62 static const UChar ID_NUMBER[] = {
63 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
65 static const UChar ID_DATE[] = {
66 0x64, 0x61, 0x74, 0x65, 0 /* "date" */
68 static const UChar ID_TIME[] = {
69 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
71 static const UChar ID_SPELLOUT[] = {
72 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
74 static const UChar ID_ORDINAL[] = {
75 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
77 static const UChar ID_DURATION[] = {
78 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
81 // MessageFormat Type List Number, Date, Time or Choice
// Table of pointers to keyword strings; presumably it lists the ID_* arrays
// defined above - confirm against the initializer.
82 static const UChar * const TYPE_IDS[] = {
// Style keywords that may follow a "number" argument type, stored as
// zero-terminated UTF-16 code-unit arrays. ID_EMPTY is the empty string that
// stands for the default style.
92 static const UChar ID_EMPTY[] = {
93 0 /* empty string, used for default so that null can mark end of list */
95 static const UChar ID_CURRENCY[] = {
96 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
98 static const UChar ID_PERCENT[] = {
99 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
101 static const UChar ID_INTEGER[] = {
102 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
105 // NumberFormat modifier list, default, currency, percent or integer
// Per the ID_EMPTY remark above, a null pointer marks the end of such a list.
106 static const UChar * const NUMBER_STYLE_IDS[] = {
// Style keywords that may follow a "date" or "time" argument type, stored as
// zero-terminated UTF-16 code-unit arrays.
114 static const UChar ID_SHORT[] = {
115 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
117 static const UChar ID_MEDIUM[] = {
118 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
120 static const UChar ID_LONG[] = {
121 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
123 static const UChar ID_FULL[] = {
124 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
127 // DateFormat modifier list, default, short, medium, long or full
128 static const UChar * const DATE_STYLE_IDS[] = {
// DateFormat style constants, listed in the same default/short/medium/long/full
// order that the comment on DATE_STYLE_IDS gives; presumably the two tables
// are indexed in parallel - confirm at the point of use.
137 static const icu::DateFormat::EStyle DATE_STYLES[] = {
138 icu::DateFormat::kDefault,
139 icu::DateFormat::kShort,
140 icu::DateFormat::kMedium,
141 icu::DateFormat::kLong,
142 icu::DateFormat::kFull,
// Minimum capacity used by allocateArgTypes() when it first sizes argTypes[].
145 static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
// Text appended by the internal format() when an argument pointer is NULL;
// that call site passes an explicit length of 4.
147 static const UChar NULL_STRING[] = {
148 0x6E, 0x75, 0x6C, 0x6C, 0 // "null"
// The keyword "other" as a zero-terminated UTF-16 code-unit array.
151 static const UChar OTHER_STRING[] = {
152 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
// Value-comparison callback with the UHashTok signature. It forwards the
// pointer payload of both tokens to MessageFormat::equalFormats() and returns
// that result. setArgStartFormat() hands this function to uhash_open() when it
// creates the formatter cache.
156 static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1,
157 const UHashTok key2) {
158 return icu::MessageFormat::equalFormats(key1.pointer, key2.pointer);
165 // -------------------------------------
// Expands ICU's RTTI implementation macro for the two classes named below.
166 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
167 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
169 //--------------------------------------------------------------------
172 * Convert an integer value to a string and append the result to
173 * the given UnicodeString.
// i        - the value to convert
// appendTo - receives the digits; it is also the declared return type's target
175 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
// The conversion is written into `temp` with a stated capacity of 16 and
// radix 10; the final argument to uprv_itou is 0.
177 uprv_itou(temp,16,i,10,0); // 10 == radix
// A length of -1 is passed, so append() is given no explicit length for temp.
178 appendTo.append(temp, -1);
183 // AppendableWrapper: encapsulates the result of formatting, keeping track
184 // of the string and its length.
// Every append overload below forwards to app.appendString(); the
// formatAndAppend overloads run a Format and route its output the same way.
185 class AppendableWrapper : public UMemory {
// Stores the given Appendable in `app` and starts the length counter at 0.
187 AppendableWrapper(Appendable& appendable) : app(appendable), len(0) {
// Appends the full contents of s.
189 void append(const UnicodeString& s) {
190 app.appendString(s.getBuffer(), s.length());
// Forwards the buffer and length unchanged to app.appendString().
193 void append(const UChar* s, const int32_t sLength) {
194 app.appendString(s, sLength);
// Appends the slice of s selected by (start, length), taken as a temporary
// substring and passed to the UnicodeString overload above.
197 void append(const UnicodeString& s, int32_t start, int32_t length) {
198 append(s.tempSubString(start, length));
// Formats arg with formatter into the local string `s`; failures are reported
// through ec.
200 void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) {
202 formatter->format(arg, s, ec);
// Variant that also receives an already formatted argString. A non-empty
// argString takes the first branch (presumably it is appended as-is -
// confirm); the call to the three-argument overload formats arg from scratch.
207 void formatAndAppend(const Format* formatter, const Formattable& arg,
208 const UnicodeString &argString, UErrorCode& ec) {
209 if (!argString.isEmpty()) {
214 formatAndAppend(formatter, arg, ec);
226 // -------------------------------------
227 // Creates a MessageFormat instance based on the pattern.
// This overload uses the default locale. The default number/date formats and
// both hashtables start out NULL, and the cardinal and ordinal plural
// providers are constructed with a reference to this object. The pattern is
// applied last, with errors reported through `success`.
229 MessageFormat::MessageFormat(const UnicodeString& pattern,
231 : fLocale(Locale::getDefault()), // Uses the default locale
234 formatAliasesCapacity(0),
238 hasArgTypeConflicts(FALSE),
239 defaultNumberFormat(NULL),
240 defaultDateFormat(NULL),
241 cachedFormatters(NULL),
242 customFormatArgStarts(NULL),
243 pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
244 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
// The same locale name is supplied for both arguments of setLocaleIDs().
246 setLocaleIDs(fLocale.getName(), fLocale.getName());
247 applyPattern(pattern, success);
// Creates a MessageFormat for `pattern` in the caller-supplied locale.
// Member initialization matches the default-locale constructor except that
// fLocale is copied from newLocale. Pattern errors are reported through
// `success`.
250 MessageFormat::MessageFormat(const UnicodeString& pattern,
251 const Locale& newLocale,
253 : fLocale(newLocale),
256 formatAliasesCapacity(0),
260 hasArgTypeConflicts(FALSE),
261 defaultNumberFormat(NULL),
262 defaultDateFormat(NULL),
263 cachedFormatters(NULL),
264 customFormatArgStarts(NULL),
265 pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
266 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
// The same locale name is supplied for both arguments of setLocaleIDs().
268 setLocaleIDs(fLocale.getName(), fLocale.getName());
269 applyPattern(pattern, success);
// Creates a MessageFormat for `pattern` in the caller-supplied locale and
// passes the caller's UParseError to applyPattern(), so parse diagnostics are
// available to the caller in addition to the `success` error code.
272 MessageFormat::MessageFormat(const UnicodeString& pattern,
273 const Locale& newLocale,
274 UParseError& parseError,
276 : fLocale(newLocale),
279 formatAliasesCapacity(0),
283 hasArgTypeConflicts(FALSE),
284 defaultNumberFormat(NULL),
285 defaultDateFormat(NULL),
286 cachedFormatters(NULL),
287 customFormatArgStarts(NULL),
288 pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
289 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
// The same locale name is supplied for both arguments of setLocaleIDs().
291 setLocaleIDs(fLocale.getName(), fLocale.getName());
292 applyPattern(pattern, parseError, success);
// Copy constructor. The locale, parsed pattern and hasArgTypeConflicts flag are
// copied in the initializer list. The default formats and both hashtables
// start out NULL, and the plural providers are constructed against this
// object. copyObjects() then copies the remaining state from `that`.
295 MessageFormat::MessageFormat(const MessageFormat& that)
298 fLocale(that.fLocale),
299 msgPattern(that.msgPattern),
301 formatAliasesCapacity(0),
305 hasArgTypeConflicts(that.hasArgTypeConflicts),
306 defaultNumberFormat(NULL),
307 defaultDateFormat(NULL),
308 cachedFormatters(NULL),
309 customFormatArgStarts(NULL),
310 pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
311 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
313 // This will take care of creating the hash tables (since they are NULL).
// A constructor has no error parameter, so a local error code is used here.
314 UErrorCode ec = U_ZERO_ERROR;
315 copyObjects(that, ec);
// Destructor: closes both hashtables, releases the formatAliases array with
// uprv_free() and deletes the two default formats.
321 MessageFormat::~MessageFormat()
323 uhash_close(cachedFormatters);
324 uhash_close(customFormatArgStarts);
327 uprv_free(formatAliases);
328 delete defaultNumberFormat;
329 delete defaultDateFormat;
332 //--------------------------------------------------------------------
333 // Variable-size array management
336 * Allocate argTypes[] to at least the given capacity and return
337 * TRUE if successful. If not, leave argTypes[] unchanged.
339 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it
340 * if necessary to be at least as large as specified.
// capacity - minimum number of Formattable::Type slots required
// status   - checked on entry; set to U_MEMORY_ALLOCATION_ERROR on failure
342 UBool MessageFormat::allocateArgTypes(int32_t capacity, UErrorCode& status) {
343 if (U_FAILURE(status)) {
// Nothing to do when the current array is already large enough.
346 if (argTypeCapacity >= capacity) {
// Growth policy: never below DEFAULT_INITIAL_CAPACITY, and at least double
// the current capacity, so repeated small requests do not each reallocate.
349 if (capacity < DEFAULT_INITIAL_CAPACITY) {
350 capacity = DEFAULT_INITIAL_CAPACITY;
351 } else if (capacity < 2*argTypeCapacity) {
352 capacity = 2*argTypeCapacity;
// The reallocation result goes into a temporary first, so argTypes still
// points at the old buffer if the call fails.
354 Formattable::Type* a = (Formattable::Type*)
355 uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
357 status = U_MEMORY_ALLOCATION_ERROR;
361 argTypeCapacity = capacity;
365 // -------------------------------------
366 // assignment operator
// Assigns the base-class state, the locale (through setLocale()), the parsed
// pattern and the hasArgTypeConflicts flag, then calls copyObjects() for the
// remaining state.
369 MessageFormat::operator=(const MessageFormat& that)
372 // Calls the super class for assignment first.
373 Format::operator=(that);
375 setLocale(that.fLocale);
376 msgPattern = that.msgPattern;
377 hasArgTypeConflicts = that.hasArgTypeConflicts;
// operator= has no error parameter, so a local error code is used here.
379 UErrorCode ec = U_ZERO_ERROR;
380 copyObjects(that, ec);
// Equality test against another Format. The checks run in this order:
// identity, base-class equality together with pattern and locale, presence and
// size of the custom-formatter table, then a pairwise comparison of the custom
// formatters.
389 MessageFormat::operator==(const Format& rhs) const
391 if (this == &rhs) return TRUE;
// The cast only binds a reference; members of `that` are read after
// Format::operator==() in the condition below has been evaluated.
393 MessageFormat& that = (MessageFormat&)rhs;
395 // Check class ID before checking MessageFormat members
396 if (!Format::operator==(rhs) ||
397 msgPattern != that.msgPattern ||
398 fLocale != that.fLocale) {
402 // Compare hashtables.
// One side having a custom-formatter table while the other does not.
403 if ((customFormatArgStarts == NULL) != (that.customFormatArgStarts == NULL)) {
// Neither side has custom formatters.
406 if (customFormatArgStarts == NULL) {
410 UErrorCode ec = U_ZERO_ERROR;
411 const int32_t count = uhash_count(customFormatArgStarts);
412 const int32_t rhs_count = uhash_count(that.customFormatArgStarts);
413 if (count != rhs_count) {
// Walk both tables in lock-step; pos and rhs_pos are the iteration cursors
// handed to uhash_nextElement().
// NOTE(review): this assumes both hashtables enumerate their keys in the same
// order - confirm that holds for equal key sets.
416 int32_t idx = 0, rhs_idx = 0, pos = -1, rhs_pos = -1;
417 for (; idx < count && rhs_idx < rhs_count && U_SUCCESS(ec); ++idx, ++rhs_idx) {
418 const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos);
419 const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos);
420 if (cur->key.integer != rhs_cur->key.integer) {
// Look up the formatter stored for this argument start on each side and
// compare the Format objects by value.
// NOTE(review): both uhash_iget() results are dereferenced without a NULL
// check in the visible code - confirm they cannot be NULL here.
423 const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer);
424 const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer);
425 if (*format != *rhs_format) {
432 // -------------------------------------
433 // Creates a copy of this MessageFormat, the caller owns the copy.
// Implemented with the copy constructor on a heap-allocated object.
436 MessageFormat::clone() const
438 return new MessageFormat(*this);
441 // -------------------------------------
442 // Sets the locale of this MessageFormat object to theLocale.
// When the locale actually changes, the cached default number and date formats
// are deleted and their pointers cleared, the locale IDs are re-published and
// both plural providers are reset.
445 MessageFormat::setLocale(const Locale& theLocale)
447 if (fLocale != theLocale) {
448 delete defaultNumberFormat;
449 defaultNumberFormat = NULL;
450 delete defaultDateFormat;
451 defaultDateFormat = NULL;
// The same locale name is supplied for both arguments of setLocaleIDs().
453 setLocaleIDs(fLocale.getName(), fLocale.getName());
454 pluralProvider.reset();
455 ordinalProvider.reset();
459 // -------------------------------------
460 // Gets the locale of this MessageFormat object.
// Declared const: the accessor does not modify the object.
463 MessageFormat::getLocale() const
// Convenience overload for callers that do not need parse-error details: a
// local UParseError is handed to the overload that takes one and is discarded
// afterwards. Errors are still reported through `status`.
469 MessageFormat::applyPattern(const UnicodeString& newPattern,
472 UParseError parseError;
473 applyPattern(newPattern,parseError,status);
477 // -------------------------------------
478 // Applies the new pattern and returns an error if the pattern
// The pattern text is parsed into msgPattern, with diagnostics written to
// parseError, and cacheExplicitFormats() is then called with the same error
// code.
481 MessageFormat::applyPattern(const UnicodeString& pattern,
482 UParseError& parseError,
488 msgPattern.parse(pattern, &parseError, ec);
489 cacheExplicitFormats(ec);
// Discards per-pattern state: both hashtables are closed and their pointers
// cleared, and the argument-type-conflict flag is lowered.
496 void MessageFormat::resetPattern() {
498 uhash_close(cachedFormatters);
499 cachedFormatters = NULL;
500 uhash_close(customFormatArgStarts);
501 customFormatArgStarts = NULL;
503 hasArgTypeConflicts = FALSE;
// Applies `pattern` under the requested apostrophe mode. If the mode differs
// from the one msgPattern currently uses, the stored pattern is cleared and the
// mode switched first; the pattern is then applied through the overload that
// takes a UParseError reference.
507 MessageFormat::applyPattern(const UnicodeString& pattern,
508 UMessagePatternApostropheMode aposMode,
509 UParseError* parseError,
510 UErrorCode& status) {
511 if (aposMode != msgPattern.getApostropheMode()) {
512 msgPattern.clearPatternAndSetApostropheMode(aposMode);
// NOTE(review): parseError is dereferenced unconditionally here, yet
// formatComplexSubMessage() in this file calls this overload with NULL for
// that parameter. Consider substituting a local UParseError when the pointer
// is NULL.
514 applyPattern(pattern, *parseError, status);
517 // -------------------------------------
518 // Converts this MessageFormat instance to a pattern.
// When custom formatters are registered, or when the parsed pattern has no
// parts, appendTo is set to bogus. Otherwise the stored pattern string is
// appended to appendTo and the result of that append is returned.
521 MessageFormat::toPattern(UnicodeString& appendTo) const {
522 if ((customFormatArgStarts != NULL && 0 != uhash_count(customFormatArgStarts)) ||
523 0 == msgPattern.countParts()
525 appendTo.setToBogus();
528 return appendTo.append(msgPattern.getPatternString());
// Scans forward from partIndex looking for the next ARG_START part. Callers in
// this file start with 0 and feed each result back in, stopping once the
// result is negative.
531 int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex) const {
// A non-zero index is first moved to the limit part that matches it, so the
// scan resumes after that construct instead of inside it.
532 if (partIndex != 0) {
533 partIndex = msgPattern.getLimitPartIndex(partIndex);
// The index is advanced before each part is examined.
536 UMessagePatternPartType type = msgPattern.getPartType(++partIndex);
537 if (type == UMSGPAT_PART_TYPE_ARG_START) {
540 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
// Stores `formatter` in the formatter cache under the key argStart.
// argStart - integer key; callers in this file pass an ARG_START part index
// status   - checked on entry; also receives hashtable errors
546 void MessageFormat::setArgStartFormat(int32_t argStart,
548 UErrorCode& status) {
549 if (U_FAILURE(status)) {
// The cache is created on first use. It is keyed by integer and uses
// equalFormatsForHash to compare stored values.
553 if (cachedFormatters == NULL) {
554 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
555 equalFormatsForHash, &status);
556 if (U_FAILURE(status)) {
// Registering a value deleter makes the table responsible for deleting the
// Format objects stored in it.
560 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
// A NULL formatter is replaced by a DummyFormat placeholder before it is
// stored; getCachedFormatter() recognises the placeholder and does not return
// it.
562 if (formatter == NULL) {
563 formatter = new DummyFormat();
565 uhash_iput(cachedFormatters, argStart, formatter, &status);
// Reports whether the pattern part at partIndex identifies the given argument.
// An ARG_NAME part is compared textually against argName; any other part is
// treated as an ARG_NUMBER and its value is compared against argNumber.
569 UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) {
570 const MessagePattern::Part& part = msgPattern.getPart(partIndex);
571 return part.getType() == UMSGPAT_PART_TYPE_ARG_NAME ?
572 msgPattern.partSubstringMatches(part, argName) :
573 part.getValue() == argNumber; // ARG_NUMBER
576 // Sets a custom formatter for a MessagePattern ARG_START part index.
577 // "Custom" formatters are provided by the user via setFormat() or similar APIs.
// The formatter is stored through setArgStartFormat(), and argStart is also
// recorded in customFormatArgStarts, which toPattern() and operator==() consult.
578 void MessageFormat::setCustomArgStartFormat(int32_t argStart,
580 UErrorCode& status) {
581 setArgStartFormat(argStart, formatter, status);
// The set of custom argument starts is created on first use.
582 if (customFormatArgStarts == NULL) {
583 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
// Only the key matters; the stored value is the constant 1.
586 uhash_iputi(customFormatArgStarts, argStart, 1, &status);
// Looks up the formatter stored in cachedFormatters under the given integer
// key. Callers in this file pass an ARG_START part index despite the parameter
// name. A stored DummyFormat is not returned to the caller.
589 Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const {
590 if (cachedFormatters == NULL) {
593 void* ptr = uhash_iget(cachedFormatters, argumentNumber);
// dynamic_cast filters out the DummyFormat placeholder that
// setArgStartFormat() stores for a NULL formatter.
594 if (ptr != NULL && dynamic_cast<DummyFormat*>((Format*)ptr) == NULL) {
595 return (Format*) ptr;
597 // Not cached, or a DummyFormat representing setFormat(NULL).
602 // -------------------------------------
603 // Adopts the new formats array and updates the array count.
604 // This MessageFormat instance owns the new formats.
// Existing cached and custom formatters are cleared, then the adopted
// formatters are assigned to the pattern's top-level arguments in order.
606 MessageFormat::adoptFormats(Format** newFormats,
// Invalid input is rejected before any state is touched.
608 if (newFormats == NULL || count < 0) {
611 // Throw away any cached formatters.
612 if (cachedFormatters != NULL) {
613 uhash_removeAll(cachedFormatters);
615 if (customFormatArgStarts != NULL) {
616 uhash_removeAll(customFormatArgStarts);
619 int32_t formatNumber = 0;
620 UErrorCode status = U_ZERO_ERROR;
// The loop ends when the formats run out, an error occurs, or the pattern has
// no further top-level argument.
621 for (int32_t partIndex = 0;
622 formatNumber < count && U_SUCCESS(status) &&
623 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
624 setCustomArgStartFormat(partIndex, newFormats[formatNumber], status);
627 // Delete those that didn't get used (if any).
628 for (; formatNumber < count; ++formatNumber) {
629 delete newFormats[formatNumber];
634 // -------------------------------------
635 // Sets the new formats array and updates the array count.
636 // This MessageFormat instance makes a copy of the new formats.
// Existing cached and custom formatters are cleared, then a clone of each
// supplied formatter is assigned to the pattern's top-level arguments in order.
// A NULL entry in newFormats is passed on as a NULL formatter.
639 MessageFormat::setFormats(const Format** newFormats,
// Invalid input is rejected before any state is touched.
641 if (newFormats == NULL || count < 0) {
644 // Throw away any cached formatters.
645 if (cachedFormatters != NULL) {
646 uhash_removeAll(cachedFormatters);
648 if (customFormatArgStarts != NULL) {
649 uhash_removeAll(customFormatArgStarts);
652 UErrorCode status = U_ZERO_ERROR;
653 int32_t formatNumber = 0;
654 for (int32_t partIndex = 0;
655 formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
656 Format* newFormat = NULL;
657 if (newFormats[formatNumber] != NULL) {
658 newFormat = newFormats[formatNumber]->clone();
// A failed clone is recorded as an allocation error.
659 if (newFormat == NULL) {
660 status = U_MEMORY_ALLOCATION_ERROR;
663 setCustomArgStartFormat(partIndex, newFormat, status);
// The function has no error parameter; the local status is inspected here.
666 if (U_FAILURE(status)) {
671 // -------------------------------------
672 // Adopt a single format by format number.
673 // Do nothing if the format number is not less than the array count.
// n         - zero-based position among the pattern's top-level arguments
// newFormat - adopted formatter
676 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
// The LocalPointer holds newFormat until p.orphan() releases it into the call
// below.
677 LocalPointer<Format> p(newFormat);
679 int32_t formatNumber = 0;
680 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
681 if (n == formatNumber) {
// The function has no error parameter, so a local error code is used.
682 UErrorCode status = U_ZERO_ERROR;
683 setCustomArgStartFormat(partIndex, p.orphan(), status);
691 // -------------------------------------
692 // Adopt a single format by format name.
693 // Do nothing if there is no match of formatName.
// formatName    - argument name, or number written as text, to match
// formatToAdopt - adopted formatter; the code below also handles NULL
// status        - checked on entry; set to U_ILLEGAL_ARGUMENT_ERROR when the
//                 name fails validation, or U_MEMORY_ALLOCATION_ERROR below
695 MessageFormat::adoptFormat(const UnicodeString& formatName,
696 Format* formatToAdopt,
697 UErrorCode& status) {
// The LocalPointer is constructed before the status check, so the formatter
// is already held when that check runs.
698 LocalPointer<Format> p(formatToAdopt);
699 if (U_FAILURE(status)) {
// validateArgumentName() results below UMSGPAT_ARG_NAME_NOT_NUMBER are
// treated as an invalid name.
702 int32_t argNumber = MessagePattern::validateArgumentName(formatName);
703 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
704 status = U_ILLEGAL_ARGUMENT_ERROR;
707 for (int32_t partIndex = 0;
708 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
// The part after ARG_START carries the argument's name or number.
710 if (argNameMatches(partIndex + 1, formatName, argNumber)) {
// The loop does not stop at the first match, and the visible branches include
// a clone of formatToAdopt.
// NOTE(review): presumably the first match receives the adopted object and
// later matches receive clones - confirm against the full branch.
714 } else if (formatToAdopt == NULL) {
717 f = formatToAdopt->clone();
719 status = U_MEMORY_ALLOCATION_ERROR;
723 setCustomArgStartFormat(partIndex, f, status);
728 // -------------------------------------
729 // Set a single format.
730 // Do nothing if the variable is not less than the array count.
// n         - zero-based position among the pattern's top-level arguments
// newFormat - formatter to copy; a clone is what gets stored
732 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
735 int32_t formatNumber = 0;
736 for (int32_t partIndex = 0;
737 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
738 if (n == formatNumber) {
739 Format* new_format = newFormat.clone();
// The function has no error parameter, so a local error code is used.
741 UErrorCode status = U_ZERO_ERROR;
742 setCustomArgStartFormat(partIndex, new_format, status);
751 // -------------------------------------
752 // Get a single format by format name.
753 // Do nothing if the variable is not less than the array count.
// Returns the cached formatter of the first top-level argument whose name or
// number matches formatName. Returns NULL straight away when status already
// holds a failure or no formatter cache exists.
755 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
756 if (U_FAILURE(status) || cachedFormatters == NULL) return NULL;
// validateArgumentName() results below UMSGPAT_ARG_NAME_NOT_NUMBER are
// treated as an invalid name.
758 int32_t argNumber = MessagePattern::validateArgumentName(formatName);
759 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
760 status = U_ILLEGAL_ARGUMENT_ERROR;
763 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
// The part after ARG_START carries the name or number; the cache itself is
// keyed by the ARG_START index.
764 if (argNameMatches(partIndex + 1, formatName, argNumber)) {
765 return getCachedFormatter(partIndex);
771 // -------------------------------------
772 // Set a single format by format name
773 // Do nothing if the variable is not less than the array count.
// Installs a clone of newFormat for each top-level argument whose name or
// number matches formatName; the loop does not stop at the first match.
// status - checked on entry; set to U_ILLEGAL_ARGUMENT_ERROR when the name
//          fails validation, or U_MEMORY_ALLOCATION_ERROR when a clone fails
775 MessageFormat::setFormat(const UnicodeString& formatName,
776 const Format& newFormat,
777 UErrorCode& status) {
778 if (U_FAILURE(status)) return;
780 int32_t argNumber = MessagePattern::validateArgumentName(formatName);
781 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
782 status = U_ILLEGAL_ARGUMENT_ERROR;
785 for (int32_t partIndex = 0;
786 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
788 if (argNameMatches(partIndex + 1, formatName, argNumber)) {
// NOTE(review): this takes the address of a reference and compares it with
// NULL. A reference cannot legally be null in C++, so a compiler may assume
// the test is always false and drop this branch; do not rely on it.
789 if (&newFormat == NULL) {
790 setCustomArgStartFormat(partIndex, NULL, status);
792 Format* new_format = newFormat.clone();
793 if (new_format == NULL) {
794 status = U_MEMORY_ALLOCATION_ERROR;
797 setCustomArgStartFormat(partIndex, new_format, status);
803 // -------------------------------------
804 // Gets the format array.
// cnt is used as the write index into the alias array and is incremented once
// per top-level argument. The returned pointer is the formatAliases member.
806 MessageFormat::getFormats(int32_t& cnt) const
808 // This old API returns an array (which we hold) of Format*
809 // pointers. The array is valid up to the next call to any
810 // method on this object. We construct and resize an array
811 // on demand that contains aliases to the subformats[i].format
// The method is const but maintains the alias array, so members are written
// through this non-const alias of `this`.
813 MessageFormat* t = const_cast<MessageFormat*> (this);
// First call: allocate at least 10 slots, or argTypeCount if that is larger.
815 if (formatAliases == NULL) {
816 t->formatAliasesCapacity = (argTypeCount<10) ? 10 : argTypeCount;
817 Format** a = (Format**)
818 uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
// The capacity is reset to 0 on this path; it sits between the allocation
// and the assignment of `a`, so it looks like the allocation-failure branch.
820 t->formatAliasesCapacity = 0;
823 t->formatAliases = a;
// Later calls: grow the array to argTypeCount when it no longer fits.
824 } else if (argTypeCount > formatAliasesCapacity) {
825 Format** a = (Format**)
826 uprv_realloc(formatAliases, sizeof(Format*) * argTypeCount);
828 t->formatAliasesCapacity = 0;
831 t->formatAliases = a;
832 t->formatAliasesCapacity = argTypeCount;
// Fill the array with whatever getCachedFormatter() returns for each
// top-level argument, in pattern order.
835 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
836 t->formatAliases[cnt++] = getCachedFormatter(partIndex);
839 return (const Format**)formatAliases;
// Returns the pattern substring covered by the part at partIndex.
// getFormatNames() calls this with the index just after an ARG_START part.
843 UnicodeString MessageFormat::getArgName(int32_t partIndex) {
844 const MessagePattern::Part& part = msgPattern.getPart(partIndex);
845 return msgPattern.getSubstring(part);
// Builds an enumeration of the names of all top-level arguments, in pattern
// order. Returns NULL when status already holds a failure on entry.
849 MessageFormat::getFormatNames(UErrorCode& status) {
850 if (U_FAILURE(status)) return NULL;
// NOTE(review): the pointer returned by `new` is not tested for NULL in the
// visible code, and any failure reported by the UVector constructor is
// rewritten as U_MEMORY_ALLOCATION_ERROR - confirm both are intended.
852 UVector *fFormatNames = new UVector(status);
853 if (U_FAILURE(status)) {
854 status = U_MEMORY_ALLOCATION_ERROR;
// The deleter makes the vector responsible for deleting its elements.
857 fFormatNames->setDeleter(uprv_deleteUObject);
859 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
// partIndex + 1 is the part that carries the argument's name or number.
// NOTE(review): the `new UnicodeString` result is handed to addElement()
// without a NULL check.
860 fFormatNames->addElement(new UnicodeString(getArgName(partIndex + 1)), status);
// The vector is handed to the FormatNameEnumeration constructor.
863 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
864 return nameEnumerator;
867 // -------------------------------------
868 // Formats the source Formattable array and copy into the result buffer.
869 // Ignore the FieldPosition result for error checking.
// Positional-argument entry point: no argument names are supplied (NULL), and
// the caller's FieldPosition is passed along by address.
872 MessageFormat::format(const Formattable* source,
874 UnicodeString& appendTo,
875 FieldPosition& ignore,
876 UErrorCode& success) const
878 return format(source, NULL, cnt, appendTo, &ignore, success);
881 // -------------------------------------
882 // Internally creates a MessageFormat instance based on the
883 // pattern and formats the arguments Formattable array and
884 // copy into the appendTo buffer.
// One-shot helper: a temporary MessageFormat is built from `pattern` with the
// two-argument constructor, so it uses the default locale. Arguments are
// positional and no FieldPosition is tracked.
887 MessageFormat::format( const UnicodeString& pattern,
888 const Formattable* arguments,
890 UnicodeString& appendTo,
893 MessageFormat temp(pattern, success);
894 return temp.format(arguments, NULL, cnt, appendTo, NULL, success);
897 // -------------------------------------
898 // Formats the source Formattable object and copy into the
899 // appendTo buffer. The Formattable object must be an array
900 // of Formattable instances, returns error otherwise.
// A source that is not of type kArray sets U_ILLEGAL_ARGUMENT_ERROR.
903 MessageFormat::format(const Formattable& source,
904 UnicodeString& appendTo,
905 FieldPosition& ignore,
906 UErrorCode& success) const
908 if (U_FAILURE(success))
910 if (source.getType() != Formattable::kArray) {
911 success = U_ILLEGAL_ARGUMENT_ERROR;
// getArray() returns the element pointer and writes the element count to cnt;
// the elements are then formatted as positional arguments.
915 const Formattable* tmpPtr = source.getArray(cnt);
916 return format(tmpPtr, NULL, cnt, appendTo, &ignore, success);
// Named-argument entry point. argumentNames and arguments are parallel arrays
// of `count` entries, as getArgFromListByName() indexes them together. The
// parameter order is swapped when forwarding to the internal overload, and no
// FieldPosition is tracked.
920 MessageFormat::format(const UnicodeString* argumentNames,
921 const Formattable* arguments,
923 UnicodeString& appendTo,
924 UErrorCode& success) const {
925 return format(arguments, argumentNames, count, appendTo, NULL, success);
928 // Does linear search to find the match for an ArgName.
// Returns a pointer to the element of `arguments` at the index of the first
// entry in `argumentNames` that compares equal to `name`.
929 const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments,
930 const UnicodeString *argumentNames,
931 int32_t cnt, UnicodeString& name) const {
932 for (int32_t i = 0; i < cnt; ++i) {
// compare() returning 0 means the two strings are equal.
933 if (0 == argumentNames[i].compare(name)) {
934 return arguments + i;
// Common entry point behind the public format() overloads. It wraps appendTo
// in a UnicodeStringAppendable and an AppendableWrapper, then formats the whole
// message starting at part 0 with no plural context (NULL).
942 MessageFormat::format(const Formattable* arguments,
943 const UnicodeString *argumentNames,
945 UnicodeString& appendTo,
947 UErrorCode& status) const {
948 if (U_FAILURE(status)) {
952 UnicodeStringAppendable usapp(appendTo);
953 AppendableWrapper app(usapp);
954 format(0, NULL, arguments, argumentNames, cnt, app, pos, status);
961 * Mutable input/output values for the PluralSelectorProvider.
962 * Separate so that it is possible to make MessageFormat Freezable.
// The internal format() creates one of these for each plural argument and
// passes its address on as an untyped `plNumber` pointer.
964 class PluralSelectorContext {
// start - stored as startIndex; format() passes a pattern part index
// name  - argument name, held by reference
// num   - the numeric argument value
// off   - plural offset, stored and also subtracted from num below
// Output fields start at "no result yet": numberArgIndex -1, formatter NULL,
// forReplaceNumber FALSE.
966 PluralSelectorContext(int32_t start, const UnicodeString &name,
967 const Formattable &num, double off, UErrorCode &errorCode)
968 : startIndex(start), argName(name), offset(off),
969 numberArgIndex(-1), formatter(NULL), forReplaceNumber(FALSE) {
970 // number needs to be set even when select() is not called.
971 // Keep it as a Number/Formattable:
972 // For format() methods, and to preserve information (e.g., BigDecimal).
// Converts the argument to double and subtracts the offset.
976 number = num.getDouble(errorCode) - off;
980 // Input values for plural selection with decimals.
// Held by reference, so the named string must outlive this context.
982 const UnicodeString &argName;
983 /** argument number - plural offset */
986 // Output values for plural selection with decimals.
987 /** -1 if REPLACE_NUMBER, 0 arg not found, >0 ARG_START index */
988 int32_t numberArgIndex;
989 const Format *formatter;
990 /** formatted argument number - plural offset */
991 UnicodeString numberString;
992 /** TRUE if number-offset was formatted with the stock number formatter */
993 UBool forReplaceNumber;
998 // if argumentNames is NULL, this means arguments is a numeric array.
999 // arguments can not be NULL.
1000 // We use const void *plNumber rather than const PluralSelectorContext *pluralNumber
1001 // so that we need not declare the PluralSelectorContext in the public header file.
// Core formatting loop. It walks the pattern parts of the message that starts
// at msgStart, copying literal text and formatting each argument into
// appendTo. plNumber is NULL at top level and points at the enclosing
// PluralSelectorContext inside a plural sub-message.
1002 void MessageFormat::format(int32_t msgStart, const void *plNumber,
1003 const Formattable* arguments,
1004 const UnicodeString *argumentNames,
1006 AppendableWrapper& appendTo,
1007 FieldPosition* ignore,
1008 UErrorCode& success) const {
1009 if (U_FAILURE(success)) {
1013 const UnicodeString& msgString = msgPattern.getPatternString();
// prevIndex is the pattern-string offset from which literal text is copied
// next.
1014 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
1015 for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) {
1016 const MessagePattern::Part* part = &msgPattern.getPart(i);
1017 const UMessagePatternPartType type = part->getType();
1018 int32_t index = part->getIndex();
// Emit the literal text between the previous part and this one.
1019 appendTo.append(msgString, prevIndex, index - prevIndex);
1020 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
1023 prevIndex = part->getLimit();
// REPLACE_NUMBER part: output the plural number. plNumber is cast without a
// NULL check, so this branch relies on being inside a plural sub-message.
1024 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
1025 const PluralSelectorContext &pluralNumber =
1026 *static_cast<const PluralSelectorContext *>(plNumber);
1027 if(pluralNumber.forReplaceNumber) {
1028 // number-offset was already formatted.
1029 appendTo.formatAndAppend(pluralNumber.formatter,
1030 pluralNumber.number, pluralNumber.numberString, success);
1032 const NumberFormat* nf = getDefaultNumberFormat(success);
1033 appendTo.formatAndAppend(nf, pluralNumber.number, success);
// Only ARG_START parts are processed by the code below.
1037 if (type != UMSGPAT_PART_TYPE_ARG_START) {
// argLimit is the part index that closes this argument. The part right after
// ARG_START carries the argument's name or number.
1040 int32_t argLimit = msgPattern.getLimitPartIndex(i);
1041 UMessagePatternArgType argType = part->getArgType();
1042 part = &msgPattern.getPart(++i);
1043 const Formattable* arg;
1044 UBool noArg = FALSE;
1045 UnicodeString argName = msgPattern.getSubstring(*part);
// Positional lookup when no names were supplied, otherwise lookup by name.
1046 if (argumentNames == NULL) {
1047 int32_t argNumber = part->getValue(); // ARG_NUMBER
1048 if (0 <= argNumber && argNumber < cnt) {
1049 arg = arguments + argNumber;
1055 arg = getArgFromListByName(arguments, argumentNames, cnt, argName);
// Remember where this argument's output starts, for updateMetaData() below.
1061 int32_t prevDestLength = appendTo.length();
1062 const Format* formatter = NULL;
// Builds the text "{" + argName + "}", i.e. the argument placeholder itself;
// presumably this is the output when no argument was supplied (noArg).
1065 UnicodeString(LEFT_CURLY_BRACE).append(argName).append(RIGHT_CURLY_BRACE));
1066 } else if (arg == NULL) {
// A NULL argument pointer is rendered as the literal text "null".
1067 appendTo.append(NULL_STRING, 4);
// i was advanced once after ARG_START, so i-2 is compared with the index that
// the plural context recorded in numberArgIndex.
1068 } else if(plNumber!=NULL &&
1069 static_cast<const PluralSelectorContext *>(plNumber)->numberArgIndex==(i-2)) {
1070 const PluralSelectorContext &pluralNumber =
1071 *static_cast<const PluralSelectorContext *>(plNumber);
1072 if(pluralNumber.offset == 0) {
1073 // The number was already formatted with this formatter.
1074 appendTo.formatAndAppend(pluralNumber.formatter, pluralNumber.number,
1075 pluralNumber.numberString, success);
1077 // Do not use the formatted (number-offset) string for a named argument
1078 // that formats the number without subtracting the offset.
1079 appendTo.formatAndAppend(pluralNumber.formatter, *arg, success);
1081 } else if ((formatter = getCachedFormatter(i -2))) {
1082 // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings.
1083 if (dynamic_cast<const ChoiceFormat*>(formatter) ||
1084 dynamic_cast<const PluralFormat*>(formatter) ||
1085 dynamic_cast<const SelectFormat*>(formatter)) {
1086 // We only handle nested formats here if they were provided via
1087 // setFormat() or its siblings. Otherwise they are not cached and instead
1088 // handled below according to argType.
1089 UnicodeString subMsgString;
1090 formatter->format(*arg, subMsgString, success);
// If the selected sub-message contains '{', or contains an apostrophe outside
// JDK apostrophe mode, it is formatted again as a MessageFormat pattern with
// the same arguments; otherwise it is appended verbatim.
1091 if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 ||
1092 (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern))
1094 MessageFormat subMsgFormat(subMsgString, fLocale, success);
1095 subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, ignore, success);
1097 appendTo.append(subMsgString);
1100 appendTo.formatAndAppend(formatter, *arg, success);
1102 } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) {
1103 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1104 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1105 // for the hash table containing DummyFormat.
// Default formatting by runtime type: numbers use the default number format,
// dates the default date format, and anything else is appended as its string
// value.
1106 if (arg->isNumeric()) {
1107 const NumberFormat* nf = getDefaultNumberFormat(success);
1108 appendTo.formatAndAppend(nf, *arg, success);
1109 } else if (arg->getType() == Formattable::kDate) {
1110 const DateFormat* df = getDefaultDateFormat(success);
1111 appendTo.formatAndAppend(df, *arg, success);
1113 appendTo.append(arg->getString(success));
// Choice argument: requires a numeric value; the matching sub-message is
// located and formatted without a plural context.
1115 } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) {
1116 if (!arg->isNumeric()) {
1117 success = U_ILLEGAL_ARGUMENT_ERROR;
1120 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1121 // because only this one converts non-double numeric types to double.
1122 const double number = arg->getDouble(success);
1123 int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number);
1124 formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames,
1125 cnt, appendTo, success);
// Argument with a plural style: requires a numeric value. The provider is
// pluralProvider for UMSGPAT_ARG_TYPE_PLURAL and ordinalProvider otherwise.
1126 } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) {
1127 if (!arg->isNumeric()) {
1128 success = U_ILLEGAL_ARGUMENT_ERROR;
1131 const PluralSelectorProvider &selector =
1132 argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider;
1133 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1134 // because only this one converts non-double numeric types to double.
1135 double offset = msgPattern.getPluralOffset(i);
// The context is passed both to findSubMessage() and, as plNumber, to the
// sub-message formatting.
1136 PluralSelectorContext context(i, argName, *arg, offset, success);
1137 int32_t subMsgStart = PluralFormat::findSubMessage(
1138 msgPattern, i, selector, &context, arg->getDouble(success), success);
1139 formatComplexSubMessage(subMsgStart, &context, arguments, argumentNames,
1140 cnt, appendTo, success);
// Select argument: the sub-message is chosen by the argument's string value.
1141 } else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
1142 int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success);
1143 formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames,
1144 cnt, appendTo, success);
1146 // This should never happen.
1147 success = U_INTERNAL_PROGRAM_ERROR;
// updateMetaData() gets the output position recorded before this argument and
// returns the FieldPosition pointer used for the next one. Literal copying
// then resumes after the argument's closing part.
1150 ignore = updateMetaData(appendTo, prevDestLength, ignore, arg);
1151 prevIndex = msgPattern.getPart(argLimit).getLimit();
// Formats the sub-message that starts at msgStart (a selected choice, plural
// or select branch). Outside JDK apostrophe mode this is a direct recursive
// format() call. In JDK mode the sub-message text is first rebuilt in `sb` and
// formatted again only if it still contains '{'.
1157 void MessageFormat::formatComplexSubMessage(int32_t msgStart,
1158 const void *plNumber,
1159 const Formattable* arguments,
1160 const UnicodeString *argumentNames,
1162 AppendableWrapper& appendTo,
1163 UErrorCode& success) const {
1164 if (U_FAILURE(success)) {
1168 if (!MessageImpl::jdkAposMode(msgPattern)) {
1169 format(msgStart, plNumber, arguments, argumentNames, cnt, appendTo, NULL, success);
1173 // JDK compatibility mode: (see JDK MessageFormat.format() API docs)
1174 // - remove SKIP_SYNTAX; that is, remove half of the apostrophes
1175 // - if the result string contains an open curly brace '{' then
1176 // instantiate a temporary MessageFormat object and format again;
1177 // otherwise just append the result string
1178 const UnicodeString& msgString = msgPattern.getPatternString();
// prevIndex is the pattern-string offset from which text is copied next.
1180 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
1181 for (int32_t i = msgStart;;) {
1182 const MessagePattern::Part& part = msgPattern.getPart(++i);
1183 const UMessagePatternPartType type = part.getType();
1184 int32_t index = part.getIndex();
// At the end of the sub-message, the remaining literal text is copied.
1185 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
1186 sb.append(msgString, prevIndex, index - prevIndex);
// For these two part types, the text before the part is copied and the part
// itself is stepped over. REPLACE_NUMBER additionally inserts the number.
1188 } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
1189 sb.append(msgString, prevIndex, index - prevIndex);
1190 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
1191 const PluralSelectorContext &pluralNumber =
1192 *static_cast<const PluralSelectorContext *>(plNumber);
1193 if(pluralNumber.forReplaceNumber) {
1194 // number-offset was already formatted.
1195 sb.append(pluralNumber.numberString);
1197 const NumberFormat* nf = getDefaultNumberFormat(success);
// NOTE(review): format() is given sb as its output string and its return value
// is then appended to sb again. If format() returns a reference to sb, this
// appends sb's whole contents a second time - confirm the intended behavior.
// nf is also used without a NULL check.
1198 sb.append(nf->format(pluralNumber.number, sb, success));
1201 prevIndex = part.getLimit();
// Nested argument: the text before it is copied, i jumps to the argument's
// closing part, and the argument's own text is copied with reduced
// apostrophes.
1202 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
1203 sb.append(msgString, prevIndex, index - prevIndex);
1205 i = msgPattern.getLimitPartIndex(i);
1206 index = msgPattern.getPart(i).getLimit();
1207 MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb);
// If any '{' remains, the rebuilt text is treated as a pattern: a temporary
// MessageFormat in the same locale re-parses it in DOUBLE_REQUIRED apostrophe
// mode and formats it with the same arguments.
1211 if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) {
1212 UnicodeString emptyPattern; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter.
1213 MessageFormat subMsgFormat(emptyPattern, fLocale, success);
// NOTE(review): NULL is passed for the UParseError* parameter, and the visible
// body of that applyPattern() overload dereferences it unconditionally.
1214 subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success);
1215 subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, NULL, success);
// Otherwise the rebuilt text is appended as plain output.
1217 appendTo.append(sb);
1222 UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const {
1223 const UnicodeString& msgString=msgPattern.getPatternString();
1224 int32_t prevIndex=msgPattern.getPart(from).getLimit();
1226 for (int32_t i = from + 1; ; ++i) {
1227 const MessagePattern::Part& part = msgPattern.getPart(i);
1228 const UMessagePatternPartType type=part.getType();
1229 int32_t index=part.getIndex();
1230 b.append(msgString, prevIndex, index - prevIndex);
1231 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
1234 // Unexpected Part "part" in parsed message.
1235 U_ASSERT(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR);
1236 prevIndex=part.getLimit();
1241 FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/,
1242 FieldPosition* /*fp*/, const Formattable* /*argId*/) const {
1243 // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing.
1246 if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) {
1247 fp->setBeginIndex(prevLength);
1248 fp->setEndIndex(dest.get_length());
// Walks the selector/message tuples of a plural-style argument, starting at
// partIndex, looking for the selector whose text equals OTHER_STRING.
// NOTE(review): presumably yields the part index just after the matching
// selector, and a sentinel when none matches -- confirm against the returns.
1256 MessageFormat::findOtherSubMessage(int32_t partIndex) const {
1257 int32_t count=msgPattern.countParts();
1258 const MessagePattern::Part *part = &msgPattern.getPart(partIndex);
// The scan may be entered on a numeric-value part instead of a selector.
1259 if(MessagePattern::Part::hasNumericValue(part->getType())) {
1262 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
1263 // until ARG_LIMIT or end of plural-only pattern.
// Read-only alias of the 5 code units at OTHER_STRING; no copy is made.
1264 UnicodeString other(FALSE, OTHER_STRING, 5);
// partIndex is post-incremented, so after this line it refers to the part
// that follows the selector.
1266 part=&msgPattern.getPart(partIndex++);
1267 UMessagePatternPartType type=part->getType();
1268 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
1271 U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
1272 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
1273 if(msgPattern.partSubstringMatches(*part, other)) {
1276 if(MessagePattern::Part::hasNumericValue(msgPattern.getPartType(partIndex))) {
1277 ++partIndex; // skip the numeric-value part of "=1" etc.
// Jump to the limit part of this tuple's message; the loop condition then
// steps past it to the next selector.
1279 partIndex=msgPattern.getLimitPartIndex(partIndex);
1280 } while(++partIndex<count);
// Scans the message that starts at part index msgStart for the first place
// where the plural number would be shown: either a REPLACE_NUMBER part or a
// NONE/SIMPLE argument whose name or number matches argName.
// Nested arguments are skipped as a whole and the scan stops at MSG_LIMIT.
// NOTE(review): the value returned for each of the three outcomes is not
// stated here -- confirm against the return statements.
1285 MessageFormat::findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const {
1286 for(int32_t i=msgStart+1;; ++i) {
1287 const MessagePattern::Part &part=msgPattern.getPart(i);
1288 UMessagePatternPartType type=part.getType();
1289 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
1292 if(type==UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
1295 if(type==UMSGPAT_PART_TYPE_ARG_START) {
1296 UMessagePatternArgType argType=part.getArgType();
// An empty argName disables the name comparison altogether.
1297 if(!argName.isEmpty() && (argType==UMSGPAT_ARG_TYPE_NONE || argType==UMSGPAT_ARG_TYPE_SIMPLE)) {
1298 // ARG_NUMBER or ARG_NAME
1299 if(msgPattern.partSubstringMatches(msgPattern.getPart(i+1), argName)) {
// Continue after the end of this (possibly nested) argument.
1303 i=msgPattern.getLimitPartIndex(i);
// Copies the pointer-backed state of `that` into this object: the argTypes
// array and the contents of the two hash tables (cachedFormatters and
// customFormatArgStarts). Failures are reported through ec.
1308 void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) {
1309 // Deep copy pointer fields.
1310 // We need not copy the formatAliases because they are re-filled
1311 // in each getFormats() call.
1312 // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules
1313 // also get created on demand.
1314 argTypeCount = that.argTypeCount;
1315 if (argTypeCount > 0) {
1316 if (!allocateArgTypes(argTypeCount, ec)) {
1319 uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0]));
// Empty our own tables first so that no stale entry survives the copy.
1321 if (cachedFormatters != NULL) {
1322 uhash_removeAll(cachedFormatters);
1324 if (customFormatArgStarts != NULL) {
1325 uhash_removeAll(customFormatArgStarts);
1327 if (that.cachedFormatters) {
// Create our table on demand; uprv_deleteUObject is installed as the value
// deleter for the stored Format objects.
1328 if (cachedFormatters == NULL) {
1329 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
1330 equalFormatsForHash, &ec);
1331 if (U_FAILURE(ec)) {
1334 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
1337 const int32_t count = uhash_count(that.cachedFormatters);
// Iteration stops early as soon as ec records a failure.
1339 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) {
1340 const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos);
// Each cached Format is cloned, so the two MessageFormat objects do not
// share formatter instances.
1341 Format* newFormat = ((Format*)(cur->value.pointer))->clone();
1343 uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec);
// NOTE(review): presumably reached when clone() returned NULL -- confirm.
1345 ec = U_MEMORY_ALLOCATION_ERROR;
1350 if (that.customFormatArgStarts) {
1351 if (customFormatArgStarts == NULL) {
1352 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
1355 const int32_t count = uhash_count(that.customFormatArgStarts);
// This table maps integer keys to integer values, so entries are copied by
// value.
1357 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) {
1358 const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos);
1359 uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec);
// Parses `source`, beginning at pos.getIndex(), against the message whose
// MSG_START part is at index msgStart.
// On success the array of parsed argument values is handed to the caller
// (released from the LocalArray via orphan()) and pos is moved past the
// consumed text. On failure NULL is returned with pos's index left unchanged;
// mismatch paths record the failing offset in pos's error index.
// `count` is raised to (highest argument number that received a value) + 1.
// Named arguments, plural-style arguments and select arguments are not
// supported and are reported through ec.
1366 MessageFormat::parse(int32_t msgStart,
1367 const UnicodeString& source,
1370 UErrorCode& ec) const {
1372 if (U_FAILURE(ec)) {
1373 pos.setErrorIndex(pos.getIndex());
1376 // parse() does not work with named arguments.
1377 if (msgPattern.hasNamedArguments()) {
1378 ec = U_ARGUMENT_TYPE_MISMATCH;
1379 pos.setErrorIndex(pos.getIndex());
// At least one element is allocated, even when there are no arguments.
// NOTE(review): the new[] result is not checked before resultArray[argNumber]
// is used below -- confirm that a failed allocation cannot yield NULL here.
1382 LocalArray<Formattable> resultArray(new Formattable[argTypeCount ? argTypeCount : 1]);
1383 const UnicodeString& msgString=msgPattern.getPatternString();
1384 int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
1385 int32_t sourceOffset = pos.getIndex();
1386 ParsePosition tempStatus(0);
1388 for(int32_t i=msgStart+1; ; ++i) {
1389 UBool haveArgResult = FALSE;
1390 const MessagePattern::Part* part=&msgPattern.getPart(i);
1391 const UMessagePatternPartType type=part->getType();
1392 int32_t index=part->getIndex();
1393 // Make sure the literal string matches.
// The pattern text between the previous part and this one must occur
// verbatim in source at sourceOffset.
1394 int32_t len = index - prevIndex;
1395 if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) {
1396 sourceOffset += len;
1399 pos.setErrorIndex(sourceOffset);
1400 return NULL; // leave index as is to signal error
1402 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
1403 // Things went well! Done.
1404 pos.setIndex(sourceOffset);
1405 return resultArray.orphan();
1407 if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) {
1408 prevIndex=part->getLimit();
1411 // We do not support parsing Plural formats. (No REPLACE_NUMBER here.)
1412 // Unexpected Part "part" in parsed message.
1413 U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START);
1414 int32_t argLimit=msgPattern.getLimitPartIndex(i);
1416 UMessagePatternArgType argType=part->getArgType();
1417 part=&msgPattern.getPart(++i);
1418 int32_t argNumber = part->getValue(); // ARG_NUMBER
1421 const Format* formatter = NULL;
1422 Formattable& argResult = resultArray[argNumber];
// NOTE(review): i - 2 is presumably the index of the ARG_START part, under
// which formatters are cached -- confirm how far i has advanced by here.
1424 if(cachedFormatters!=NULL && (formatter = getCachedFormatter(i - 2))!=NULL) {
1425 // Just parse using the formatter.
1426 tempStatus.setIndex(sourceOffset);
1427 formatter->parseObject(source, argResult, tempStatus);
// An index that did not move means the formatter consumed nothing.
1428 if (tempStatus.getIndex() == sourceOffset) {
1429 pos.setErrorIndex(sourceOffset);
1430 return NULL; // leave index as is to signal error
1432 sourceOffset = tempStatus.getIndex();
1433 haveArgResult = TRUE;
1435 argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) {
1436 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1437 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1438 // for the hash table containing a DummyFormat.
1440 // Match as a string.
1441 // if at end, use longest possible match
1442 // otherwise uses first match to intervening string
1443 // does NOT recursively try all possibilities
1444 UnicodeString stringAfterArgument = getLiteralStringUntilNextArgument(argLimit);
1446 if (!stringAfterArgument.isEmpty()) {
1447 next = source.indexOf(stringAfterArgument, sourceOffset);
1449 next = source.length();
1452 pos.setErrorIndex(sourceOffset);
1453 return NULL; // leave index as is to signal error
1455 UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset));
// compValue is the literal placeholder text "{argNumber}". When the source
// contains exactly that text, no value is stored for the argument.
1456 UnicodeString compValue;
1457 compValue.append(LEFT_CURLY_BRACE);
1458 itos(argNumber, compValue);
1459 compValue.append(RIGHT_CURLY_BRACE);
1460 if (0 != strValue.compare(compValue)) {
1461 argResult.setString(strValue);
1462 haveArgResult = TRUE;
1464 sourceOffset = next;
1466 } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
1467 tempStatus.setIndex(sourceOffset);
1468 double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus);
1469 if (tempStatus.getIndex() == sourceOffset) {
1470 pos.setErrorIndex(sourceOffset);
1471 return NULL; // leave index as is to signal error
1473 argResult.setDouble(choiceResult);
1474 haveArgResult = TRUE;
1475 sourceOffset = tempStatus.getIndex();
1476 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) || argType==UMSGPAT_ARG_TYPE_SELECT) {
1477 // Parsing not supported.
1478 ec = U_UNSUPPORTED_ERROR;
1481 // This should never happen.
1482 ec = U_INTERNAL_PROGRAM_ERROR;
// Track the highest argument number that actually received a value.
1485 if (haveArgResult && count <= argNumber) {
1486 count = argNumber + 1;
// Literal matching resumes after the end of this argument.
1488 prevIndex=msgPattern.getPart(argLimit).getLimit();
1492 // -------------------------------------
1493 // Parses the source pattern and returns the Formattable objects array,
1494 // the array count and the ending parse position. The caller of this method
1498 MessageFormat::parse(const UnicodeString& source,
1500 int32_t& count) const {
1501 UErrorCode ec = U_ZERO_ERROR;
1502 return parse(0, source, pos, count, ec);
1505 // -------------------------------------
1506 // Parses the source string and returns the array of
1507 // Formattable objects and the array count. The caller
1508 // owns the returned array.
1511 MessageFormat::parse(const UnicodeString& source,
1513 UErrorCode& success) const
1515 if (msgPattern.hasNamedArguments()) {
1516 success = U_ARGUMENT_TYPE_MISMATCH;
1519 ParsePosition status(0);
1520 // Calls the actual implementation method and starts
1521 // from zero offset of the source text.
1522 Formattable* result = parse(source, status, cnt);
1523 if (status.getIndex() == 0) {
1524 success = U_MESSAGE_PARSE_ERROR;
1531 // -------------------------------------
1532 // Parses the source text and copy into the result buffer.
1535 MessageFormat::parseObject( const UnicodeString& source,
1536 Formattable& result,
1537 ParsePosition& status) const
1540 Formattable* tmpResult = parse(source, status, cnt);
1541 if (tmpResult != NULL)
1542 result.adoptArray(tmpResult, cnt);
1546 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
1547 UnicodeString result;
1548 if (U_SUCCESS(status)) {
1549 int32_t plen = pattern.length();
1550 const UChar* pat = pattern.getBuffer();
1551 int32_t blen = plen * 2 + 1; // space for null termination, convenience
1552 UChar* buf = result.getBuffer(blen);
1554 status = U_MEMORY_ALLOCATION_ERROR;
1556 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
1557 result.releaseBuffer(U_SUCCESS(status) ? len : 0);
1560 if (U_FAILURE(status)) {
1561 result.setToBogus();
1566 // -------------------------------------
1568 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
1569 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
1571 ec = U_MEMORY_ALLOCATION_ERROR;
1572 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
1573 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
1574 fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
// Rebuilds the per-argument bookkeeping from msgPattern:
//  - empties cachedFormatters and customFormatArgStarts,
//  - raises argTypeCount to cover every numbered argument in the pattern,
//  - fills argTypes[] with the Formattable type each numbered argument uses,
//  - creates a formatter for each argument of type SIMPLE,
//  - sets hasArgTypeConflicts when one argument number is used with two
//    different types.
// Does nothing if status already indicates a failure.
1579 void MessageFormat::cacheExplicitFormats(UErrorCode& status) {
1580 if (U_FAILURE(status)) {
1584 if (cachedFormatters != NULL) {
1585 uhash_removeAll(cachedFormatters);
1587 if (customFormatArgStarts != NULL) {
1588 uhash_removeAll(customFormatArgStarts);
1591 // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
1592 // which we need not examine.
1593 int32_t limit = msgPattern.countParts() - 2;
1595 // We also need not look at the first two "parts"
1596 // (at most MSG_START and ARG_START) in this loop.
1597 // We determine the argTypeCount first so that we can allocateArgTypes
1598 // so that the next loop can set argTypes[argNumber].
1599 // (This is for the C API which needs the argTypes to read its va_arg list.)
1600 for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) {
1601 const MessagePattern::Part& part = msgPattern.getPart(i);
1602 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
1603 const int argNumber = part.getValue();
1604 if (argNumber >= argTypeCount) {
1605 argTypeCount = argNumber + 1;
1609 if (!allocateArgTypes(argTypeCount, status)) {
1612 // Set all argTypes to kObject, as a "none" value, for lack of any better value.
1613 // We never use kObject for real arguments.
1614 // We use it as "no argument yet" for the check for hasArgTypeConflicts.
1615 for (int32_t i = 0; i < argTypeCount; ++i) {
1616 argTypes[i] = Formattable::kObject;
1618 hasArgTypeConflicts = FALSE;
1620 // This loop starts at part index 1 because we do need to examine
1621 // ARG_START parts. (But we can ignore the MSG_START.)
1622 for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) {
1623 const MessagePattern::Part* part = &msgPattern.getPart(i);
1624 if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) {
1627 UMessagePatternArgType argType = part->getArgType();
// argNumber stays -1 when the part after ARG_START is not an ARG_NUMBER.
1629 int32_t argNumber = -1;
1630 part = &msgPattern.getPart(i + 1);
1631 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
1632 argNumber = part->getValue();
1634 Formattable::Type formattableType;
// An argument without a type is treated as a string.
1637 case UMSGPAT_ARG_TYPE_NONE:
1638 formattableType = Formattable::kString;
1640 case UMSGPAT_ARG_TYPE_SIMPLE: {
// Read the explicit type text, then the style text if an ARG_STYLE part
// follows; `style` stays empty otherwise.
1643 UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++));
1644 UnicodeString style;
1645 if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) {
1646 style = msgPattern.getSubstring(*part);
1649 UParseError parseError;
// createAppropriateFormat() also reports the expected Formattable type
// through formattableType.
// NOTE(review): `index` is presumably the ARG_START part index saved before
// i was advanced -- confirm.
1650 Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status);
1651 setArgStartFormat(index, formatter, status);
1654 case UMSGPAT_ARG_TYPE_CHOICE:
1655 case UMSGPAT_ARG_TYPE_PLURAL:
1656 case UMSGPAT_ARG_TYPE_SELECTORDINAL:
1657 formattableType = Formattable::kDouble;
1659 case UMSGPAT_ARG_TYPE_SELECT:
1660 formattableType = Formattable::kString;
1663 status = U_INTERNAL_PROGRAM_ERROR; // Should be unreachable.
1664 formattableType = Formattable::kString;
// Record the type for numbered arguments only; a different type already
// recorded for the same number is flagged as a conflict.
1667 if (argNumber != -1) {
1668 if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) {
1669 hasArgTypeConflicts = TRUE;
1671 argTypes[argNumber] = formattableType;
// Creates the Format object for an explicit argument type and style, using
// this object's locale (fLocale).
//   type             keyword looked up in TYPE_IDS
//   style            style keyword, or a custom pattern / rule-set name when
//                    it is not one of the known style keywords
//   formattableType  receives the Formattable type associated with the
//                    created format
//   parseError       receives details when a number pattern is applied
//   ec               in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for
//                    an unrecognized type
1677 Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style,
1678 Formattable::Type& formattableType, UParseError& parseError,
1680 if (U_FAILURE(ec)) {
1684 int32_t typeID, styleID;
1685 DateFormat::EStyle date_style;
1687 switch (typeID = findKeyword(type, TYPE_IDS)) {
// Number formats: the style keyword selects the factory. A style that is not
// a known keyword is applied as a DecimalFormat pattern.
1689 formattableType = Formattable::kDouble;
1690 switch (findKeyword(style, NUMBER_STYLE_IDS)) {
1692 fmt = NumberFormat::createInstance(fLocale, ec);
1695 fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
1698 fmt = NumberFormat::createPercentInstance(fLocale, ec);
1701 formattableType = Formattable::kLong;
1702 fmt = createIntegerFormat(fLocale, ec);
1705 fmt = NumberFormat::createInstance(fLocale, ec);
// The pattern is only applied when the instance really is a DecimalFormat.
1707 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt);
1708 if (decfmt != NULL) {
1709 decfmt->applyPattern(style,parseError,ec);
// Date and time formats: an unknown style keyword falls back to
// DateFormat::kDefault for creation and is then applied as a
// SimpleDateFormat pattern.
1718 formattableType = Formattable::kDate;
1719 styleID = findKeyword(style, DATE_STYLE_IDS);
1720 date_style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
1723 fmt = DateFormat::createDateInstance(date_style, fLocale);
1725 fmt = DateFormat::createTimeInstance(date_style, fLocale);
1728 if (styleID < 0 && fmt != NULL) {
1729 SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt);
1730 if (sdtfmt != NULL) {
1731 sdtfmt->applyPattern(style);
// Rule-based number formats (spellout, ordinal, duration): the style text is
// passed to makeRBNF() as the default rule-set name.
1737 formattableType = Formattable::kDouble;
1738 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec);
1741 formattableType = Formattable::kDouble;
1742 fmt = makeRBNF(URBNF_ORDINAL, fLocale, style, ec);
1745 formattableType = Formattable::kDouble;
1746 fmt = makeRBNF(URBNF_DURATION, fLocale, style, ec);
// Unrecognized type keyword.
1749 formattableType = Formattable::kString;
1750 ec = U_ILLEGAL_ARGUMENT_ERROR;
1758 //-------------------------------------
1759 // Finds the string, s, in the string array, list.
// `list` is an array of NUL-terminated UChar strings that ends with a NULL
// entry. The comparison is done on s with surrounding pattern white space
// trimmed.
// NOTE(review): presumably yields the index of the matching entry, and a
// negative value when nothing matches (callers test `styleID >= 0`) --
// confirm against the return statements.
1760 int32_t MessageFormat::findKeyword(const UnicodeString& s,
1761 const UChar * const *list)
1764 return 0; // default
1767 int32_t length = s.length();
// trimWhiteSpace() updates `length` to the trimmed length; `buffer` is then
// a read-only alias of that sub-range of s.
1768 const UChar *ps = PatternProps::trimWhiteSpace(s.getBuffer(), length);
1769 UnicodeString buffer(FALSE, ps, length);
1770 // Trims the space characters and turns all characters
1771 // in s to lower case.
1773 for (int32_t i = 0; list[i]; ++i) {
// compare() yields 0 for equal strings, hence the negation.
1774 if (!buffer.compare(list[i], u_strlen(list[i]))) {
1782 * Convenience method that ought to be in NumberFormat
1785 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
1786 NumberFormat *temp = NumberFormat::createInstance(locale, status);
1787 DecimalFormat *temp2;
1788 if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) {
1789 temp2->setMaximumFractionDigits(0);
1790 temp2->setDecimalSeparatorAlwaysShown(FALSE);
1791 temp2->setParseIntegerOnly(TRUE);
1798 * Return the default number format. Used to format a numeric
1799 * argument when subformats[i].format is NULL. Returns NULL
1802 * Semantically const but may modify *this.
1804 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
1805 if (defaultNumberFormat == NULL) {
1806 MessageFormat* t = (MessageFormat*) this;
1807 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
1808 if (U_FAILURE(ec)) {
1809 delete t->defaultNumberFormat;
1810 t->defaultNumberFormat = NULL;
1811 } else if (t->defaultNumberFormat == NULL) {
1812 ec = U_MEMORY_ALLOCATION_ERROR;
1815 return defaultNumberFormat;
1819 * Return the default date format. Used to format a date
1820 * argument when subformats[i].format is NULL. Returns NULL
1823 * Semantically const but may modify *this.
1825 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
1826 if (defaultDateFormat == NULL) {
1827 MessageFormat* t = (MessageFormat*) this;
1828 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
1829 if (t->defaultDateFormat == NULL) {
1830 ec = U_MEMORY_ALLOCATION_ERROR;
1833 return defaultDateFormat;
// Forwards to msgPattern.hasNamedArguments() for the currently applied pattern.
1837 MessageFormat::usesNamedArguments() const {
1838 return msgPattern.hasNamedArguments();
// Returns argTypeCount, which cacheExplicitFormats() raises to
// (highest argument number in the pattern) + 1.
1842 MessageFormat::getArgTypeCount() const {
1843 return argTypeCount;
1846 UBool MessageFormat::equalFormats(const void* left, const void* right) {
1847 return *(const Format*)left==*(const Format*)right;
// Equality operator of the placeholder DummyFormat. The argument is unnamed,
// so the result cannot depend on the other object's contents.
1851 UBool MessageFormat::DummyFormat::operator==(const Format&) const {
1855 Format* MessageFormat::DummyFormat::clone() const {
1856 return new DummyFormat();
1859 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
1860 UnicodeString& appendTo,
1861 UErrorCode& status) const {
1862 if (U_SUCCESS(status)) {
1863 status = U_UNSUPPORTED_ERROR;
// Another format() overload of the placeholder DummyFormat: formatting is not
// supported, so a successful status is replaced by U_UNSUPPORTED_ERROR.
// An existing failure in status is left as it is.
1868 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
1869 UnicodeString& appendTo,
1871 UErrorCode& status) const {
1872 if (U_SUCCESS(status)) {
1873 status = U_UNSUPPORTED_ERROR;
1878 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
1879 UnicodeString& appendTo,
1880 FieldPositionIterator*,
1881 UErrorCode& status) const {
1882 if (U_SUCCESS(status)) {
1883 status = U_UNSUPPORTED_ERROR;
// parseObject() of the placeholder DummyFormat. The parameters are unnamed,
// so none of them is read or modified by name here.
1888 void MessageFormat::DummyFormat::parseObject(const UnicodeString&,
1890 ParsePosition& ) const {
// Wraps fNameList as the list of names this enumeration walks over.
// The pointer is stored as is; the destructor deletes it, so the enumeration
// takes over the list. The status parameter is unused (its name is
// commented out).
1894 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
1896 fFormatNames = fNameList;
1899 const UnicodeString*
1900 FormatNameEnumeration::snext(UErrorCode& status) {
1901 if (U_SUCCESS(status) && pos < fFormatNames->size()) {
1902 return (const UnicodeString*)fFormatNames->elementAt(pos++);
// NOTE(review): presumably rewinds the position used by snext() to the first
// name -- confirm. The status parameter is unused (its name is commented out).
1908 FormatNameEnumeration::reset(UErrorCode& /*status*/) {
// Returns the number of names in the wrapped list, or 0 when there is no
// list. The status parameter is unused (its name is commented out).
1913 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
1914 return (fFormatNames==NULL) ? 0 : fFormatNames->size();
1917 FormatNameEnumeration::~FormatNameEnumeration() {
1918 delete fFormatNames;
1921 MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat &mf, UPluralType t)
1922 : msgFormat(mf), rules(NULL), type(t) {
// Destructor.
// NOTE(review): presumably releases `rules`, which select() creates on
// demand -- confirm.
1925 MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() {
// Chooses the plural keyword for `number`.
//   ctx     points to the PluralSelectorContext of the argument being
//           formatted; it is updated here (numberArgIndex, formatter,
//           forReplaceNumber, numberString)
//   number  the value to select for (argument number minus the offset)
//   ec      in/out error code; on failure a read-only alias of the 5 code
//           units at OTHER_STRING is returned
// The plural rules for msgFormat.fLocale and `type` are obtained via
// PluralRules::forLocale() and stored in `rules`.
1929 UnicodeString MessageFormat::PluralSelectorProvider::select(void *ctx, double number,
1930 UErrorCode& ec) const {
1931 if (U_FAILURE(ec)) {
1932 return UnicodeString(FALSE, OTHER_STRING, 5);
// The method is const, but it stores the rules in this object; the cast gives
// write access to `rules`.
1934 MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this);
1936 t->rules = PluralRules::forLocale(msgFormat.fLocale, type, ec);
1937 if (U_FAILURE(ec)) {
1938 return UnicodeString(FALSE, OTHER_STRING, 5);
1941 // Select a sub-message according to how the number is formatted,
1942 // which is specified in the selected sub-message.
1943 // We avoid this circle by looking at how
1944 // the number is formatted in the "other" sub-message
1945 // which must always be present and usually contains the number.
1946 // Message authors should be consistent across sub-messages.
1947 PluralSelectorContext &context = *static_cast<PluralSelectorContext *>(ctx);
1948 int32_t otherIndex = msgFormat.findOtherSubMessage(context.startIndex);
1949 context.numberArgIndex = msgFormat.findFirstPluralNumberArg(otherIndex, context.argName);
// A positive index is used as the key under which an explicit formatter for
// that argument may be cached.
1950 if(context.numberArgIndex > 0 && msgFormat.cachedFormatters != NULL) {
1952 (const Format*)uhash_iget(msgFormat.cachedFormatters, context.numberArgIndex);
// Without an explicit formatter, fall back to the default number format and
// mark the formatted string as the one to use for number replacement.
1954 if(context.formatter == NULL) {
1955 context.formatter = msgFormat.getDefaultNumberFormat(ec);
1956 context.forReplaceNumber = TRUE;
1958 U_ASSERT(context.number.getDouble(ec) == number); // argument number minus the offset
// NOTE(review): context.formatter is used here without a NULL check, although
// getDefaultNumberFormat() returns NULL on failure -- confirm this is guarded.
1959 context.formatter->format(context.number, context.numberString, ec);
// With a DecimalFormat, selection is done on the FixedDecimal it produces for
// the number; otherwise on the plain double.
1960 const DecimalFormat *decFmt = dynamic_cast<const DecimalFormat *>(context.formatter);
1961 if(decFmt != NULL) {
1962 FixedDecimal dec = decFmt->getFixedDecimal(context.number, ec);
1963 return rules->select(dec);
1965 return rules->select(number);
// NOTE(review): presumably discards the lazily created `rules` so that the
// next select() call creates them again -- confirm.
1969 void MessageFormat::PluralSelectorProvider::reset() {
1977 #endif /* #if !UCONFIG_NO_FORMATTING */