2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
4 // Licensed under the Apache License, Version 2.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 * @file FBaseCharacter.h
19 * @brief This is the header file for the %Character class.
21 * This header file contains the declarations of the %Character class.
23 #ifndef _FBASE_CHARACTER_H_
24 #define _FBASE_CHARACTER_H_
26 #include <FBaseObject.h>
27 #include <FBaseString.h>
30 namespace Tizen { namespace Base
33 * @enum UnicodeCategory
35 * Defines the constants used to distinguish the categories of the Unicode characters.
41 UNICODE_SURROGATE = 1, /**< The surrogate Unicode category */
42 UNICODE_MODIFIER, /**< The spacing modifier Unicode category */
43 UNICODE_ARROW, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
44 UNICODE_SPACE, /**< The space Unicode category */
45 UNICODE_PUNCTUATION, /**< The punctuation Unicode category */
46 UNICODE_CONTROL, /**< The control Unicode category */
47 UNICODE_MATH, /**< The math Unicode category */
48 UNICODE_DIGIT, /**< The digit Unicode category */
49 UNICODE_HANGUL, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
50 UNICODE_HANJA, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
51 UNICODE_COMBINING, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
52 UNICODE_LANGUAGE, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
53 UNICODE_UPPERCASE, /**< The uppercase Unicode category */
54 UNICODE_LOWERCASE, /**< The lowercase Unicode category */
55 UNICODE_TITLECASE, /**< The titlecase Unicode category */
56 UNICODE_LETTER, /**< The letter Unicode category */
57 UNICODE_MARK, /**< The mark Unicode category */
58 UNICODE_CURRENCY, /**< The currency Unicode category */
59 UNICODE_SEPARATOR, /**< The separator Unicode category */
60 UNICODE_OTHER /**< The other Unicode category */
66 * @brief This class is the wrapper class for the @c wchar_t built-in type.
70 * The %Character class wraps the value of the @c wchar_t type. It also provides
71 * several methods for determining a Unicode character's category, and for
72 * converting the case of the intrinsic characters. This class is useful when
73 * passing a Unicode character to a method expecting an instance of Object.
75 * The following example demonstrates how to use the %Character class.
81 * using namespace Tizen::Base;
83 * // This method converts the first character of the given string to the upper case.
85 * MyClass::CharacterSample(String& str)
89 * str.GetCharAt(0, chr);
91 * // Converts to upper case
92 * upper = Character::ToUpper(chr);
94 * str.SetCharAt(upper, 0);
98 class _OSP_EXPORT_ Character
103 * Initializes this instance of %Character with the specified parameter.
107 * @param[in] value The wide character (@c wchar_t) used to initialize the %Character instance
109 Character(wchar_t value);
112 * Copying of objects using this copy constructor is allowed.
116 * @param[in] value An instance of %Character to copy
118 Character(const Character& value);
121 * This destructor overrides Tizen::Base::Object::~Object().
125 virtual ~Character(void);
128 * Copying of objects using this copy assignment operator is allowed.
132 * @param[in] rhs An instance of %Character to copy
134 Character& operator =(const Character& rhs);
137 * Compares (ordinal comparison) the value of the calling instance
138 * with the value of the input %Character instance.
142 * @return The 32-bit @c signed integer value
143 * @param[in] value The %Character instance to compare with
146 * @li < 0 if the value of the current instance is less than the value of the input instance
147 * @li == 0 if the value of the current instance is equal to the value of the input instance
148 * @li > 0 if the value of the current instance is greater than the value of the input instance
152 int CompareTo(const Character& value) const;
155 * Checks whether the value of the input Object is equal
156 * to the value of the calling %Object.
159 * @return @c true if the input Object is equal to the calling %Object, @n
161 * @param[in] obj The object to compare with the calling object
162 * @see Tizen::Base::Object::Equals()
164 virtual bool Equals(const Object& obj) const;
167 * Gets the hash value of the current instance.
171 * @return The hash value of the current instance
172 * @remarks Two instances that are equal according to Tizen::Base::Object::Equals() must return the same hash value. @n
173 * For better performance, the hash function used must generate a random distribution for all the inputs.
175 virtual int GetHashCode(void) const;
178 * Returns the value of the current instance as a @c wchar_t value.
182 * @return The value of this instance as a @c wchar_t value
184 wchar_t ToMchar(void) const;
188 * Converts the Unicode characters of the calling object to its equivalent lowercase. @n
189 * Unicode characters other than the English alphabets are not changed.
191 * @brief <i> [Deprecated] </i>
192 * @deprecated This method is deprecated as a new method has been introduced.
193 * Instead of using this method, use the ToLowerCase() method that supports Unicode characters other than the English alphabets.
202 * Converts the Unicode characters of the calling object to its equivalent lowercase. @n
203 * Unicode characters other than the English alphabets are also supported.
207 void ToLowerCase(void);
212 * Converts the Unicode characters of the current object to its equivalent uppercase. @n
213 * Unicode characters other than the English alphabets are not changed.
215 * @brief <i> [Deprecated] </i>
216 * @deprecated This method is deprecated as a new method has been introduced.
217 * Instead of using this method, use the ToUpperCase() method that supports Unicode characters other than the English alphabets.
226 * Converts the Unicode characters of the current object to its equivalent uppercase. @n
227 * Unicode characters other than English alphabets are also supported.
231 void ToUpperCase(void);
235 * Returns a string that represents the value of the calling %Character instance.
239 * @return An instance of String that
240 * contains a Unicode representation of the calling instance
242 String ToString(void) const;
245 * Returns a string representation of the input
250 * @return An instance of String
251 * that contains the Unicode representation of the
253 * @param[in] value The Unicode character to convert
255 static String ToString(wchar_t value);
258 * Categorizes a Unicode character into a group that is identified by
259 * one of the UnicodeCategory values.
263 * @return The value of type UnicodeCategory that identifies the group that contains the specified @c ch
264 * @param[in] ch The Unicode character to categorize
267 static UnicodeCategory GetUnicodeCategory(wchar_t ch);
271 * Converts the input Unicode character to its equivalent lowercase. @n
272 * Unicode characters other than the English alphabets are not changed.
274 * @brief <i> [Deprecated] </i>
275 * @deprecated This method is deprecated as a new method has been introduced.
276 * Instead of using this method, use the ToLowerCase(wchar_t ch) method that supports Unicode characters other than the English alphabets.
279 * @return The lowercase equivalent of the input Unicode character
280 * @param[in] ch The Unicode character to convert
283 static wchar_t ToLower(wchar_t ch);
286 * Converts the input Unicode character to its equivalent lowercase. @n
287 * Unicode characters other than the English alphabets are also supported.
291 * @return The lowercase equivalent of the input Unicode character
292 * @param[in] ch The Unicode character to convert
294 static wchar_t ToLowerCase(wchar_t ch);
298 * Converts the input Unicode character to its equivalent uppercase. @n
299 * Unicode characters other than the English alphabets are not changed.
301 * @brief <i> [Deprecated] </i>
302 * @deprecated This method is deprecated as a new method has been introduced.
303 * Instead of using this method, use the ToUpperCase(wchar_t ch) method that supports Unicode characters other than the English alphabets.
306 * @return The uppercase equivalent of the input Unicode character
307 * @param[in] ch The Unicode character to convert
310 static wchar_t ToUpper(wchar_t ch);
313 * Converts the input Unicode character to its equivalent uppercase. @n
314 * Unicode characters other than the English alphabets are also supported.
318 * @return The uppercase equivalent of the input Unicode character
319 * @param[in] ch The Unicode character to convert
321 static wchar_t ToUpperCase(wchar_t ch);
324 * Checks whether the input character is an alphanumeric character (letter or digit). @n
325 * A character is considered to be an alphanumeric character if either Character::IsLetter(wchar_t ch) or Character::IsDigit(wchar_t ch) returns @c true for the character.
327 * @brief <i> [Compatibility] </i>
331 * @compatibility This method has compatibility issues with OSP compatible applications. @n
332 * For more information, see @ref CompCharacterIsAlphaNumericPage "here".
334 * @return @c true if the input character is alphanumeric, @n
336 * @param[in] ch The Unicode character
338 static bool IsAlphaNumeric(wchar_t ch);
342 * @page CompCharacterIsAlphaNumericPage Compatibility for IsAlphaNumeric()
343 * @section CompCharacterIsAlphaNumericPageIssueSection Issues
344 * Implementing this method in OSP compatible applications has the following issues: @n
346 * -# The method returns @c true only if the character is an alphabet character; it cannot check other Unicode characters in the letter and digit category.
348 * @section CompCharacterIsAlphaNumericPageSolutionSection Resolutions
350 * The issue mentioned above is resolved in %Tizen.
356 * Checks whether the input character is a digit.
360 * @return @c true if the input character is a digit, @n
362 * @param[in] ch The Unicode character
364 static bool IsDigit(wchar_t ch);
367 * Checks whether the input character is a Unicode letter.
369 * @brief <i> [Compatibility] </i>
373 * @compatibility This method has compatibility issues with OSP compatible applications. @n
374 * For more information, see @ref CompCharacterIsLetterPage "here".
376 * @return @c true if the input character is a Unicode letter, @n
378 * @param[in] ch The Unicode character
380 static bool IsLetter(wchar_t ch);
384 * @page CompCharacterIsLetterPage Compatibility for IsLetter()
385 * @section CompCharacterIsLetterPageIssueSection Issues
386 * Implementing this method in OSP compatible applications has the following issues: @n
388 * -# The method returns @c true only if the character is an alphabet character; it cannot check other Unicode characters in the letter category.
390 * @section CompCharacterIsLetterPageSolutionSection Resolutions
392 * The issue mentioned above is resolved in %Tizen.
398 * Checks whether the input character is a lowercase alphabet.
400 * @brief <i> [Deprecated] </i>
401 * @deprecated This method is deprecated as a new method has been introduced.
402 * Instead of using this method, use the IsLowerCase(wchar_t ch) method that also supports Unicode characters other than the English alphabets.
405 * @return @c true if the input character is a lowercase alphabet, @n
407 * @param[in] ch The Unicode character
410 static bool IsLower(wchar_t ch);
413 * Checks whether the input character is a lowercase alphabet. @n
414 * Unicode characters other than English alphabets are also supported.
418 * @return @c true if the input character is a lowercase alphabet, @n
420 * @param[in] ch The Unicode character
422 static bool IsLowerCase(wchar_t ch);
426 * Checks whether the input character is an uppercase alphabet.
428 * @brief <i> [Deprecated] </i>
429 * @deprecated This method is deprecated as a new method has been introduced.
430 * Instead of using this method, use the IsUpperCase(wchar_t ch) method that also supports Unicode characters other than the English alphabets.
433 * @return @c true if the input character is an uppercase alphabet, @n
435 * @param[in] ch The Unicode character
438 static bool IsUpper(wchar_t ch);
441 * Checks whether the input character is an uppercase alphabet. @n
442 * Unicode characters other than English alphabets are also supported.
446 * @return @c true if the input character is an uppercase alphabet, @n
448 * @param[in] ch The Unicode character
450 static bool IsUpperCase(wchar_t ch);
453 * Returns the value of the input character in the supplied @c radix. @n
454 * The value of @c radix must be between ::RADIX_MIN and ::RADIX_MAX.
458 * @return The integer value of the input character in the supplied @c radix
459 * @param[in] ch The character that determines the value
460 * @param[in] radix The radix
462 static int ToDigit(wchar_t ch, int radix);
465 * Returns the value which represents the input digit in the specified @c radix. @n
466 * The value of @c radix must be between ::RADIX_MIN and ::RADIX_MAX.
470 * @return The Unicode character of the input digit in the specified @c radix @n
471 * else a @c null character (U+0000)
472 * @param[in] digit The digit that determines the value
473 * @param[in] radix The radix
475 static wchar_t ForDigit(int digit, int radix);
478 * Gets the numeric value of the input Unicode character. @n
479 * This is used when some numeric values are fractions, negative, or too large for the @c int value.
483 * @return The @c double value @n
484 * NO_NUMERIC_VALUE is returned for characters that have no numeric value defined in Unicode.
485 * @param[in] ch The Unicode character
487 static double GetNumericValue(wchar_t ch);
490 * Checks whether the input character is defined, which usually means that it is assigned a character in Unicode.
494 * @return @c true if the Unicode character is an assigned character, @n
496 * @param[in] ch The Unicode character
498 static bool IsDefined(wchar_t ch);
501 * Checks whether the input character is a whitespace character. @n
502 * A character is considered to be a whitespace character if and only if it satisfies one of the following criteria:
506 * @return @c true if the Unicode character is a whitespace character, @n
508 * @param[in] ch The Unicode character
511 * It is a Unicode separator character, but not a non-breaking space (U+00A0 NBSP, U+2007 Figure Space, or U+202F Narrow NBSP).
512 * It is U+0009 HORIZONTAL TABULATION.
513 * It is U+000A LINE FEED.
514 * It is U+000B VERTICAL TABULATION.
515 * It is U+000C FORM FEED.
516 * It is U+000D CARRIAGE RETURN.
517 * It is U+001C FILE SEPARATOR.
518 * It is U+001D GROUP SEPARATOR.
519 * It is U+001E RECORD SEPARATOR.
520 * It is U+001F UNIT SEPARATOR.
523 static bool IsWhitespace(wchar_t ch);
527 * Checks whether the input character is a title character.
531 * @return @c true if the Unicode character is a title character, @n
533 * @param[in] ch The Unicode character
535 static bool IsTitleCase(wchar_t ch);
538 * Returns the title case equivalent for the input character if it exists.
542 * @return The title case character equivalent for the input character @n
543 * The character itself is returned if no equivalent is defined.
544 * @param[in] ch The Unicode character
546 static wchar_t ToTitleCase(wchar_t ch);
549 * Checks whether the input character is an ISO control code.
553 * @return @c true if the Unicode character is an ISO control character, @n
555 * @param[in] ch The Unicode character
557 static bool IsISOControl(wchar_t ch);
561 * A constant holding the largest value of type @c wchar_t, 0x10FFFF.
565 static const wchar_t VALUE_MAX = 0x10FFFF;
568 * A constant holding the smallest value of type @c wchar_t, 0x0000.
572 static const wchar_t VALUE_MIN = 0x0000;
575 * The minimum radix available for converting to and from strings. @n
576 * Same value as ::RADIX_BINARY.
580 static const int RADIX_MIN = 2;
583 * The radix for a binary number.
587 static const int RADIX_BINARY = 2;
590 * The radix for a decimal number.
594 static const int RADIX_DECIMAL = 10;
597 * The radix for an octal number.
601 static const int RADIX_OCTAL = 8;
604 * The radix for a hexadecimal number.
608 static const int RADIX_HEXADECIMAL = 16;
611 * The maximum radix available for converting to and from strings (36).
615 static const int RADIX_MAX = 36;
618 * The special value that is returned by the GetNumericValue(wchar_t ch) method when no numeric value is defined for the Unicode character.
622 static const double NO_NUMERIC_VALUE = -123456789.0;
628 friend class Integer;
632 friend class _CharacterImpl;
633 class _CharacterImpl* __pCharacterImpl;
639 #endif // _FBASE_CHARACTER_H_