2 // Open Service Platform
3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 // Licensed under the Apache License, Version 2.0 (the License);
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
19 * @file FBaseCharacter.h
20 * @brief This is the header file for the %Character class.
22 * This header file contains the declarations of the %Character class.
24 #ifndef _FBASE_CHARACTER_H_
25 #define _FBASE_CHARACTER_H_
27 #include <FBaseObject.h>
28 #include <FBaseString.h>
31 namespace Tizen { namespace Base
34 * @enum UnicodeCategory
36 * Defines the constants used to distinguish the categories of Unicode characters.
42 UNICODE_SURROGATE = 1, /**< The surrogate Unicode category */
43 UNICODE_MODIFIER, /**< The spacing modifier Unicode category */
44 UNICODE_ARROW, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
45 UNICODE_SPACE, /**< The space Unicode category */
46 UNICODE_PUNCTUATION, /**< The punctuation Unicode category */
47 UNICODE_CONTROL, /**< The control Unicode category */
48 UNICODE_MATH, /**< The math Unicode category */
49 UNICODE_DIGIT, /**< The digit Unicode category */
50 UNICODE_HANGUL, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
51 UNICODE_HANJA, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
52 UNICODE_COMBINING, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
53 UNICODE_LANGUAGE, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
54 UNICODE_UPPERCASE, /**< The uppercase Unicode category*/
55 UNICODE_LOWERCASE, /**< The lowercase Unicode category*/
56 UNICODE_TITLECASE, /**< The titlecase Unicode category*/
57 UNICODE_LETTER, /**< The letter Unicode category*/
58 UNICODE_MARK, /**< The mark Unicode category*/
59 UNICODE_CURRENCY, /**< The currency Unicode category*/
60 UNICODE_SEPARATOR, /**< The separator Unicode category*/
61 UNICODE_OTHER /**< The other Unicode category */
67 * @brief This class is the wrapper class for the @c wchar_t built-in type.
71 * The %Character class wraps a value of the @c wchar_t type. It also provides
72 * several methods for determining a Unicode character's category, and for
73 * converting the case of intrinsic characters. The class is useful when
74 * passing a Unicode character to a method expecting an instance of Object.
76 * The following example demonstrates how to use the %Character class.
82 * using namespace Tizen::Base;
84 * // This method converts the first character of the given @c string to the upper case.
86 * MyClass::CharacterSample(String& str)
90 * str.GetCharAt(0, chr);
92 * // Converts to upper case
93 * upper = Character::ToUpper(chr);
95 * str.SetCharAt(upper, 0);
99 class _OSP_EXPORT_ Character
104 * Initializes this instance of %Character with the specified parameter.
108 * @param[in] value A wide character (@c wchar_t) used to initialize the %Character instance
110 Character(wchar_t value);
113 * Copying of objects using this copy constructor is allowed.
117 * @param[in] value An instance of %Character
119 Character(const Character& value);
122 * This destructor overrides Tizen::Base::Object::~Object().
126 virtual ~Character(void);
129 * Copying of objects using this copy assignment operator is allowed.
133 * @param[in] rhs An instance of %Character
135 Character& operator =(const Character& rhs);
138 * Compares (ordinal comparison) the value of the calling instance
139 * with the value of the input %Character instance.
143 * @return A 32-bit @c signed integer value
144 * @param[in] value The character instance to compare with
147 * @li < 0 if the value of the current instance is less than the value of the input instance
148 * @li == 0 if the value of the current instance is equal to the value of the input instance
149 * @li > 0 if the value of the current instance is greater than the value of the input instance
153 int CompareTo(const Character& value) const;
156 * Checks whether the value of the Object parameter is equal
157 * to the value of the calling object.
160 * @return @c true if the input Object is equal to the calling %Object, @n else @c false
162 * @param[in] obj The object to compare with the calling object
163 * @see Tizen::Base::Object::Equals()
165 virtual bool Equals(const Object& obj) const;
168 * Gets the hash value of the current instance.
172 * @return The hash value of the current instance
173 * @remarks Two instances for which Tizen::Base::Object::Equals() returns @c true must return the same hash value. For better performance,
174 * the hash function used must generate a random distribution for all inputs.
176 virtual int GetHashCode(void) const;
179 * Returns the value of the current instance as a @c wchar_t.
184 * @return The value of this instance as a @c wchar_t
187 wchar_t ToMchar(void) const;
191 * Converts the Unicode characters of the calling object to its equivalent lowercase. @n
192 * The Unicode characters other than English alphabets are not changed.
194 * @brief <i> [Deprecated] </i>
195 * @deprecated This method is deprecated as a new method has been introduced.
196 * Instead of using this method, use the ToLowerCase() method that supports Unicode characters other than English alphabets.
205 * Converts the Unicode characters of the calling object to its equivalent lowercase. @n
206 * The Unicode characters other than English alphabets are also supported.
210 void ToLowerCase(void);
215 * Converts the Unicode characters of the current object to its equivalent uppercase. @n
216 * The Unicode characters other than English alphabets are not changed.
218 * @brief <i> [Deprecated] </i>
219 * @deprecated This method is deprecated as a new method has been introduced.
220 * Instead of using this method, use the ToUpperCase() method that supports the Unicode characters other than English alphabets.
229 * Converts the Unicode characters of the current object to its equivalent uppercase. @n
230 * The Unicode characters other than English alphabets are also supported.
234 void ToUpperCase(void);
238 * Returns a string representing the value of the calling %Character instance.
242 * @return An instance of String class that
243 * contains a Unicode representation of the calling instance
245 String ToString(void) const;
248 * Returns a string representation of the input Unicode character.
253 * @return An instance of the String class
254 * that contains the Unicode representation of the input character
256 * @param[in] value The Unicode character to convert
258 static String ToString(wchar_t value);
261 * Categorizes a Unicode character into a group identified by
262 * one of the UnicodeCategory values.
266 * @return A value of type UnicodeCategory that identifies the group that contains the specified @c ch
267 * @param[in] ch The Unicode character to categorize
270 static UnicodeCategory GetUnicodeCategory(wchar_t ch);
274 * Converts the input Unicode character to its equivalent lowercase. @n
275 * The Unicode characters other than English alphabets are not changed.
277 * @brief <i> [Deprecated] </i>
278 * @deprecated This method is deprecated as a new method has been introduced.
279 * Instead of using this method, use the ToLowerCase(wchar_t ch) method that supports the Unicode characters other than English alphabets.
282 * @return A lowercase equivalent of the input Unicode character
283 * @param[in] ch The Unicode character to convert
286 static wchar_t ToLower(wchar_t ch);
289 * Converts the input Unicode character to its equivalent lowercase. @n
290 * The Unicode characters other than English alphabets are also supported.
294 * @return A lowercase equivalent of the input Unicode character
295 * @param[in] ch The Unicode character to convert
297 static wchar_t ToLowerCase(wchar_t ch);
301 * Converts the input Unicode character to its equivalent uppercase. @n
302 * The Unicode characters other than English alphabets are not changed.
304 * @brief <i> [Deprecated] </i>
305 * @deprecated This method is deprecated as a new method has been introduced.
306 * Instead of using this method, use the ToUpperCase(wchar_t ch) method that supports Unicode characters other than English alphabets.
309 * @return An uppercase equivalent of the input Unicode character
310 * @param[in] ch The Unicode character to convert
313 static wchar_t ToUpper(wchar_t ch);
316 * Converts the input Unicode character to its equivalent uppercase. @n
317 * The Unicode characters other than English alphabets are also supported.
321 * @return An uppercase equivalent of the input Unicode character
322 * @param[in] ch The Unicode character to convert
324 static wchar_t ToUpperCase(wchar_t ch);
327 * Checks whether the input character is an alphanumeric character (letter or digit). @n
328 * A character is considered to be an alphanumeric character if either Character::IsLetter(wchar_t ch) or Character::IsDigit(wchar_t ch) returns @c true for the character.
330 * @brief <i> [Compatibility] </i>
334 * @compatibility This method has compatibility issues with OSP compatible applications. @n
335 * For more information, see @ref CompCharacterIsAlphaNumericPage "here".
337 * @return @c true if the input character is alphanumeric, @n else @c false
339 * @param[in] ch The Unicode character
341 static bool IsAlphaNumeric(wchar_t ch);
345 * @page CompCharacterIsAlphaNumericPage Compatibility for IsAlphaNumeric()
346 * @section CompCharacterIsAlphaNumericPageIssueSection Issues
347 * Implementing this method in OSP compatible applications has the following issues: @n
349 * -# The method returns @c true only if the character is an alphabet character and cannot check other Unicode characters in the Letter and Digit categories.
351 * @section CompCharacterIsAlphaNumericPageSolutionSection Resolutions
353 * This issue has been resolved in Tizen.
359 * Checks whether the input character is a digit.
363 * @return @c true if the input character is a digit, @n else @c false
365 * @param[in] ch The Unicode character
367 static bool IsDigit(wchar_t ch);
370 * Checks whether the input character is a Unicode letter.
372 * @brief <i> [Compatibility] </i>
376 * @compatibility This method has compatibility issues with OSP compatible applications. @n
377 * For more information, see @ref CompCharacterIsLetterPage "here".
379 * @return @c true if the input character is a Unicode letter, @n else @c false
381 * @param[in] ch The Unicode character
383 static bool IsLetter(wchar_t ch);
387 * @page CompCharacterIsLetterPage Compatibility for IsLetter()
388 * @section CompCharacterIsLetterPageIssueSection Issues
389 * Implementing this method in OSP compatible applications has the following issues: @n
391 * -# The method returns @c true only if the character is an alphabet character and cannot check other Unicode characters in the Letter category.
393 * @section CompCharacterIsLetterPageSolutionSection Resolutions
395 * This issue has been resolved in Tizen.
401 * Checks whether the input character is a lowercase alphabet.
403 * @brief <i> [Deprecated] </i>
404 * @deprecated This method is deprecated as a new method has been introduced.
405 * Instead of using this method, use the IsLowerCase(wchar_t ch) method that supports Unicode characters.
408 * @return @c true if the input character is a lowercase English alphabet, @n else @c false
410 * @param[in] ch The Unicode character
413 static bool IsLower(wchar_t ch);
416 * Checks whether the input character is a lowercase alphabet. @n
417 * Unicode characters other than English alphabets are also supported.
421 * @return @c true if the input character is a lowercase alphabet, @n else @c false
423 * @param[in] ch The Unicode character
425 static bool IsLowerCase(wchar_t ch);
429 * Checks whether the input character is an uppercase alphabet.
431 * @brief <i> [Deprecated] </i>
432 * @deprecated This method is deprecated as a new method has been introduced.
433 * Instead of using this method, use the IsUpperCase(wchar_t ch) method that also supports Unicode characters other than English alphabets.
436 * @return @c true if the input character is an uppercase alphabet, @n else @c false
438 * @param[in] ch The Unicode character
441 static bool IsUpper(wchar_t ch);
444 * Checks whether the input character is an uppercase alphabet. @n
445 * Unicode characters other than English alphabets are also supported.
449 * @return @c true if the input character is an uppercase alphabet, @n else @c false
451 * @param[in] ch The Unicode character
453 static bool IsUpperCase(wchar_t ch);
456 * Returns the value of the input character in the supplied radix. @n
457 * The value of radix must be between RADIX_MIN and RADIX_MAX.
461 * @return An integer value of the input character in the supplied radix
462 * @param[in] ch The character to determine the value
463 * @param[in] radix The radix
465 static int ToDigit(wchar_t ch, int radix);
468 * Returns the character that represents the input digit in the specified radix. @n
469 * The value of radix must be between RADIX_MIN and RADIX_MAX.
473 * @return A Unicode character of the input digit with specified @c radix @n
474 * else @c null character (U+0000)
475 * @param[in] digit The digit to determine the value
476 * @param[in] radix The radix
478 static wchar_t ForDigit(int digit, int radix);
481 * Gets the numeric value of the input Unicode character. @n
482 * This is used when some numeric values are fractions, negative, or too large for an @c int value.
486 * @return A @c double value, @n else NO_NUMERIC_VALUE for characters without any numeric value in Unicode
487 * @param[in] ch A Unicode character
489 static double GetNumericValue(wchar_t ch);
492 * Checks whether the input character is defined, which usually means that it is assigned a character in Unicode.
496 * @return @c true if the Unicode character is an assigned character, @n else @c false
498 * @param[in] ch A Unicode character
500 static bool IsDefined(wchar_t ch);
503 * Checks whether the input character is a whitespace character. @n
504 * A character is considered to be a whitespace character if and only if it satisfies one of the following criteria:
508 * @return @c true if the Unicode character is a whitespace character, @n else @c false
510 * @param[in] ch A Unicode character
513 * It is a Unicode Separator character, but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
514 * It is U+0009 HORIZONTAL TABULATION.
515 * It is U+000A LINE FEED.
516 * It is U+000B VERTICAL TABULATION.
517 * It is U+000C FORM FEED.
518 * It is U+000D CARRIAGE RETURN.
519 * It is U+001C FILE SEPARATOR.
520 * It is U+001D GROUP SEPARATOR.
521 * It is U+001E RECORD SEPARATOR.
522 * It is U+001F UNIT SEPARATOR.
525 static bool IsWhitespace(wchar_t ch);
529 * Checks whether the input character is a title character.
533 * @return @c true if the Unicode character is a title character, @n else @c false
535 * @param[in] ch A Unicode character
537 static bool IsTitleCase(wchar_t ch);
540 * Returns the title case equivalent for the input character if it exists.
544 * @return A title case character equivalent for the input character @n The character itself is returned if none is defined.
545 * @param[in] ch A Unicode character
547 static wchar_t ToTitleCase(wchar_t ch);
550 * Checks whether the input character is an ISO control code or not.
554 * @return @c true if the Unicode character is an ISO control character, @n else @c false
556 * @param[in] ch A Unicode character
558 static bool IsISOControl(wchar_t ch);
562 * A constant holding the largest value of type @c wchar_t, 0x10FFFF.
566 static const wchar_t VALUE_MAX = 0x10FFFF;
569 * A constant holding the smallest value of type wchar_t, 0x0000.
573 static const wchar_t VALUE_MIN = 0x0000;
576 * The minimum radix available for conversion to and from strings. @n
577 * Same value as RADIX_BINARY.
581 static const int RADIX_MIN = 2;
584 * The radix for binary number.
588 static const int RADIX_BINARY = 2;
591 * The radix for decimal number.
595 static const int RADIX_DECIMAL = 10;
598 * The radix for octal number.
602 static const int RADIX_OCTAL = 8;
605 * The radix for hexadecimal number.
609 static const int RADIX_HEXADECIMAL = 16;
612 * The maximum radix available for conversion to and from strings (36).
616 static const int RADIX_MAX = 36;
619 * Special value that is returned by GetNumericValue(wchar_t ch) when no numeric value is defined for the Unicode character.
623 static const double NO_NUMERIC_VALUE = -123456789.0;
629 friend class Integer;
633 friend class _CharacterImpl;
634 class _CharacterImpl * __pCharacterImpl;
640 #endif // _FBASE_CHARACTER_H_