2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
4 // Licensed under the Apache License, Version 2.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 * @file FBaseCharacter.h
19 * @brief This is the header file for the %Character class.
21 * This header file contains the declarations of the %Character class.
23 #ifndef _FBASE_CHARACTER_H_
24 #define _FBASE_CHARACTER_H_
26 #include <FBaseObject.h>
27 #include <FBaseString.h>
30 namespace Tizen { namespace Base
33 * @enum UnicodeCategory
35 * Defines the constants used to distinguish the categories of Unicode characters.
41 UNICODE_SURROGATE = 1, /**< The surrogate Unicode category */
42 UNICODE_MODIFIER, /**< The spacing modifier Unicode category */
43 UNICODE_ARROW, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
44 UNICODE_SPACE, /**< The space Unicode category */
45 UNICODE_PUNCTUATION, /**< The punctuation Unicode category */
46 UNICODE_CONTROL, /**< The control Unicode category */
47 UNICODE_MATH, /**< The math Unicode category */
48 UNICODE_DIGIT, /**< The digit Unicode category */
49 UNICODE_HANGUL, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
50 UNICODE_HANJA, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
51 UNICODE_COMBINING, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
52 UNICODE_LANGUAGE, /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
53 UNICODE_UPPERCASE, /**< The uppercase Unicode category*/
54 UNICODE_LOWERCASE, /**< The lowercase Unicode category*/
55 UNICODE_TITLECASE, /**< The titlecase Unicode category*/
56 UNICODE_LETTER, /**< The letter Unicode category*/
57 UNICODE_MARK, /**< The mark Unicode category*/
58 UNICODE_CURRENCY, /**< The currency Unicode category*/
59 UNICODE_SEPARATOR, /**< The separator Unicode category*/
60 UNICODE_OTHER /**< The other Unicode category */
66 * @brief This class is the wrapper class for the @c wchar_t built-in type.
70 * The %Character class wraps a value of the @c wchar_t type. It also provides
71 * several methods for determining a Unicode character's category, and for
72 * converting the case of intrinsic characters. The class is useful when
73 * passing a Unicode character to a method expecting an instance of Object.
75 * The following example demonstrates how to use the %Character class.
81 * using namespace Tizen::Base;
83 * // This method converts the first character of the given @c string to the upper case.
85 * MyClass::CharacterSample(String& str)
89 * str.GetCharAt(0, chr);
91 * // Converts to upper case
92 * upper = Character::ToUpper(chr);
94 * str.SetCharAt(upper, 0);
98 class _OSP_EXPORT_ Character
103 * Initializes this instance of %Character with the specified parameter.
107 * @param[in] value A wide character (@c wchar_t) used to initialize the %Character instance
109 Character(wchar_t value);
112 * Copying of objects using this copy constructor is allowed.
116 * @param[in] value An instance of %Character
118 Character(const Character& value);
121 * This destructor overrides Tizen::Base::Object::~Object().
125 virtual ~Character(void);
128 * Copying of objects using this copy assignment operator is allowed.
132 * @param[in] rhs An instance of %Character
134 Character& operator =(const Character& rhs);
137 * Compares (ordinal comparison) the value of the calling instance
138 * with the value of the input %Character instance.
142 * @return A 32-bit @c signed integer value
143 * @param[in] value The character instance to compare with
146 * @li < 0 if the value of the current instance is less than the value of the input instance
147 * @li == 0 if the value of the current instance is equal to the value of the input instance
148 * @li > 0 if the value of the current instance is greater than the value of the input instance
152 int CompareTo(const Character& value) const;
155 * Checks whether the value of the Object parameter is equal
156 * to the value of the calling object.
159 * @return @c true if the input Object is equal to the calling %Object, @n
161 * @param[in] obj The object to compare with the calling object
162 * @see Tizen::Base::Object::Equals()
164 virtual bool Equals(const Object& obj) const;
167 * Gets the hash value of the current instance.
171 * @return The hash value of the current instance
172 * @remarks Two instances that are equal according to Tizen::Base::Object::Equals() must return the same hash value. For better performance,
173 * the hash function used must generate a random distribution for all inputs.
175 virtual int GetHashCode(void) const;
178 * Returns the value of the current instance as a
183 * @return The value of this instance as
186 wchar_t ToMchar(void) const;
190 * Converts the Unicode characters of the calling object to its equivalent lowercase. @n
191 * The Unicode characters other than English alphabets are not changed.
193 * @brief <i> [Deprecated] </i>
194 * @deprecated This method is deprecated as a new method has been introduced.
195 * Instead of using this method, use the ToLowerCase() method that supports Unicode characters other than English alphabets.
204 * Converts the Unicode characters of the calling object to its equivalent lowercase. @n
205 * The Unicode characters other than English alphabets are also supported.
209 void ToLowerCase(void);
214 * Converts the Unicode characters of the current object to its equivalent uppercase. @n
215 * The Unicode characters other than English alphabets are not changed.
217 * @brief <i> [Deprecated] </i>
218 * @deprecated This method is deprecated as a new method has been introduced.
219 * Instead of using this method, use the ToUpperCase() method that supports the Unicode characters other than English alphabets.
228 * Converts the Unicode characters of the current object to its equivalent uppercase. @n
229 * The Unicode characters other than English alphabets are also supported.
233 void ToUpperCase(void);
237 * Returns a string representing the value of the calling %Character instance.
241 * @return An instance of String class that
242 * contains a Unicode representation of the calling instance
244 String ToString(void) const;
247 * Returns a string representation of the input
252 * @return An instance of the String class
253 * that contains the Unicode representation of the
255 * @param[in] value The Unicode character to convert
257 static String ToString(wchar_t value);
260 * Categorizes a Unicode character into a group identified by
261 * one of the UnicodeCategory values.
265 * @return A value of type UnicodeCategory that identifies the group that contains the specified @c ch
266 * @param[in] ch The Unicode character to categorize
269 static UnicodeCategory GetUnicodeCategory(wchar_t ch);
273 * Converts the input Unicode character to its equivalent lowercase. @n
274 * The Unicode characters other than English alphabets are not changed.
276 * @brief <i> [Deprecated] </i>
277 * @deprecated This method is deprecated as a new method has been introduced.
278 * Instead of using this method, use the ToLowerCase(wchar_t ch) method that supports the Unicode characters other than English alphabets.
281 * @return A lowercase equivalent of the input Unicode character
282 * @param[in] ch The Unicode character to convert
285 static wchar_t ToLower(wchar_t ch);
288 * Converts the input Unicode character to its equivalent lowercase. @n
289 * The Unicode characters other than English alphabets are also supported.
293 * @return A lowercase equivalent of the input Unicode character
294 * @param[in] ch The Unicode character to convert
296 static wchar_t ToLowerCase(wchar_t ch);
300 * Converts the input Unicode character to its equivalent uppercase. @n
301 * The Unicode characters other than English alphabets are not changed.
303 * @brief <i> [Deprecated] </i>
304 * @deprecated This method is deprecated as a new method has been introduced.
305 * Instead of using this method, use the ToUpperCase(wchar_t ch) method that supports Unicode characters other than English alphabets.
308 * @return An uppercase equivalent of the input Unicode character
309 * @param[in] ch The Unicode character to convert
312 static wchar_t ToUpper(wchar_t ch);
315 * Converts the input Unicode character to its equivalent uppercase. @n
316 * The Unicode characters other than English alphabets are also supported.
320 * @return An uppercase equivalent of the input Unicode character
321 * @param[in] ch The Unicode character to convert
323 static wchar_t ToUpperCase(wchar_t ch);
326 * Checks whether the input character is an alphanumeric character (letter or digit). @n
327 * A character is considered to be an alphanumeric character if either Character::IsLetter(wchar_t ch) or Character::IsDigit(wchar_t ch) returns @c true for the character.
329 * @brief <i> [Compatibility] </i>
333 * @compatibility This method has compatibility issues with OSP compatible applications. @n
334 * For more information, see @ref CompCharacterIsAlphaNumericPage "here".
336 * @return @c true if the input character is alphanumeric, @n
338 * @param[in] ch The Unicode character
340 static bool IsAlphaNumeric(wchar_t ch);
344 * @page CompCharacterIsAlphaNumericPage Compatibility for IsAlphaNumeric()
345 * @section CompCharacterIsAlphaNumericPageIssueSection Issues
346 * Implementing this method in OSP compatible applications has the following issues: @n
348 * -# The method returns @c true only if the character is an alphabet character and cannot check other Unicode characters in the Letter and Digit categories.
350 * @section CompCharacterIsAlphaNumericPageSolutionSection Resolutions
352 * This issue has been resolved in Tizen.
358 * Checks whether the input character is a digit.
362 * @return @c true if the input character is a digit, @n
364 * @param[in] ch The Unicode character
366 static bool IsDigit(wchar_t ch);
369 * Checks whether the input character is a Unicode letter.
371 * @brief <i> [Compatibility] </i>
375 * @compatibility This method has compatibility issues with OSP compatible applications. @n
376 * For more information, see @ref CompCharacterIsLetterPage "here".
378 * @return @c true if the input character is a Unicode letter, @n
380 * @param[in] ch The Unicode character
382 static bool IsLetter(wchar_t ch);
386 * @page CompCharacterIsLetterPage Compatibility for IsLetter()
387 * @section CompCharacterIsLetterPageIssueSection Issues
388 * Implementing this method in OSP compatible applications has the following issues: @n
390 * -# The method returns @c true only if the character is an alphabet character and cannot check other Unicode characters in the Letter category.
392 * @section CompCharacterIsLetterPageSolutionSection Resolutions
394 * This issue has been resolved in Tizen.
400 * Checks whether the input character is a lowercase alphabet.
402 * @brief <i> [Deprecated] </i>
403 * @deprecated This method is deprecated as a new method has been introduced.
404 * Instead of using this method, use the IsLowerCase(wchar_t ch) method that supports Unicode characters.
407 * @return @c true if the input character is a lowercase English alphabet, @n
409 * @param[in] ch The Unicode character
412 static bool IsLower(wchar_t ch);
415 * Checks whether the input character is a lowercase alphabet. @n
416 * Unicode characters other than English alphabets are also supported.
420 * @return @c true if the input character is a lowercase alphabet, @n
422 * @param[in] ch The Unicode character
424 static bool IsLowerCase(wchar_t ch);
428 * Checks whether the input character is an uppercase alphabet.
430 * @brief <i> [Deprecated] </i>
431 * @deprecated This method is deprecated as a new method has been introduced.
432 * Instead of using this method, use the IsUpperCase(wchar_t ch) method that also supports Unicode characters other than English alphabets.
435 * @return @c true if the input character is an uppercase alphabet, @n
437 * @param[in] ch The Unicode character
440 static bool IsUpper(wchar_t ch);
443 * Checks whether the input character is an uppercase alphabet. @n
444 * Unicode characters other than English alphabets are also supported.
448 * @return @c true if the input character is an uppercase alphabet, @n
450 * @param[in] ch The Unicode character
452 static bool IsUpperCase(wchar_t ch);
455 * Returns the value of the input character in the supplied radix. @n
456 * The value of radix must be between RADIX_MIN and RADIX_MAX.
460 * @return An integer value of the input character in the supplied radix
461 * @param[in] ch The character to determine the value
462 * @param[in] radix The radix
464 static int ToDigit(wchar_t ch, int radix);
467 * Returns the value which represents the input digit with specified radix. @n
468 * The value of radix must be between RADIX_MIN and RADIX_MAX.
472 * @return A Unicode character of the input digit with specified @c radix @n
473 * else @c null character (U+0000)
474 * @param[in] digit The digit to determine the value
475 * @param[in] radix The radix
477 static wchar_t ForDigit(int digit, int radix);
480 * Gets the numeric value of the input Unicode character. @n
481 * This is useful because some numeric values are fractions, negative, or too large for an @c int value.
485 * @return The numeric value of the character as a @c double, @n else NO_NUMERIC_VALUE if the character has no numeric value in Unicode
486 * @param[in] ch A Unicode character
488 static double GetNumericValue(wchar_t ch);
491 * Checks whether the input character is defined, which usually means that it is assigned a character in the Unicode.
495 * @return @c true if the Unicode character is an assigned character, @n
497 * @param[in] ch A Unicode character
499 static bool IsDefined(wchar_t ch);
502 * Checks whether the input character is a whitespace character. @n
503 * A character is considered to be a whitespace character if and only if it satisfies one of the following criteria:
507 * @return @c true if the Unicode character is a whitespace character, @n
509 * @param[in] ch A Unicode character
512 * It is a Unicode Separator character, but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
513 * It is U+0009 HORIZONTAL TABULATION.
514 * It is U+000A LINE FEED.
515 * It is U+000B VERTICAL TABULATION.
516 * It is U+000C FORM FEED.
517 * It is U+000D CARRIAGE RETURN.
518 * It is U+001C FILE SEPARATOR.
519 * It is U+001D GROUP SEPARATOR.
520 * It is U+001E RECORD SEPARATOR.
521 * It is U+001F UNIT SEPARATOR.
524 static bool IsWhitespace(wchar_t ch);
528 * Checks whether the input character is a title character.
532 * @return @c true if the Unicode character is a title character, @n
534 * @param[in] ch A Unicode character
536 static bool IsTitleCase(wchar_t ch);
539 * Returns the title case equivalent for the input character if it exists.
543 * @return A title case character equivalent for the input character @n The character itself is returned if none is defined.
544 * @param[in] ch A Unicode character
546 static wchar_t ToTitleCase(wchar_t ch);
549 * Checks whether the input character is an ISO control code or not.
553 * @return @c true if the Unicode character is an ISO control character, @n
555 * @param[in] ch A Unicode character
557 static bool IsISOControl(wchar_t ch);
561 * A constant holding the largest value of type @c wchar_t, 0x10FFFF.
565 static const wchar_t VALUE_MAX = 0x10FFFF;
568 * A constant holding the smallest value of type wchar_t, 0x0000.
572 static const wchar_t VALUE_MIN = 0x0000;
575 * The minimum radix available for conversion to and from strings. @n
576 * Same value as RADIX_BINARY.
580 static const int RADIX_MIN = 2;
583 * The radix for binary number.
587 static const int RADIX_BINARY = 2;
590 * The radix for decimal number.
594 static const int RADIX_DECIMAL = 10;
597 * The radix for octal number.
601 static const int RADIX_OCTAL = 8;
604 * The radix for hexadecimal number.
608 static const int RADIX_HEXADECIMAL = 16;
611 * The maximum radix available for conversion to and from strings.
615 static const int RADIX_MAX = 36;
618 * Special value that is returned by GetNumericValue(wchar_t ch) when no numeric value is defined for the Unicode character.
622 static const double NO_NUMERIC_VALUE = -123456789.0;
628 friend class Integer;
632 friend class _CharacterImpl;
633 class _CharacterImpl * __pCharacterImpl;
639 #endif // _FBASE_CHARACTER_H_