inc/FBaseCharacter.h

   1 //
   2 // Open Service Platform
   3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
   4 //
   5 // Licensed under the Apache License, Version 2.0 (the License);
   6 // you may not use this file except in compliance with the License.
   7 // You may obtain a copy of the License at
   8 //
   9 //     http://www.apache.org/licenses/LICENSE-2.0
  10 //
  11 // Unless required by applicable law or agreed to in writing, software
  12 // distributed under the License is distributed on an "AS IS" BASIS,
  13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 // See the License for the specific language governing permissions and
  15 // limitations under the License.
  16 //
  17
  18 /**
  19  * @file                FBaseCharacter.h
  20  * @brief               This is the header file for the %Character class.
  21  *
  22  * This header file contains the declarations of the %Character class.
  23  */
  24 #ifndef _FBASE_CHARACTER_H_
  25 #define _FBASE_CHARACTER_H_
  26
  27 #include <FBaseObject.h>
  28 #include <FBaseString.h>
  29
  30
  31 namespace Tizen { namespace Base
  32 {
  33 /**
  34  *      @enum   UnicodeCategory
  35  *
  36  *      Defines the constants used to distinguish the categories of Unicode characters.
  37  *      The enumerators are numbered consecutively, starting from 1 for UNICODE_SURROGATE.
  38  *      @since 2.0
  39  */
  40 enum UnicodeCategory
  41 {
  42         UNICODE_SURROGATE = 1,          /**< The surrogate Unicode category */
  43         UNICODE_MODIFIER,               /**< The spacing modifier Unicode category */
  44         UNICODE_ARROW,                  /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  45         UNICODE_SPACE,                  /**< The space Unicode category */
  46         UNICODE_PUNCTUATION,            /**< The punctuation Unicode category */
  47         UNICODE_CONTROL,                /**< The control Unicode category */
  48         UNICODE_MATH,                   /**< The math Unicode category */
  49         UNICODE_DIGIT,                  /**< The digit Unicode category */
  50         UNICODE_HANGUL,                 /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  51         UNICODE_HANJA,                  /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  52         UNICODE_COMBINING,              /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  53         UNICODE_LANGUAGE,               /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  54         UNICODE_UPPERCASE,              /**< The uppercase Unicode category */
  55         UNICODE_LOWERCASE,              /**< The lowercase Unicode category */
  56         UNICODE_TITLECASE,              /**< The titlecase Unicode category */
  57         UNICODE_LETTER,                 /**< The letter Unicode category */
  58         UNICODE_MARK,                   /**< The mark Unicode category */
  59         UNICODE_CURRENCY,               /**< The currency Unicode category */
  60         UNICODE_SEPARATOR,              /**< The separator Unicode category */
  61         UNICODE_OTHER                   /**< The other Unicode category */
  62 };
  63
  64
  65 /**
  66  *      @class  Character
  67  *      @brief  This class is the wrapper class for the @c wchar_t built-in type.
  68  *
  69  *      @since 2.0
  70  *
  71  *      The %Character class wraps a value of the @c wchar_t type. It also provides
  72  *      several methods for determining a Unicode character's category, and for
  73  *      converting the case of intrinsic characters. The class is useful when
  74  *      passing a Unicode character to a method expecting an instance of Object.
  75  *
  76  *      The following example demonstrates how to use the %Character class.
  77  *
  78  *      @code
  79  *
  80  *      #include <FBase.h>
  81  *
  82  *      using namespace Tizen::Base;
  83  *
  84  *      // This method converts the first character of the given string to uppercase.
  85  *      void
  86  *      MyClass::CharacterSample(String& str)
  87  *      {
  88  *              wchar_t chr, upper;
  89  *
  90  *              str.GetCharAt(0, chr);
  91  *
  92  *              // Converts to uppercase (ToUpper() is deprecated in favor of ToUpperCase())
  93  *              upper = Character::ToUpperCase(chr);
  94  *
  95  *              str.SetCharAt(upper, 0);
  96  *      }
  97  *      @endcode
  98  */
  99 class _OSP_EXPORT_ Character
 100         : public Object
 101 {
 102 public:
 103         /**
 104          *      Initializes this instance of %Character with the specified parameter.
 105          *
 106          *      @since 2.0
 107          *
 108          *      @param[in]      value   A wide character (@c wchar_t) used to initialize the %Character instance
 109          */
 110         Character(wchar_t value);
 111
 112         /**
 113          *      Copying of objects using this copy constructor is allowed.
 114          *
 115          *      @since 2.0
 116          *
 117          *      @param[in]      value   An instance of %Character to copy
 118          */
 119         Character(const Character& value);
 120
 121         /**
 122          *      This destructor overrides Tizen::Base::Object::~Object().
 123          *
 124          *      @since 2.0
 125          */
 126         virtual ~Character(void);
 127
 128         /**
 129          *      Copying of objects using this copy assignment operator is allowed.
 130          *
 131          *      @since 2.0
 132          *
 133          *      @param[in]      rhs     An instance of %Character to assign from
 134          */
 135         Character& operator =(const Character& rhs);
 136
 137         /**
 138          *      Compares (ordinal comparison) the value of the calling instance
 139          *      with the value of the input %Character instance.
 140          *
 141          *      @since 2.0
 142          *
 143          *      @return         A 32-bit @c signed integer value
 144          *      @param[in]      value   The character instance to compare with
 145          *
 146          *      @code
 147          *      <  0  if the value of the current instance is less than the value of the input instance
 148          *      == 0  if the value of the current instance is equal to the value of the input instance
 149          *      >  0  if the value of the current instance is greater than the value of the input instance
 150          *      @endcode
 151          *
 152          */
 153         int CompareTo(const Character& value) const;
 154
 155         /**
 156          *      Checks whether the value of the Object parameter is equal
 157          *      to the value of the calling object.
 158          *
 159          *      @since 2.0
 160          *      @return         @c true if the input Object is equal to the calling %Object, @n
 161          *                              else @c false
 162          *      @param[in]      obj             The object to compare with the calling object
 163          *      @see                    Tizen::Base::Object::Equals()
 164          */
 165         virtual bool Equals(const Object& obj) const;
 166
 167         /**
 168          *      Gets the hash value of the current instance.
 169          *
 170          *      @since 2.0
 171          *
 172          *      @return         The hash value of the current instance
 173          *      @remarks        Two instances that are equal according to Tizen::Base::Object::Equals() must return the same hash value. For better performance,
 174          *                              the used hash function must generate a random distribution for all inputs.
 175          */
 176         virtual int GetHashCode(void) const;
 177
 178         /**
 179          *      Returns the value of the current instance as a
 180          *      @c wchar_t.
 181          *
 182          *      @since 2.0
 183          *
 184          *      @return         The value of this instance as
 185          *                              a @c wchar_t
 186          */
 187         wchar_t ToMchar(void) const;
 188
 189         /**
 190          *      @if OSPDEPREC
 191          *      Converts the Unicode character of the calling object to its lowercase equivalent. @n
 192          *      Unicode characters other than the English alphabet are not changed.
 193          *
 194          *      @brief  <i> [Deprecated] </i>
 195          *      @deprecated This method is deprecated as a new method has been introduced.
 196          *      Instead of using this method, use the ToLowerCase() method, which also supports Unicode characters other than the English alphabet.
 197          *
 198          *      @since 2.0
 199          *      @endif
 200          */
 201         void ToLower(void);
 202
 203
 204         /**
 205          *      Converts the Unicode character of the calling object to its lowercase equivalent. @n
 206          *      Unicode characters other than the English alphabet are also supported.
 207          *
 208          *      @since 2.0
 209          */
 210         void ToLowerCase(void);
 211
 212
 213         /**
 214          *      @if OSPDEPREC
 215          *      Converts the Unicode character of the current object to its uppercase equivalent. @n
 216          *      Unicode characters other than the English alphabet are not changed.
 217          *
 218          *      @brief  <i> [Deprecated] </i>
 219          *      @deprecated This method is deprecated as a new method has been introduced.
 220          *      Instead of using this method, use the ToUpperCase() method, which also supports Unicode characters other than the English alphabet.
 221          *
 222          *      @since 2.0
 223          *      @endif
 224          */
 225         void ToUpper(void);
 226
 227
 228         /**
 229          *      Converts the Unicode character of the current object to its uppercase equivalent. @n
 230          *      Unicode characters other than the English alphabet are also supported.
 231          *
 232          *      @since 2.0
 233          */
 234         void ToUpperCase(void);
 235
 236
 237         /**
 238          *      Returns a string representing the value of the calling %Character instance.
 239          *
 240          *      @since 2.0
 241          *
 242          *      @return         An instance of the String class that
 243          *                              contains a Unicode representation of the calling instance
 244          */
 245         String ToString(void) const;
 246
 247         /**
 248          *      Returns a string representation of the input
 249          *      @c wchar_t value.
 250          *
 251          *      @since 2.0
 252          *
 253          *      @return         An instance of the String class
 254          *                              that contains the Unicode representation of the
 255          *                              input value
 256          *      @param[in]      value   The Unicode character to convert
 257          */
 258         static String ToString(wchar_t value);
 259
 260         /**
 261          *      Categorizes a Unicode character into a group identified by
 262          *      one of the UnicodeCategory values.
 263          *
 264          *      @since 2.0
 265          *
 266          *      @return         A value of type UnicodeCategory that identifies the group that contains the specified @c ch
 267          *      @param[in]      ch      The Unicode character to categorize
 268          *
 269          */
 270         static UnicodeCategory GetUnicodeCategory(wchar_t ch);
 271
 272         /**
 273          *      @if OSPDEPREC
 274          *      Converts the input Unicode character to its lowercase equivalent. @n
 275          *      Unicode characters other than the English alphabet are not changed.
 276          *
 277          *      @brief  <i> [Deprecated] </i>
 278          *      @deprecated This method is deprecated as a new method has been introduced.
 279          *      Instead of using this method, use the ToLowerCase(wchar_t ch) method, which also supports Unicode characters other than the English alphabet.
 280          *
 281          *      @since 2.0
 282          *      @return         A lowercase equivalent of the input Unicode character
 283          *      @param[in]      ch      The Unicode character to convert
 284          *      @endif
 285          */
 286         static wchar_t ToLower(wchar_t ch);
 287
 288         /**
 289          *      Converts the input Unicode character to its lowercase equivalent. @n
 290          *      Unicode characters other than the English alphabet are also supported.
 291          *
 292          *      @since 2.0
 293          *
 294          *      @return         A lowercase equivalent of the input Unicode character
 295          *      @param[in]      ch      The Unicode character to convert
 296          */
 297         static wchar_t ToLowerCase(wchar_t ch);
 298
 299         /**
 300          *      @if OSPDEPREC
 301          *      Converts the input Unicode character to its uppercase equivalent. @n
 302          *      Unicode characters other than the English alphabet are not changed.
 303          *
 304          *      @brief  <i> [Deprecated] </i>
 305          *      @deprecated This method is deprecated as a new method has been introduced.
 306          *      Instead of using this method, use the ToUpperCase(wchar_t ch) method, which also supports Unicode characters other than the English alphabet.
 307          *
 308          *      @since 2.0
 309          *      @return         An uppercase equivalent of the input Unicode character
 310          *      @param[in]      ch      The Unicode character to convert
 311          *      @endif
 312          */
 313         static wchar_t ToUpper(wchar_t ch);
 314
 315         /**
 316          *      Converts the input Unicode character to its uppercase equivalent. @n
 317          *      Unicode characters other than the English alphabet are also supported.
 318          *
 319          *      @since 2.0
 320          *
 321          *      @return         An uppercase equivalent of the input Unicode character
 322          *      @param[in]      ch      The Unicode character to convert
 323          */
 324         static wchar_t ToUpperCase(wchar_t ch);
 325
 326         /**
 327          *      Checks whether the input character is an alphanumeric character (letter or digit). @n
 328          *      A character is considered to be an alphanumeric character if either Character::IsLetter(wchar_t ch) or Character::IsDigit(wchar_t ch) returns @c true for the character.
 329          *      @if OSPCOMPAT
 330          *      @brief <i> [Compatibility] </i>
 331          *      @endif
 332          *      @since 2.0
 333          *      @if OSPCOMPAT
 334          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 335          *                      For more information, see @ref CompCharacterIsAlphaNumericPage "here".
 336          *      @endif
 337          *      @return         @c true if the input character is alphanumeric, @n
 338          *                              else @c false
 339          *      @param[in]      ch      The Unicode character
 340          */
 341         static bool IsAlphaNumeric(wchar_t ch);
 342
 343         /**
 344          *      @if OSPCOMPAT
 345          *      @page                   CompCharacterIsAlphaNumericPage Compatibility for IsAlphaNumeric()
 346          *      @section                CompCharacterIsAlphaNumericPageIssueSection Issues
 347          *                              Implementing this method in OSP compatible applications has the following issues: @n
 348          *
 349          *      -# The method returns @c true only if the character is an alphabet character and cannot check other Unicode characters in the Letter and Digit categories.
 350          *
 351          *      @section                CompCharacterIsAlphaNumericPageSolutionSection Resolutions
 352          *
 353          *      This issue has been resolved in Tizen. @n
 354          *      @endif
 355          */
 356
 357
 358         /**
 359          *      Checks whether the input character is a digit.
 360          *
 361          *      @since 2.0
 362          *
 363          *      @return         @c true if the input character is a digit, @n
 364          *                              else @c false
 365          *      @param[in]      ch      The Unicode character
 366          */
 367         static bool IsDigit(wchar_t ch);
 368
 369         /**
 370          *      Checks whether the input character is a Unicode letter.
 371          *      @if OSPCOMPAT
 372          *      @brief <i> [Compatibility] </i>
 373          *      @endif
 374          *      @since 2.0
 375          *      @if OSPCOMPAT
 376          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 377          *                      For more information, see @ref CompCharacterIsLetterPage "here".
 378          *      @endif
 379          *      @return         @c true if the input character is a Unicode letter, @n
 380          *                              else @c false
 381          *      @param[in]      ch      The Unicode character
 382          */
 383         static bool IsLetter(wchar_t ch);
 384
 385         /**
 386          *      @if OSPCOMPAT
 387          *      @page                   CompCharacterIsLetterPage Compatibility for IsLetter()
 388          *      @section                CompCharacterIsLetterPageIssueSection Issues
 389          *                              Implementing this method in OSP compatible applications has the following issues: @n
 390          *
 391          *      -# The method returns @c true only if the character is an alphabet character and cannot check other Unicode characters in the Letter category.
 392          *
 393          *      @section                CompCharacterIsLetterPageSolutionSection Resolutions
 394          *
 395          *      This issue has been resolved in Tizen. @n
 396          *      @endif
 397          */
 398
 399         /**
 400          *      @if OSPDEPREC
 401          *      Checks whether the input character is a lowercase alphabet.
 402          *
 403          *      @brief  <i> [Deprecated] </i>
 404          *      @deprecated This method is deprecated as a new method has been introduced.
 405          *      Instead of using this method, use the IsLowerCase(wchar_t ch) method, which supports Unicode characters.
 406          *      @since 2.0
 407          *
 408          *      @return         @c true if the input character is a lowercase English alphabet, @n
 409          *                              else @c false
 410          *      @param[in]      ch      The Unicode character
 411          *      @endif
 412          */
 413         static bool IsLower(wchar_t ch);
 414
 415         /**
 416          *      Checks whether the input character is a lowercase alphabet. @n
 417          *      Unicode characters other than the English alphabet are also supported.
 418          *
 419          *      @since 2.0
 420          *
 421          *      @return         @c true if the input character is a lowercase alphabet, @n
 422          *                              else @c false
 423          *      @param[in]      ch      The Unicode character
 424          */
 425         static bool IsLowerCase(wchar_t ch);
 426
 427         /**
 428          *      @if OSPDEPREC
 429          *      Checks whether the input character is an uppercase alphabet.
 430          *
 431          *      @brief  <i> [Deprecated] </i>
 432          *      @deprecated This method is deprecated as a new method has been introduced.
 433          *      Instead of using this method, use the IsUpperCase(wchar_t ch) method, which also supports Unicode characters other than the English alphabet.
 434          *      @since 2.0
 435          *
 436          *      @return         @c true if the input character is an uppercase alphabet, @n
 437          *                              else @c false
 438          *      @param[in]      ch      The Unicode character
 439          *      @endif
 440          */
 441         static bool IsUpper(wchar_t ch);
 442
 443         /**
 444          *      Checks whether the input character is an uppercase alphabet. @n
 445          *      Unicode characters other than the English alphabet are also supported.
 446          *
 447          *      @since 2.0
 448          *
 449          *      @return         @c true if the input character is an uppercase alphabet, @n
 450          *                              else @c false
 451          *      @param[in]      ch      The Unicode character
 452          */
 453         static bool IsUpperCase(wchar_t ch);
 454
 455         /**
 456          *      Returns the value of the input character in the supplied radix. @n
 457          *      The value of radix must be between RADIX_MIN and RADIX_MAX.
 458          *
 459          *      @since 2.0
 460          *
 461          *      @return         An integer value of the input character in the supplied radix
 462          *      @param[in]      ch      The character to determine the value
 463          *      @param[in]      radix   The radix
 464          */
 465         static int ToDigit(wchar_t ch, int radix);
 466
 467         /**
 468          *      Returns the character that represents the input digit in the specified radix. @n
 469          *      The value of radix must be between RADIX_MIN and RADIX_MAX.
 470          *
 471          *      @since 2.0
 472          *
 473          *      @return         A Unicode character of the input digit with specified radix; null character (U+0000) otherwise
 474          *      @param[in]      digit   The digit to determine the value
 475          *      @param[in]      radix   The radix
 476          */
 477         static wchar_t ForDigit(int digit, int radix);
 478
 479         /**
 480          *      Gets the numeric value of the input Unicode character. @n
 481          *      This is used when some numeric values are fractions, negative, or too large for an @c int value.
 482          *
 483          *      @since 2.0
 484          *
 485          *      @return         The numeric value of the character as a @c double, @n else NO_NUMERIC_VALUE if no numeric value is defined for the character
 486          *      @param[in]      ch      A Unicode character
 487          */
 488         static double GetNumericValue(wchar_t ch);
 489
 490         /**
 491          *      Checks whether the input character is defined, which usually means that it is assigned a character in Unicode.
 492          *
 493          *      @since 2.0
 494          *
 495          *      @return         @c true if the Unicode character is an assigned character, @n
 496          *                          else @c false
 497          *      @param[in]      ch      A Unicode character
 498          */
 499         static bool IsDefined(wchar_t ch);
 500
 501         /**
 502          *      Checks whether the input character is a whitespace character. @n
 503          *      A character is considered to be a whitespace character if and only if it satisfies one of the criteria listed below.
 504          *
 505          *      @since 2.0
 506          *
 507          *      @return         @c true if the Unicode character is a whitespace character, @n
 508          *                          else @c false
 509          *      @param[in]      ch      A Unicode character
 510          *
 511          *      @code
 512          *      It is a Unicode Separator character, but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
 513          *      It is U+0009 HORIZONTAL TABULATION.
 514          *      It is U+000A LINE FEED.
 515          *      It is U+000B VERTICAL TABULATION.
 516          *      It is U+000C FORM FEED.
 517          *      It is U+000D CARRIAGE RETURN.
 518          *      It is U+001C FILE SEPARATOR.
 519          *      It is U+001D GROUP SEPARATOR.
 520          *      It is U+001E RECORD SEPARATOR.
 521          *      It is U+001F UNIT SEPARATOR.
 522          *      @endcode
 523          */
 524         static bool IsWhitespace(wchar_t ch);
 525
 526
 527         /**
 528          *      Checks whether the input character is a title case character.
 529          *
 530          *      @since 2.0
 531          *
 532          *      @return         @c true if the Unicode character is a title case character, @n
 533          *                          else @c false
 534          *      @param[in]      ch      A Unicode character
 535          */
 536         static bool IsTitleCase(wchar_t ch);
 537
 538         /**
 539          *      Returns the title case equivalent for the input character if it exists.
 540          *
 541          *      @since 2.0
 542          *
 543          *      @return         A title case character equivalent for the input character. The character itself is returned if none is defined
 544          *      @param[in]      ch      A Unicode character
 545          */
 546         static wchar_t ToTitleCase(wchar_t ch);
 547
 548         /**
 549          *      Checks whether the input character is an ISO control code or not.
 550          *
 551          *      @since 2.0
 552          *
 553          *      @return         @c true if the Unicode character is an ISO control character, @n
 554          *                          else @c false
 555          *      @param[in]      ch      A Unicode character
 556          */
 557         static bool IsISOControl(wchar_t ch);
 558
 559
 560         /**
 561          * A constant holding the largest character value, 0x10FFFF (the highest Unicode code point).
 562          *
 563          * @since 2.0
 564          */
 565         static const wchar_t VALUE_MAX = 0x10FFFF; // NOTE(review): does not fit a 16-bit wchar_t - confirm wchar_t is at least 32 bits on all targets
 566
 567         /**
 568          * A constant holding the smallest character value, 0x0000.
 569          *
 570          * @since 2.0
 571          */
 572         static const wchar_t VALUE_MIN = 0x0000;
 573
 574         /**
 575          * The minimum radix available for conversion to and from strings. @n
 576          * Same value as RADIX_BINARY.
 577          *
 578          * @since 2.0
 579          */
 580         static const int RADIX_MIN = 2;
 581
 582         /**
 583          * The radix for binary numbers.
 584          *
 585          * @since 2.0
 586          */
 587         static const int RADIX_BINARY = 2;
 588
 589         /**
 590          * The radix for decimal numbers.
 591          *
 592          * @since 2.0
 593          */
 594         static const int RADIX_DECIMAL = 10;
 595
 596         /**
 597          * The radix for octal numbers.
 598          *
 599          * @since 2.0
 600          */
 601         static const int RADIX_OCTAL = 8;
 602
 603         /**
 604          * The radix for hexadecimal numbers.
 605          *
 606          * @since 2.0
 607          */
 608         static const int RADIX_HEXADECIMAL = 16;
 609
 610         /**
 611          * The maximum radix available for conversion to and from strings, 36.
 612          *
 613          * @since 2.0
 614          */
 615         static const int RADIX_MAX = 36;
 616
 617         /**
 618          * Special value that is returned by GetNumericValue(wchar_t ch) when no numeric value is defined for the Unicode character.
 619          *
 620          * @since 2.0
 621          */
 622         static const double NO_NUMERIC_VALUE = -123456789; // NOTE(review): in-class initializer on a non-integral static const is not standard C++ (C++11 requires constexpr) - confirm the toolchain accepts it
 623
 624 private:
 625         friend class Int8;
 626         friend class Short;
 627         friend class Long;
 628         friend class Integer;
 629
 630         wchar_t __val; // presumably the wrapped wchar_t value - its use is not visible in this header
 631
 632         friend class _CharacterImpl;
 633         class _CharacterImpl * __pCharacterImpl; // pointer to the implementation class; ownership and use are not visible in this header
 634
 635 }; // Character
 636
 637 }} // Tizen::Base
 638
 639 #endif // _FBASE_CHARACTER_H_