inc/FBaseCharacter.h

   1 //
   2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the License);
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //     http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 //
  16
  17 /**
  18  * @file                FBaseCharacter.h
  19  * @brief               This is the header file for the %Character class.
  20  *
  21  * This header file contains the declarations of the %Character class.
  22  */
  23 #ifndef _FBASE_CHARACTER_H_
  24 #define _FBASE_CHARACTER_H_
  25
  26 #include <FBaseObject.h>
  27 #include <FBaseString.h>
  28
  29
  30 namespace Tizen { namespace Base
  31 {
  32 /**
  33  *      @enum   UnicodeCategory
  34  *
  35  *      Defines the constants used to distinguish the categories of Unicode characters.
  36  *
  37  *      @since 2.0
  38  */
  39 enum UnicodeCategory
  40 {
  41         UNICODE_SURROGATE = 1,          /**<    The surrogate Unicode category          */
  42         UNICODE_MODIFIER,               /**<    The spacing modifier Unicode category           */
  43         UNICODE_ARROW,                  /**<    @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
  44         UNICODE_SPACE,                  /**<    The space Unicode category                      */
  45         UNICODE_PUNCTUATION,            /**<    The punctuation Unicode category        */
  46         UNICODE_CONTROL,                /**<    The control Unicode category            */
  47         UNICODE_MATH,                   /**<    The math Unicode category                       */
  48         UNICODE_DIGIT,                  /**<    The digit Unicode category                      */
  49         UNICODE_HANGUL,                 /**<    @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
  50         UNICODE_HANJA,                  /**<    @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
  51         UNICODE_COMBINING,              /**<    @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
  52         UNICODE_LANGUAGE,               /**<    @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif*/
  53         UNICODE_UPPERCASE,              /**<    The uppercase Unicode category*/
  54         UNICODE_LOWERCASE,              /**<    The lowercase Unicode category*/
  55         UNICODE_TITLECASE,              /**<    The titlecase Unicode category*/
  56         UNICODE_LETTER,                 /**<    The letter Unicode category*/
  57         UNICODE_MARK,                   /**<    The mark Unicode category*/
  58         UNICODE_CURRENCY,               /**<    The currency Unicode category*/
  59         UNICODE_SEPARATOR,              /**<    The separator Unicode category*/
  60         UNICODE_OTHER                   /**<    The other Unicode category              */
  61 };
  62
  63
  64 /**
  65  *      @class  Character
  66  *      @brief  This class is the wrapper class for the @c wchar_t built-in type.
  67  *
  68  *      @since 2.0
  69  *
  70  *      The %Character class wraps a value of the @c wchar_t type. It also provides
  71  *      several methods for determining a Unicode character's category, and for
  72  *      converting the case of intrinsic characters. The class is useful when
  73  *      passing a Unicode character to a method expecting an instance of Object.
  74  *
  75  * The following example demonstrates how to use the %Character class.
  76  *
  77  *      @code
  78  *
  79  *      #include <FBase.h>
  80  *
  81  *      using namespace Tizen::Base;
  82  *
  83  *      // This method converts the first character of the given @c string to the upper case.
  84  *      void
  85  *      MyClass::CharacterSample(String& str)
  86  *      {
  87  *              wchar_t chr, upper;
  88  *
  89  *              str.GetCharAt(0, chr);
  90  *
  91  *    // Converts to upper case
  92  *              upper = Character::ToUpper(chr);
  93  *
  94  *              str.SetCharAt(upper, 0);
  95  *  }
  96  *      @endcode
  97  */
  98 class _OSP_EXPORT_ Character
  99         : public Object
 100 {
 101 public:
 102         /**
 103          *      Initializes this instance of %Character with the specified parameter.
 104          *
 105          *      @since 2.0
 106          *
 107          *      @param[in]      value   A multi-byte character used to initialize the %Character instance
 108          */
 109         Character(wchar_t value);
 110
 111         /**
 112          *      Copying of objects using this copy constructor is allowed.
 113          *
 114          *      @since 2.0
 115          *
 116          *      @param[in]      value   An instance of %Character
 117          */
 118         Character(const Character& value);
 119
 120         /**
 121          *      This destructor overrides Tizen::Base::Object::~Object().
 122          *
 123          *      @since 2.0
 124          */
 125         virtual ~Character(void);
 126
 127         /**
 128          *      Copying of objects using this copy assignment operator is allowed.
 129          *
 130          *      @since 2.0
 131          *
 132          *      @param[in]      rhs     An instance of %Character
 133          */
 134         Character& operator =(const Character& rhs);
 135
 136         /**
 137          *      Compares (ordinal comparison) the value of the calling instance
 138          *      with the value of the input %Character instance.
 139          *
 140          *      @since 2.0
 141          *
 142          *      @return         A 32-bit @c signed integer value
 143          *  @param[in]  value   The character instance to compare with
 144          *
 145          *      @code
 146          *      @li <  0  if the value of the current instance is less than the value of the input instance
 147          *      @li == 0  if the value of the current instance is equal to the value of the input instance
 148          *      @li >  0  if the value of the current instance is greater than the value of the input instance
 149          *      @endcode
 150          *
 151          */
 152         int CompareTo(const Character& value) const;
 153
 154         /**
 155          *      Checks whether the value of the Object parameter is equal
 156          *      to the value of the calling object.
 157          *
 158          *      @since 2.0
 159          *      @return         @c true if the input Object is equal to the calling %Object, @n
 160          *                              else @c false
 161          *      @param[in]      obj             The object to compare with the calling object
 162          *      @see                    Tizen::Base::Object::Equals()
 163          */
 164         virtual bool Equals(const Object& obj) const;
 165
 166         /**
 167          *      Gets the hash value of the current instance.
 168          *
 169          *      @since 2.0
 170          *
 171          *      @return         The hash value of the current instance
 172          *      @remarks        The two Tizen::Base::Object::Equals() instances must return the same hash value. For better performance,
 173          *                              the used hash function must generate a random distribution for all inputs.
 174          */
 175         virtual int GetHashCode(void) const;
 176
 177         /**
 178          *      Returns the value of the current instance as a
 179          *      @c wchar_t.
 180          *
 181          *      @since 2.0
 182          *
 183          *      @return         The value of this instance as
 184          *                              a @c wchar_t
 185          */
 186         wchar_t ToMchar(void) const;
 187
 188         /**
 189          *      @if OSPDEPREC
 190          *      Converts the Unicode characters of the calling object to its equivalent lowercase. @n
 191          *      The Unicode characters other than English alphabets are not changed.
 192          *
 193          *      @brief  <i> [Deprecated] </i>
 194          *      @deprecated This method is deprecated as a new method has been introduced.
 195          *      Instead of using this method, use the ToLowerCase() method that supports Unicode characters other than English alphabets.
 196          *
 197          *      @since 2.0
 198          *      @endif
 199          */
 200         void ToLower(void);
 201
 202
 203         /**
 204          *      Converts the Unicode characters of the calling object to its equivalent lowercase. @n
 205          *      The Unicode characters other than English alphabets are also supported.
 206          *
 207          *      @since 2.0
 208          */
 209         void ToLowerCase(void);
 210
 211
 212         /**
 213          *      @if OSPDEPREC
 214          *      Converts the Unicode characters of the current object to its equivalent uppercase. @n
 215          *      The Unicode characters other than English alphabets are not changed.
 216          *
 217          *      @brief  <i> [Deprecated] </i>
 218          *      @deprecated This method is deprecated as a new method has been introduced.
 219          *      Instead of using this method, use the ToUpperCase() method that supports the Unicode characters other than English alphabets.
 220          *
 221          *      @since 2.0
 222          *      @endif
 223          */
 224         void ToUpper(void);
 225
 226
 227         /**
 228          *      Converts the Unicode characters of the current object to its equivalent uppercase. @n
 229          *      The Unicode characters other than English alphabets are also supported.
 230          *
 231          *      @since 2.0
 232          */
 233         void ToUpperCase(void);
 234
 235
 236         /**
 237          *      Returns a string representing the value of the calling %Character instance.
 238          *
 239          *      @since 2.0
 240          *
 241          *      @return         An instance of String class that
 242          *                              contains a Unicode representation of the calling instance
 243          */
 244         String ToString(void) const;
 245
 246         /**
 247          *      Returns a string representation of the input
 248          *      @c wchar_t value.
 249          *
 250          *      @since 2.0
 251          *
 252          *      @return         An instance of the String class
 253          *                              that contains the Unicode representation of the
 254          *                              input value
 255          *      @param[in]      value   The Unicode character to convert
 256          */
 257         static String ToString(wchar_t value);
 258
 259         /**
 260          *      Categorizes a Unicode character into a group identified by
 261          *      one of the UnicodeCategory values.
 262          *
 263          *      @since 2.0
 264          *
 265          *      @return             A value of type UnicodeCategory that identifies the group that contains the specified @c ch
 266          *      @param[in]      ch      The Unicode character to categorize
 267          *
 268          */
 269         static UnicodeCategory GetUnicodeCategory(wchar_t ch);
 270
 271         /**
 272          *      @if OSPDEPREC
 273          *      Converts the input Unicode character to its equivalent lowercase. @n
 274          *      The Unicode characters other than English alphabets are not changed.
 275          *
 276          *      @brief  <i> [Deprecated] </i>
 277          *      @deprecated This method is deprecated as a new method has been introduced.
 278          *      Instead of using this method, use the ToLowerCase(wchar_t ch) method that supports the Unicode characters other than English alphabets.
 279          *
 280          *      @since 2.0
 281          *      @return         An lowercase equivalent of the input Unicode character
 282          *      @param[in]      ch      The Unicode character to convert
 283          *      @endif
 284          */
 285         static wchar_t ToLower(wchar_t ch);
 286
 287         /**
 288          *      Converts the input Unicode character to its equivalent lowercase. @n
 289          *      The Unicode characters other than English alphabets are also supported.
 290          *
 291          *      @since 2.0
 292          *
 293          *      @return         An lowercase equivalent of the input Unicode character
 294          *      @param[in]      ch      The Unicode character to convert
 295          */
 296         static wchar_t ToLowerCase(wchar_t ch);
 297
 298         /**
 299          *      @if OSPDEPREC
 300          *      Converts the input Unicode character to its equivalent uppercase. @n
 301          *  The Unicode characters other than English alphabets are not changed.
 302          *
 303          *      @brief  <i> [Deprecated] </i>
 304          *      @deprecated This method is deprecated as a new method has been introduced.
 305          *      Instead of using this method, use the ToUpperCase(wchar_t ch) method that supports Unicode characters other than English alphabets.
 306          *
 307          *      @since 2.0
 308          *      @return         An uppercase equivalent of the input Unicode character
 309          *      @param[in]      ch      The Unicode character to convert
 310          *      @endif
 311          */
 312         static wchar_t ToUpper(wchar_t ch);
 313
 314         /**
 315          *      Converts the input Unicode character to its equivalent uppercase. @n
 316          *      The Unicode characters other than English alphabets are also supported.
 317          *
 318          *      @since 2.0
 319          *
 320          *      @return         An uppercase equivalent of the input Unicode character
 321          *      @param[in]      ch      The Unicode character to convert
 322          */
 323         static wchar_t ToUpperCase(wchar_t ch);
 324
 325         /**
 326          *      Checks whether the input character is an alphanumeric character (letter or digit). @n
 327          *      A character is considered to be an alphanumeric character if either Character::isLetter(wchar_t ch) or Character::isDigit(wchar_t ch) returns true for the character
 328          *      @if OSPCOMPAT
 329          *      @brief <i> [Compatibility] </i>
 330          *      @endif
 331          *      @since 2.0
 332          *      @if OSPCOMPAT
 333          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 334          *                      For more information, see @ref CompCharacterIsAlphaNumericPage "here".
 335          *      @endif
 336          *      @return         @c true if the input character is alphanumeric, @n
 337          *                              else @c false
 338          *      @param[in]      ch      The Unicode character
 339          */
 340         static bool IsAlphaNumeric(wchar_t ch);
 341
 342         /**
 343          *      @if OSPCOMPAT
 344          *      @page                   CompCharacterIsAlphaNumericPage Compatibility for IsAlphaNumeric()
 345          *      @section                CompCharacterIsAlphaNumericPageIssueSection Issues
 346          *                              Implementing this method in OSP compatible applications has the following issues: @n
 347          *
 348          *      -# The method returns true only if the character is alphabet character and cannot checks other Unicode character in Letter and digit category.
 349          *
 350          *      @section                CompCharacterIsAlphaNumericPageSolutionSection Resolutions
 351          *
 352          *      This issue has been resolved in Tizen.
 353          *      @endif
 354          */
 355
 356
 357         /**
 358          *      Checks whether the input character is a digit.
 359          *
 360          *      @since 2.0
 361          *
 362          *      @return         @c true if the input character is a digit, @n
 363          *                              else @c false
 364          *      @param[in]      ch      The Unicode character
 365          */
 366         static bool IsDigit(wchar_t ch);
 367
 368         /**
 369          *      Checks whether the input character is a Unicode letter.
 370          *      @if OSPCOMPAT
 371          *      @brief <i> [Compatibility] </i>
 372          *      @endif
 373          *      @since 2.0
 374          *      @if OSPCOMPAT
 375          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 376          *                      For more information, see @ref CompCharacterIsLetterPage "here".
 377          *      @endif
 378          *      @return         @c true if the input character is an alphabet, @n
 379          *                              else @c false
 380          *      @param[in]      ch      The Unicode character
 381          */
 382         static bool IsLetter(wchar_t ch);
 383
 384         /**
 385          *      @if OSPCOMPAT
 386          *      @page                   CompCharacterIsLetterPage Compatibility for IsLetter()
 387          *      @section                CompCharacterIsLetterPageIssueSection Issues
 388          *                              Implementing this method in OSP compatible applications has the following issues: @n
 389          *
 390          *      -# The method returns true only if the character is alphabet character and cannot checks other Unicode character in Letter category.
 391          *
 392          *      @section                CompCharacterIsLetterPageSolutionSection Resolutions
 393          *
 394          *      This issue has been resolved in Tizen.
 395          *      @endif
 396          */
 397
 398         /**
 399          *      @if OSPDEPREC
 400          *      Checks whether the input character is a lowercase alphabet.
 401          *
 402          *      @brief  <i> [Deprecated] </i>
 403          *      @deprecated This method is deprecated as a new method has been introduced.
 404          *      Instead of using this method, use the IsLowerCase(wchar_t ch) method that supports Unicode characters.
 405          *      @since 2.0
 406          *
 407          *      @return         @c true if the input character is a lowercase English alphabet, @n
 408          *                              else @c false
 409          *      @param[in]      ch      The Unicode character
 410          *      @endif
 411          */
 412         static bool IsLower(wchar_t ch);
 413
 414         /**
 415          *      Checks whether the input character is a lowercase alphabet. @n
 416          *      Unicode characters other than English alphabets are also supported.
 417          *
 418          *      @since 2.0
 419          *
 420          *      @return         @c true if the input character is a lowercase alphabet, @n
 421          *                              else @c false
 422          *      @param[in]      ch The Unicode character
 423          */
 424         static bool IsLowerCase(wchar_t ch);
 425
 426         /**
 427          *      @if OSPDEPREC
 428          *      Checks whether the input character is an uppercase alphabet.
 429          *
 430          *      @brief  <i> [Deprecated] </i>
 431          *      @deprecated This method is deprecated as a new method has been introduced.
 432          *      Instead of using this method, use the IsUpperCase(wchar_t ch) method that also supports Unicode characters other than English alphabets.
 433          *      @since 2.0
 434          *
 435          *      @return         @c true if the input character is an uppercase alphabet, @n
 436          *                              else @c false
 437          *      @param[in]      ch The Unicode character
 438          *      @endif
 439          */
 440         static bool IsUpper(wchar_t ch);
 441
 442         /**
 443          *      Checks whether the input character is an uppercase alphabet. @n
 444          *      Unicode characters other than English alphabets are also supported.
 445          *
 446          *      @since 2.0
 447          *
 448          *      @return         @c true if the input character is an uppercase alphabet, @n
 449          *                              else @c false
 450          *      @param[in]      ch The Unicode character
 451          */
 452         static bool IsUpperCase(wchar_t ch);
 453
 454         /**
 455          *      Returns the value of the input character in the supplied radix. @n
 456          * The value of radix must be between RADIX_MIN and RADIX_MAX.
 457          *
 458          *      @since 2.0
 459          *
 460          *      @return         A integer value of the input character in the supplied radix
 461          *      @param[in]      ch   The character to determine the value
 462          *      @param[in]      radix   The radix
 463          */
 464         static int ToDigit(wchar_t ch, int radix);
 465
 466         /**
 467          *      Returns the value which represents the input digit with specified radix. @n
 468          * The value of radix must be between RADIX_MIN and RADIX_MAX.
 469          *
 470          *      @since 2.0
 471          *
 472          *      @return         A Unicode character of the input digit with specified @c radix @n
 473          *                              else @c null character (U+0000)
 474          *      @param[in]      digit   The digit to determine the value
 475          *      @param[in]      radix   The radix
 476          */
 477         static wchar_t ForDigit(int digit, int radix);
 478
 479         /**
 480          *      Gets the numeric value of the input unicode character. @n
 481      * This is used when some numeric values are fractions, negative, or too large for @c int value.
 482          *
 483          *      @since 2.0
 484          *
 485          *      @return         A @c double value @n NO_NUMERIC_VALUE for characters without any numeric values in the Unicode %Character.
 486          *      @param[in]      ch      A Unicode character
 487          */
 488         static double GetNumericValue(wchar_t ch);
 489
 490         /**
 491          *      Checks whether the input character is defined, which usually means that it is assigned a character in the Unicode.
 492          *
 493          *      @since 2.0
 494          *
 495          *      @return         @c true if the Unicode character is an assigned character, @n
 496          *                          else @c false
 497          *      @param[in]      ch      A Unicode character
 498          */
 499         static bool IsDefined(wchar_t ch);
 500
 501         /**
 502          *      Checks whether the input character is a whitespace character. @n
 503          *      A character is considered to be a whitespace character if and only if it satisfies one of the following criteria:
 504          *
 505          *      @since 2.0
 506          *
 507          *      @return         @c true if the Unicode character is a whitespace character, @n
 508          *                          else @c false
 509          *      @param[in]      ch      A Unicode character
 510          *
 511          *      @code
 512          *      It is a Unicode Separator character, but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
 513          *      It is U+0009 HORIZONTAL TABULATION.
 514          *      It is U+000A LINE FEED.
 515          *      It is U+000B VERTICAL TABULATION.
 516          *      It is U+000C FORM FEED.
 517          *      It is U+000D CARRIAGE RETURN.
 518          *      It is U+001C FILE SEPARATOR.
 519          *      It is U+001D GROUP SEPARATOR.
 520          *      It is U+001E RECORD SEPARATOR.
 521          *      It is U+001F UNIT SEPARATOR.
 522          *      @endcode
 523          */
 524         static bool IsWhitespace(wchar_t ch);
 525
 526
 527         /**
 528          *      Checks whether the input character is a title character.
 529          *
 530          *      @since 2.0
 531          *
 532          *      @return         @c true if the Unicode character is a title character, @n
 533          *                          else @c false
 534          *      @param[in]      ch      A Unicode character
 535          */
 536         static bool IsTitleCase(wchar_t ch);
 537
 538         /**
 539          *      Returns the title case equivalent for the input character if it exists.
 540          *
 541          *      @since 2.0
 542          *
 543          *      @return         A title case character equivalent for the input character @n The character itself is returned if none is defined.
 544          *      @param[in]      ch      A Unicode character
 545          */
 546         static wchar_t ToTitleCase(wchar_t ch);
 547
 548         /**
 549          *      Checks whether the input character is an ISO control code or not.
 550          *
 551          *      @since 2.0
 552          *
 553          *      @return         @c true if the Unicode character is an ISO control character, @n
 554          *                          else @c false
 555          *      @param[in]      ch      A Unicode character
 556          */
 557         static bool IsISOControl(wchar_t ch);
 558
 559
 560         /**
 561          * A constant holding the largest value of type @c wchar_t, 0xFFFF.
 562          *
 563          * @since 2.0
 564          */
 565         static const wchar_t VALUE_MAX = 0x10FFFF;
 566
 567         /**
 568          * A constant holding the smallest value of type wchar_t, 0x0000.
 569          *
 570          * @since 2.0
 571          */
 572         static const wchar_t VALUE_MIN = 0x0000;
 573
 574         /**
 575          * The minimum radix available for conversion to and from strings. @n
 576          * Same value as RADIX_BINARY.
 577          *
 578          * @since 2.0
 579          */
 580         static const int RADIX_MIN = 2;
 581
 582         /**
 583          * The radix for binary number.
 584          *
 585          * @since 2.0
 586          */
 587         static const int RADIX_BINARY = 2;
 588
 589         /**
 590          * The radix for decimal number.
 591          *
 592          * @since 2.0
 593          */
 594         static const int RADIX_DECIMAL = 10;
 595
 596         /**
 597          * The radix for octal number.
 598          *
 599          * @since 2.0
 600          */
 601         static const int RADIX_OCTAL = 8;
 602
 603         /**
 604          * The radix for hexadecimal number.
 605          *
 606          * @since 2.0
 607          */
 608         static const int RADIX_HEXADECIMAL = 16;
 609
 610         /**
 611          * The maximum radix available for conversion to and from strings. Same value as RADIX_HEXADECIMAL.
 612          *
 613          * @since 2.0
 614          */
 615         static const int RADIX_MAX = 36;
 616
 617         /**
 618          * Special value that is returned by GetNumericValue(wchar_t ch) when no numeric value is defined for the unicode character.
 619          *
 620          * @since 2.0
 621          */
 622         static const double NO_NUMERIC_VALUE = -123456789.0;
 623
 624 private:
 625         friend class Int8;
 626         friend class Short;
 627         friend class Long;
 628         friend class Integer;
 629
 630         wchar_t __val;
 631
 632         friend class _CharacterImpl;
 633         class _CharacterImpl * __pCharacterImpl;
 634
 635 }; // Character
 636
 637 }} // Tizen::Base
 638
 639 #endif // _FBASE_CHARACTER_H_