inc/FBaseCharacter.h

   1 //
   2 // Open Service Platform
   3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
   4 //
   5 // Licensed under the Apache License, Version 2.0 (the License);
   6 // you may not use this file except in compliance with the License.
   7 // You may obtain a copy of the License at
   8 //
   9 //     http://www.apache.org/licenses/LICENSE-2.0
  10 //
  11 // Unless required by applicable law or agreed to in writing, software
  12 // distributed under the License is distributed on an "AS IS" BASIS,
  13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 // See the License for the specific language governing permissions and
  15 // limitations under the License.
  16 //
  17
  18 /**
  19  * @file                FBaseCharacter.h
  20  * @brief               This is the header file for the %Character class.
  21  *
  22  * This header file contains the declarations of the %Character class.
  23  */
  24 #ifndef _FBASE_CHARACTER_H_
  25 #define _FBASE_CHARACTER_H_
  26
  27 #include <FBaseObject.h>
  28 #include <FBaseString.h>
  29
  30
  31 namespace Tizen { namespace Base
  32 {
  33 /**
  34  *      @enum   UnicodeCategory
  35  *
  36  *      Defines the constants used to distinguish the categories of Unicode characters.
  37  *      The enumerators are numbered consecutively, starting from UNICODE_SURROGATE = 1.
  38  *      @since 2.0
  39  */
  40 enum UnicodeCategory
  41 {
  42         UNICODE_SURROGATE = 1,          /**< The surrogate Unicode category */
  43         UNICODE_MODIFIER,               /**< The spacing modifier Unicode category */
  44         UNICODE_ARROW,                  /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  45         UNICODE_SPACE,                  /**< The space Unicode category */
  46         UNICODE_PUNCTUATION,            /**< The punctuation Unicode category */
  47         UNICODE_CONTROL,                /**< The control Unicode category */
  48         UNICODE_MATH,                   /**< The math Unicode category */
  49         UNICODE_DIGIT,                  /**< The digit Unicode category */
  50         UNICODE_HANGUL,                 /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  51         UNICODE_HANJA,                  /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  52         UNICODE_COMBINING,              /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  53         UNICODE_LANGUAGE,               /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is not valid anymore. @endif */
  54         UNICODE_UPPERCASE,              /**< The uppercase Unicode category */
  55         UNICODE_LOWERCASE,              /**< The lowercase Unicode category */
  56         UNICODE_TITLECASE,              /**< The titlecase Unicode category */
  57         UNICODE_LETTER,                 /**< The letter Unicode category */
  58         UNICODE_MARK,                   /**< The mark Unicode category */
  59         UNICODE_CURRENCY,               /**< The currency Unicode category */
  60         UNICODE_SEPARATOR,              /**< The separator Unicode category */
  61         UNICODE_OTHER                   /**< The other Unicode category */
  62 };
  63
  64
  65 /**
  66  *      @class  Character
  67  *      @brief  This class is the wrapper class for the @c wchar_t built-in type.
  68  *
  69  *      @since 2.0
  70  *
  71  *      The %Character class wraps a value of the @c wchar_t type. It also provides
  72  *      several methods for determining a Unicode character's category, and for
  73  *      converting the case of intrinsic characters. The class is useful when
  74  *      passing a Unicode character to a method expecting an instance of Object.
  75  *
  76  *      The following example demonstrates how to use the %Character class.
  77  *
  78  *      @code
  79  *
  80  *      #include <FBase.h>
  81  *
  82  *      using namespace Tizen::Base;
  83  *
  84  *      // This method converts the first character of the given string to upper case.
  85  *      void
  86  *      MyClass::CharacterSample(String& str)
  87  *      {
  88  *              wchar_t chr, upper;
  89  *
  90  *              str.GetCharAt(0, chr);
  91  *
  92  *              // Converts to upper case (ToUpper(wchar_t) is deprecated in favor of ToUpperCase(wchar_t))
  93  *              upper = Character::ToUpperCase(chr);
  94  *
  95  *              str.SetCharAt(upper, 0);
  96  *      }
  97  *      @endcode
  98  */
  99 class _OSP_EXPORT_ Character
 100         : public Object
 101 {
 102 public:
 103         /**
 104          *      Initializes this instance of %Character with the specified parameter.
 105          *
 106          *      @since 2.0
 107          *
 108          *      @param[in]      value   A wide character (@c wchar_t) used to initialize the %Character instance
 109          */
 110         Character(wchar_t value);
 111
 112         /**
 113          *      Copying of objects using this copy constructor is allowed.
 114          *
 115          *      @since 2.0
 116          *
 117          *      @param[in]      value   An instance of %Character
 118          */
 119         Character(const Character& value);
 120
 121         /**
 122          *      This destructor overrides Tizen::Base::Object::~Object().
 123          *
 124          *      @since 2.0
 125          */
 126         virtual ~Character(void);
 127
 128         /**
 129          *      Copying of objects using this copy assignment operator is allowed.
 130          *
 131          *      @since 2.0
 132          *
 133          *      @param[in]      rhs     An instance of %Character
 134          */
 135         Character& operator =(const Character& rhs);
 136
 137         /**
 138          *      Compares (ordinal comparison) the value of the calling instance
 139          *      with the value of the input %Character instance.
 140          *
 141          *      @since 2.0
 142          *
 143          *      @return         A 32-bit @c signed integer value
 144          *      @param[in]      value   The character instance to compare with
 145          *
 146          *      @code
 147          *      @li <  0  if the value of the current instance is less than the value of the input instance
 148          *      @li == 0  if the value of the current instance is equal to the value of the input instance
 149          *      @li >  0  if the value of the current instance is greater than the value of the input instance
 150          *      @endcode
 151          *
 152          */
 153         int CompareTo(const Character& value) const;
 154
 155         /**
 156          *      Checks whether the value of the Object parameter is equal
 157          *      to the value of the calling object.
 158          *
 159          *      @since 2.0
 160          *      @return         @c true if the input Object is equal to the calling %Object, @n
 161          *                              else @c false
 162          *      @param[in]      obj             The object to compare with the calling object
 163          *      @see                    Tizen::Base::Object::Equals()
 164          */
 165         virtual bool Equals(const Object& obj) const;
 166
 167         /**
 168          *      Gets the hash value of the current instance.
 169          *
 170          *      @since 2.0
 171          *
 172          *      @return         The hash value of the current instance
 173          *      @remarks        Two instances that are equal according to Tizen::Base::Object::Equals() must return the same hash value. For better performance,
 174          *                              the hash function used must generate a random distribution for all inputs.
 175          */
 176         virtual int GetHashCode(void) const;
 177
 178         /**
 179          *      Returns the value of the current instance as a
 180          *      @c wchar_t.
 181          *
 182          *      @since 2.0
 183          *
 184          *      @return         The value of this instance as
 185          *                              a @c wchar_t
 186          */
 187         wchar_t ToMchar(void) const;
 188
 189         /**
 190          *      @if OSPDEPREC
 191          *      Converts the Unicode character of the calling object to its lowercase equivalent. @n
 192          *      Unicode characters other than English alphabets are not changed.
 193          *
 194          *      @brief  <i> [Deprecated] </i>
 195          *      @deprecated This method is deprecated as a new method has been introduced.
 196          *      Instead of using this method, use the ToLowerCase() method that supports Unicode characters other than English alphabets.
 197          *
 198          *      @since 2.0
 199          *      @endif
 200          */
 201         void ToLower(void);
 202
 203
 204         /**
 205          *      Converts the Unicode character of the calling object to its lowercase equivalent. @n
 206          *      Unicode characters other than English alphabets are also supported.
 207          *
 208          *      @since 2.0
 209          */
 210         void ToLowerCase(void);
 211
 212
 213         /**
 214          *      @if OSPDEPREC
 215          *      Converts the Unicode character of the current object to its uppercase equivalent. @n
 216          *      Unicode characters other than English alphabets are not changed.
 217          *
 218          *      @brief  <i> [Deprecated] </i>
 219          *      @deprecated This method is deprecated as a new method has been introduced.
 220          *      Instead of using this method, use the ToUpperCase() method that supports Unicode characters other than English alphabets.
 221          *
 222          *      @since 2.0
 223          *      @endif
 224          */
 225         void ToUpper(void);
 226
 227
 228         /**
 229          *      Converts the Unicode character of the current object to its uppercase equivalent. @n
 230          *      Unicode characters other than English alphabets are also supported.
 231          *
 232          *      @since 2.0
 233          */
 234         void ToUpperCase(void);
 235
 236
 237         /**
 238          *      Returns a string representing the value of the calling %Character instance.
 239          *
 240          *      @since 2.0
 241          *
 242          *      @return         An instance of the String class that
 243          *                              contains a Unicode representation of the calling instance
 244          */
 245         String ToString(void) const;
 246
 247         /**
 248          *      Returns a string representation of the input
 249          *      @c wchar_t value.
 250          *
 251          *      @since 2.0
 252          *
 253          *      @return         An instance of the String class
 254          *                              that contains the Unicode representation of the
 255          *                              input value
 256          *      @param[in]      value   The Unicode character to convert
 257          */
 258         static String ToString(wchar_t value);
 259
 260         /**
 261          *      Categorizes a Unicode character into a group identified by
 262          *      one of the UnicodeCategory values.
 263          *
 264          *      @since 2.0
 265          *
 266          *      @return         A value of type UnicodeCategory that identifies the group that contains the specified @c ch
 267          *      @param[in]      ch      The Unicode character to categorize
 268          *
 269          */
 270         static UnicodeCategory GetUnicodeCategory(wchar_t ch);
 271
 272         /**
 273          *      @if OSPDEPREC
 274          *      Converts the input Unicode character to its lowercase equivalent. @n
 275          *      Unicode characters other than English alphabets are not changed.
 276          *
 277          *      @brief  <i> [Deprecated] </i>
 278          *      @deprecated This method is deprecated as a new method has been introduced.
 279          *      Instead of using this method, use the ToLowerCase(wchar_t ch) method that supports Unicode characters other than English alphabets.
 280          *
 281          *      @since 2.0
 282          *      @return         A lowercase equivalent of the input Unicode character
 283          *      @param[in]      ch      The Unicode character to convert
 284          *      @endif
 285          */
 286         static wchar_t ToLower(wchar_t ch);
 287
 288         /**
 289          *      Converts the input Unicode character to its lowercase equivalent. @n
 290          *      Unicode characters other than English alphabets are also supported.
 291          *
 292          *      @since 2.0
 293          *
 294          *      @return         A lowercase equivalent of the input Unicode character
 295          *      @param[in]      ch      The Unicode character to convert
 296          */
 297         static wchar_t ToLowerCase(wchar_t ch);
 298
 299         /**
 300          *      @if OSPDEPREC
 301          *      Converts the input Unicode character to its uppercase equivalent. @n
 302          *      Unicode characters other than English alphabets are not changed.
 303          *
 304          *      @brief  <i> [Deprecated] </i>
 305          *      @deprecated This method is deprecated as a new method has been introduced.
 306          *      Instead of using this method, use the ToUpperCase(wchar_t ch) method that supports Unicode characters other than English alphabets.
 307          *
 308          *      @since 2.0
 309          *      @return         An uppercase equivalent of the input Unicode character
 310          *      @param[in]      ch      The Unicode character to convert
 311          *      @endif
 312          */
 313         static wchar_t ToUpper(wchar_t ch);
 314
 315         /**
 316          *      Converts the input Unicode character to its uppercase equivalent. @n
 317          *      Unicode characters other than English alphabets are also supported.
 318          *
 319          *      @since 2.0
 320          *
 321          *      @return         An uppercase equivalent of the input Unicode character
 322          *      @param[in]      ch      The Unicode character to convert
 323          */
 324         static wchar_t ToUpperCase(wchar_t ch);
 325
 326         /**
 327          *      Checks whether the input character is an alphanumeric character (letter or digit). @n
 328          *      A character is considered to be an alphanumeric character if either Character::IsLetter(wchar_t ch) or Character::IsDigit(wchar_t ch) returns @c true for the character.
 329          *      @if OSPCOMPAT
 330          *      @brief <i> [Compatibility] </i>
 331          *      @endif
 332          *      @since 2.0
 333          *      @if OSPCOMPAT
 334          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 335          *                      For more information, see @ref CompCharacterIsAlphaNumericPage "here".
 336          *      @endif
 337          *      @return         @c true if the input character is alphanumeric, @n
 338          *                              else @c false
 339          *      @param[in]      ch      The Unicode character
 340          */
 341         static bool IsAlphaNumeric(wchar_t ch);
 342
 343         /**
 344          *      @if OSPCOMPAT
 345          *      @page                   CompCharacterIsAlphaNumericPage Compatibility for IsAlphaNumeric()
 346          *      @section                CompCharacterIsAlphaNumericPageIssueSection Issues
 347          *                              Implementing this method in OSP compatible applications has the following issues: @n
 348          *
 349          *      -# The method returns true only if the character is an alphabet character and cannot check other Unicode characters in the Letter and Digit categories.
 350          *
 351          *      @section                CompCharacterIsAlphaNumericPageSolutionSection Resolutions
 352          *
 353          *      This issue has been resolved in Tizen.
 354          *      @endif
 355          */
 356
 357
 358         /**
 359          *      Checks whether the input character is a digit.
 360          *
 361          *      @since 2.0
 362          *
 363          *      @return         @c true if the input character is a digit, @n
 364          *                              else @c false
 365          *      @param[in]      ch      The Unicode character
 366          */
 367         static bool IsDigit(wchar_t ch);
 368
 369         /**
 370          *      Checks whether the input character is a Unicode letter.
 371          *      @if OSPCOMPAT
 372          *      @brief <i> [Compatibility] </i>
 373          *      @endif
 374          *      @since 2.0
 375          *      @if OSPCOMPAT
 376          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 377          *                      For more information, see @ref CompCharacterIsLetterPage "here".
 378          *      @endif
 379          *      @return         @c true if the input character is a letter, @n
 380          *                              else @c false
 381          *      @param[in]      ch      The Unicode character
 382          */
 383         static bool IsLetter(wchar_t ch);
 384
 385         /**
 386          *      @if OSPCOMPAT
 387          *      @page                   CompCharacterIsLetterPage Compatibility for IsLetter()
 388          *      @section                CompCharacterIsLetterPageIssueSection Issues
 389          *                              Implementing this method in OSP compatible applications has the following issues: @n
 390          *
 391          *      -# The method returns true only if the character is an alphabet character and cannot check other Unicode characters in the Letter category.
 392          *
 393          *      @section                CompCharacterIsLetterPageSolutionSection Resolutions
 394          *
 395          *      This issue has been resolved in Tizen.
 396          *      @endif
 397          */
 398
 399         /**
 400          *      @if OSPDEPREC
 401          *      Checks whether the input character is a lowercase alphabet.
 402          *
 403          *      @brief  <i> [Deprecated] </i>
 404          *      @deprecated This method is deprecated as a new method has been introduced.
 405          *      Instead of using this method, use the IsLowerCase(wchar_t ch) method that supports Unicode characters.
 406          *      @since 2.0
 407          *
 408          *      @return         @c true if the input character is a lowercase English alphabet, @n
 409          *                              else @c false
 410          *      @param[in]      ch      The Unicode character
 411          *      @endif
 412          */
 413         static bool IsLower(wchar_t ch);
 414
 415         /**
 416          *      Checks whether the input character is a lowercase alphabet. @n
 417          *      Unicode characters other than English alphabets are also supported.
 418          *
 419          *      @since 2.0
 420          *
 421          *      @return         @c true if the input character is a lowercase alphabet, @n
 422          *                              else @c false
 423          *      @param[in]      ch      The Unicode character
 424          */
 425         static bool IsLowerCase(wchar_t ch);
 426
 427         /**
 428          *      @if OSPDEPREC
 429          *      Checks whether the input character is an uppercase alphabet.
 430          *
 431          *      @brief  <i> [Deprecated] </i>
 432          *      @deprecated This method is deprecated as a new method has been introduced.
 433          *      Instead of using this method, use the IsUpperCase(wchar_t ch) method that also supports Unicode characters other than English alphabets.
 434          *      @since 2.0
 435          *
 436          *      @return         @c true if the input character is an uppercase alphabet, @n
 437          *                              else @c false
 438          *      @param[in]      ch      The Unicode character
 439          *      @endif
 440          */
 441         static bool IsUpper(wchar_t ch);
 442
 443         /**
 444          *      Checks whether the input character is an uppercase alphabet. @n
 445          *      Unicode characters other than English alphabets are also supported.
 446          *
 447          *      @since 2.0
 448          *
 449          *      @return         @c true if the input character is an uppercase alphabet, @n
 450          *                              else @c false
 451          *      @param[in]      ch      The Unicode character
 452          */
 453         static bool IsUpperCase(wchar_t ch);
 454
 455         /**
 456          *      Returns the value of the input character in the supplied radix. @n
 457          *      The value of radix must be between RADIX_MIN and RADIX_MAX.
 458          *
 459          *      @since 2.0
 460          *
 461          *      @return         An integer value of the input character in the supplied radix
 462          *      @param[in]      ch      The character to determine the value
 463          *      @param[in]      radix   The radix
 464          */
 465         static int ToDigit(wchar_t ch, int radix);
 466
 467         /**
 468          *      Returns the character which represents the input digit in the specified radix. @n
 469          *      The value of radix must be between RADIX_MIN and RADIX_MAX.
 470          *
 471          *      @since 2.0
 472          *
 473          *      @return         A Unicode character of the input digit with specified @c radix, @n
 474          *                              else @c null character (U+0000)
 475          *      @param[in]      digit   The digit to determine the value
 476          *      @param[in]      radix   The radix
 477          */
 478         static wchar_t ForDigit(int digit, int radix);
 479
 480         /**
 481          *      Gets the numeric value of the input Unicode character. @n
 482          *      This is used when some numeric values are fractions, negative, or too large for an @c int value.
 483          *
 484          *      @since 2.0
 485          *
 486          *      @return         A @c double value, @n else NO_NUMERIC_VALUE for characters without any numeric value in Unicode
 487          *      @param[in]      ch      A Unicode character
 488          */
 489         static double GetNumericValue(wchar_t ch);
 490
 491         /**
 492          *      Checks whether the input character is defined, which usually means that it is an assigned character in Unicode.
 493          *
 494          *      @since 2.0
 495          *
 496          *      @return         @c true if the Unicode character is an assigned character, @n
 497          *                          else @c false
 498          *      @param[in]      ch      A Unicode character
 499          */
 500         static bool IsDefined(wchar_t ch);
 501
 502         /**
 503          *      Checks whether the input character is a whitespace character. @n
 504          *      A character is considered to be a whitespace character if and only if it satisfies one of the criteria listed in the code block below.
 505          *
 506          *      @since 2.0
 507          *
 508          *      @return         @c true if the Unicode character is a whitespace character, @n
 509          *                          else @c false
 510          *      @param[in]      ch      A Unicode character
 511          *
 512          *      @code
 513          *      It is a Unicode Separator character, but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
 514          *      It is U+0009 HORIZONTAL TABULATION.
 515          *      It is U+000A LINE FEED.
 516          *      It is U+000B VERTICAL TABULATION.
 517          *      It is U+000C FORM FEED.
 518          *      It is U+000D CARRIAGE RETURN.
 519          *      It is U+001C FILE SEPARATOR.
 520          *      It is U+001D GROUP SEPARATOR.
 521          *      It is U+001E RECORD SEPARATOR.
 522          *      It is U+001F UNIT SEPARATOR.
 523          *      @endcode
 524          */
 525         static bool IsWhitespace(wchar_t ch);
 526
 527
 528         /**
 529          *      Checks whether the input character is a title case character.
 530          *
 531          *      @since 2.0
 532          *
 533          *      @return         @c true if the Unicode character is a title case character, @n
 534          *                          else @c false
 535          *      @param[in]      ch      A Unicode character
 536          */
 537         static bool IsTitleCase(wchar_t ch);
 538
 539         /**
 540          *      Returns the title case equivalent for the input character if it exists.
 541          *
 542          *      @since 2.0
 543          *
 544          *      @return         A title case character equivalent for the input character @n The character itself is returned if none is defined.
 545          *      @param[in]      ch      A Unicode character
 546          */
 547         static wchar_t ToTitleCase(wchar_t ch);
 548
 549         /**
 550          *      Checks whether the input character is an ISO control code or not.
 551          *
 552          *      @since 2.0
 553          *
 554          *      @return         @c true if the Unicode character is an ISO control character, @n
 555          *                          else @c false
 556          *      @param[in]      ch      A Unicode character
 557          */
 558         static bool IsISOControl(wchar_t ch);
 559
 560
 561         /**
 562          * A constant holding the largest character value, 0x10FFFF (the last Unicode code point).
 563          * NOTE(review): 0x10FFFF does not fit in a 16-bit @c wchar_t; this assumes a wider @c wchar_t on the target platform -- confirm.
 564          * @since 2.0
 565          */
 566         static const wchar_t VALUE_MAX = 0x10FFFF;
 567
 568         /**
 569          * A constant holding the smallest character value, 0x0000.
 570          *
 571          * @since 2.0
 572          */
 573         static const wchar_t VALUE_MIN = 0x0000;
 574
 575         /**
 576          * The minimum radix available for conversion to and from strings. @n
 577          * Same value as RADIX_BINARY.
 578          *
 579          * @since 2.0
 580          */
 581         static const int RADIX_MIN = 2;
 582
 583         /**
 584          * The radix for binary number.
 585          *
 586          * @since 2.0
 587          */
 588         static const int RADIX_BINARY = 2;
 589
 590         /**
 591          * The radix for decimal number.
 592          *
 593          * @since 2.0
 594          */
 595         static const int RADIX_DECIMAL = 10;
 596
 597         /**
 598          * The radix for octal number.
 599          *
 600          * @since 2.0
 601          */
 602         static const int RADIX_OCTAL = 8;
 603
 604         /**
 605          * The radix for hexadecimal number.
 606          *
 607          * @since 2.0
 608          */
 609         static const int RADIX_HEXADECIMAL = 16;
 610
 611         /**
 612          * The maximum radix available for conversion to and from strings (36).
 613          *
 614          * @since 2.0
 615          */
 616         static const int RADIX_MAX = 36;
 617
 618         /**
 619          * Special value that is returned by GetNumericValue(wchar_t ch) when no numeric value is defined for the Unicode character.
 620          * NOTE(review): standard C++ allows in-class initializers only for integral/enumeration static const members (or constexpr ones); this @c double initializer relies on a compiler extension -- confirm the toolchain accepts it.
 621          * @since 2.0
 622          */
 623         static const double NO_NUMERIC_VALUE = -123456789.0;
 624
 625 private:
 626         friend class Int8;
 627         friend class Short;
 628         friend class Long;
 629         friend class Integer;
 630
 631         wchar_t __val; // presumably the wrapped wchar_t value; the implementation is not visible in this header
 632
 633         friend class _CharacterImpl;
 634         class _CharacterImpl * __pCharacterImpl; // pointer to the implementation class; its use is not visible in this header
 635
 636 }; // Character
 637
 638 }} // Tizen::Base
 639
 640 #endif // _FBASE_CHARACTER_H_