inc/FBaseCharacter.h

   1 //
   2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the License);
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //     http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 //
  16
  17 /**
  18  * @file                FBaseCharacter.h
  19  * @brief               This is the header file for the %Character class.
  20  *
  21  * This header file contains the declarations of the %Character class.
  22  */
  23 #ifndef _FBASE_CHARACTER_H_
  24 #define _FBASE_CHARACTER_H_
  25
  26 #include <FBaseObject.h>
  27 #include <FBaseString.h>
  28
  29
  30 namespace Tizen { namespace Base
  31 {
  32 /**
  33  *      @enum   UnicodeCategory
  34  *
  35  *      Defines the constants used to distinguish the categories of the Unicode characters.
  36  *
  37  *      @since 2.0
  38  */
  39 enum UnicodeCategory
  40 {
  41         UNICODE_SURROGATE = 1,          /**< The surrogate Unicode category (the enumeration starts at 1; each following enumerator is one greater than the previous one) */
  42         UNICODE_MODIFIER,               /**< The spacing modifier Unicode category */
  43         UNICODE_ARROW,                  /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is no longer valid. @endif */
  44         UNICODE_SPACE,                  /**< The space Unicode category */
  45         UNICODE_PUNCTUATION,            /**< The punctuation Unicode category */
  46         UNICODE_CONTROL,                /**< The control Unicode category */
  47         UNICODE_MATH,                   /**< The math Unicode category */
  48         UNICODE_DIGIT,                  /**< The digit Unicode category */
  49         UNICODE_HANGUL,                 /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is no longer valid. @endif */
  50         UNICODE_HANJA,                  /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is no longer valid. @endif */
  51         UNICODE_COMBINING,              /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is no longer valid. @endif */
  52         UNICODE_LANGUAGE,               /**< @if OSPDEPREC @deprecated This enum value is deprecated because this category is no longer valid. @endif */
  53         UNICODE_UPPERCASE,              /**< The uppercase Unicode category */
  54         UNICODE_LOWERCASE,              /**< The lowercase Unicode category */
  55         UNICODE_TITLECASE,              /**< The titlecase Unicode category */
  56         UNICODE_LETTER,                 /**< The letter Unicode category */
  57         UNICODE_MARK,                   /**< The mark Unicode category */
  58         UNICODE_CURRENCY,               /**< The currency Unicode category */
  59         UNICODE_SEPARATOR,              /**< The separator Unicode category */
  60         UNICODE_OTHER                   /**< The other Unicode category (last enumerator, value 20) */
  61 };
  62
  63
  64 /**
  65  *      @class  Character
  66  *      @brief  This class is the wrapper class for the @c wchar_t built-in type.
  67  *
  68  *      @since 2.0
  69  *
  70  *      The %Character class wraps the value of the @c wchar_t type. It also provides
  71  *      several methods for determining a Unicode character's category, and for
  72  *      converting the case of the intrinsic characters. This class is useful when
  73  *      passing a Unicode character to a method expecting an instance of Object.
  74  *
  75  * The following example demonstrates how to use the %Character class.
  76  *
  77  *      @code
  78  *
  79  *      #include <FBase.h>
  80  *
  81  *      using namespace Tizen::Base;
  82  *
  83  *      // This method converts the first character of the given string to the upper case.
  84  *      void
  85  *      MyClass::CharacterSample(String& str)
  86  *      {
  87  *              wchar_t chr, upper;
  88  *
  89  *              str.GetCharAt(0, chr);
  90  *
  91  *              // Converts to uppercase
  92  *              upper = Character::ToUpperCase(chr);
  93  *
  94  *              str.SetCharAt(upper, 0);
  95  *      }
  96  *      @endcode
  97  */
  98 class _OSP_EXPORT_ Character
  99         : public Object
 100 {
 101 public:
 102         /**
 103          *      Initializes this instance of %Character with the specified @c wchar_t value.
 104          *      @remarks        This constructor is not declared @c explicit, so a @c wchar_t value converts implicitly to %Character.
 105          *      @since 2.0
 106          *
 107          *      @param[in]      value   The wide character (@c wchar_t) used to initialize the %Character instance
 108          */
 109         Character(wchar_t value);
 110
 111         /**
 112          *      This is the copy constructor for the %Character class. Copying of objects using this copy constructor is allowed.
 113          *
 114          *      @since 2.0
 115          *
 116          *      @param[in]      value   An instance of %Character to copy
 117          */
 118         Character(const Character& value);
 119
 120         /**
 121          *      This destructor overrides Tizen::Base::Object::~Object().
 122          *
 123          *      @since 2.0
 124          */
 125         virtual ~Character(void);
 126
 127         /**
 128          *      This is the copy assignment operator for the %Character class. Copying of objects using this operator is allowed.
 129          *
 130          *      @since 2.0
 131          *
 132          *      @param[in]      rhs     An instance of %Character to copy
 133          */
 134         Character& operator =(const Character& rhs);
 135
 136         /**
 137          *      Compares (ordinal comparison) the value of the calling instance
 138          *      with the value of the input %Character instance.
 139          *
 140          *      @since 2.0
 141          *
 142          *      @return         A @c signed integer value that indicates the relative order of the two values (interpreted as listed below)
 143          *      @param[in]      value   The %Character instance to compare with
 144          *
 145          *      @code
 146          *      <  0  if the value of the current instance is less than the value of the input instance
 147          *      == 0  if the value of the current instance is equal to the value of the input instance
 148          *      >  0  if the value of the current instance is greater than the value of the input instance
 149          *      @endcode
 150          *
 151          */
 152         int CompareTo(const Character& value) const;
 153
 154         /**
 155          *      Checks whether the value of the input Object is equal
 156          *      to the value of the calling %Character instance.
 157          *
 158          *      @since 2.0
 159          *      @return         @c true if the input Object is equal to the calling %Character instance, @n
 160          *                              else @c false
 161          *      @param[in]      obj             The object to compare with the calling object
 162          *      @see            Tizen::Base::Object::Equals()
 163          */
 164         virtual bool Equals(const Object& obj) const;
 165
 166         /**
 167          *      Gets the hash value of the current instance.
 168          *
 169          *      @since 2.0
 170          *
 171          *      @return         The hash value of the current instance
 172          *      @remarks        Two instances that are equal according to Tizen::Base::Object::Equals() must return the same hash value. @n
 173          *                              For better performance, the hash function used must generate a well-spread (random-looking) distribution for all the inputs.
 174          */
 175         virtual int GetHashCode(void) const;
 176
 177         /**
 178          *      Returns the value of the current instance as a @c wchar_t value.
 179          *
 180          *      @since 2.0
 181          *
 182          *      @return         The value of this instance as a @c wchar_t value
 183          */
 184         wchar_t ToMchar(void) const;
 185
 186         /**
 187          *      @if OSPDEPREC
 188          *      Converts the Unicode character of the calling object to its lowercase equivalent. @n
 189          *      Characters other than the letters of the English alphabet are not changed.
 190          *
 191          *      @brief          <i> [Deprecated] </i>
 192          *      @deprecated This method is deprecated as a new method has been introduced.
 193          *                              Instead of using this method, use the ToLowerCase() method, which also supports Unicode characters other than the letters of the English alphabet.
 194          *
 195          *      @since 2.0
 196          *      @endif
 197          */
 198         void ToLower(void);
 199
 200
 201         /**
 202          *      Converts the Unicode character of the calling object to its lowercase equivalent. @n
 203          *      Unicode characters other than the letters of the English alphabet are also supported.
 204          *
 205          *      @since 2.0
 206          */
 207         void ToLowerCase(void);
 208
 209
 210         /**
 211          *      @if OSPDEPREC
 212          *      Converts the Unicode character of the current object to its uppercase equivalent. @n
 213          *      Characters other than the letters of the English alphabet are not changed.
 214          *
 215          *      @brief          <i> [Deprecated] </i>
 216          *      @deprecated This method is deprecated as a new method has been introduced.
 217          *                              Instead of using this method, use the ToUpperCase() method, which also supports Unicode characters other than the letters of the English alphabet.
 218          *
 219          *      @since 2.0
 220          *      @endif
 221          */
 222         void ToUpper(void);
 223
 224
 225         /**
 226          *      Converts the Unicode character of the current object to its uppercase equivalent. @n
 227          *      Unicode characters other than the letters of the English alphabet are also supported.
 228          *
 229          *      @since 2.0
 230          */
 231         void ToUpperCase(void);
 232
 233
 234         /**
 235          *      Returns a string that represents the value of the calling %Character instance.
 236          *
 237          *      @since 2.0
 238          *
 239          *      @return         An instance of String that
 240          *                              contains a Unicode representation of the calling instance
 241          */
 242         String ToString(void) const;
 243
 244         /**
 245          *      Returns a string representation of the input
 246          *      @c wchar_t value.
 247          *
 248          *      @since 2.0
 249          *
 250          *      @return         An instance of String
 251          *                              that contains the Unicode representation of the
 252          *                              input value
 253          *      @param[in]      value   The Unicode character to convert
 254          */
 255         static String ToString(wchar_t value);
 256
 257         /**
 258          *      Categorizes a Unicode character into a group that is identified by
 259          *      one of the UnicodeCategory values.
 260          *
 261          *      @since 2.0
 262          *
 263          *      @return         The value of type UnicodeCategory that identifies the group that contains the specified @c ch
 264          *      @param[in]      ch      The Unicode character to categorize
 265          *
 266          */
 267         static UnicodeCategory GetUnicodeCategory(wchar_t ch);
 268
 269         /**
 270          *      @if OSPDEPREC
 271          *      Converts the input Unicode character to its lowercase equivalent. @n
 272          *      Characters other than the letters of the English alphabet are not changed.
 273          *
 274          *      @brief          <i> [Deprecated] </i>
 275          *      @deprecated This method is deprecated as a new method has been introduced.
 276          *                              Instead of using this method, use the ToLowerCase(wchar_t ch) method, which also supports Unicode characters other than the letters of the English alphabet.
 277          *
 278          *      @since 2.0
 279          *      @return         The lowercase equivalent of the input Unicode character
 280          *      @param[in]      ch      The Unicode character to convert
 281          *      @endif
 282          */
 283         static wchar_t ToLower(wchar_t ch);
 284
 285         /**
 286          *      Converts the input Unicode character to its lowercase equivalent. @n
 287          *      Unicode characters other than the letters of the English alphabet are also supported.
 288          *
 289          *      @since 2.0
 290          *
 291          *      @return         The lowercase equivalent of the input Unicode character
 292          *      @param[in]      ch      The Unicode character to convert
 293          */
 294         static wchar_t ToLowerCase(wchar_t ch);
 295
 296         /**
 297          *      @if OSPDEPREC
 298          *      Converts the input Unicode character to its uppercase equivalent. @n
 299          *      Characters other than the letters of the English alphabet are not changed.
 300          *
 301          *      @brief          <i> [Deprecated] </i>
 302          *      @deprecated This method is deprecated as a new method has been introduced.
 303          *                              Instead of using this method, use the ToUpperCase(wchar_t ch) method, which also supports Unicode characters other than the letters of the English alphabet.
 304          *
 305          *      @since 2.0
 306          *      @return         The uppercase equivalent of the input Unicode character
 307          *      @param[in]      ch      The Unicode character to convert
 308          *      @endif
 309          */
 310         static wchar_t ToUpper(wchar_t ch);
 311
 312         /**
 313          *      Converts the input Unicode character to its uppercase equivalent. @n
 314          *      Unicode characters other than the letters of the English alphabet are also supported.
 315          *
 316          *      @since 2.0
 317          *
 318          *      @return         The uppercase equivalent of the input Unicode character
 319          *      @param[in]      ch      The Unicode character to convert
 320          */
 321         static wchar_t ToUpperCase(wchar_t ch);
 322
 323         /**
 324          *      Checks whether the input character is an alphanumeric character (letter or digit). @n
 325          *      A character is considered to be an alphanumeric character if either Character::IsLetter(wchar_t ch) or Character::IsDigit(wchar_t ch) returns @c true for the character.
 326          *      @if OSPCOMPAT
 327          *      @brief <i> [Compatibility] </i>
 328          *      @endif
 329          *      @since 2.0
 330          *      @if OSPCOMPAT
 331          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 332          *                                      For more information, see @ref CompCharacterIsAlphaNumericPage "here".
 333          *      @endif
 334          *      @return         @c true if the input character is alphanumeric, @n
 335          *                              else @c false
 336          *      @param[in]      ch      The Unicode character
 337          */
 338         static bool IsAlphaNumeric(wchar_t ch);
 339
 340         /**
 341          *      @if OSPCOMPAT
 342          *      @page                   CompCharacterIsAlphaNumericPage Compatibility for IsAlphaNumeric()
 343          *      @section                CompCharacterIsAlphaNumericPageIssueSection Issues
 344          *                                      Implementing this method in OSP compatible applications has the following issues: @n
 345          *
 346          *      -# The method returns @c true only if the character is an alphabet character; it cannot check other Unicode characters in the letter and digit category.
 347          *
 348          *      @section                CompCharacterIsAlphaNumericPageSolutionSection Resolutions
 349          *
 350          *      The issue mentioned above is resolved in %Tizen.
 351          *      @endif
 352          */
 353
 354
 355         /**
 356          *      Checks whether the input character is a digit.
 357          *
 358          *      @since 2.0
 359          *
 360          *      @return         @c true if the input character is a digit, @n
 361          *                              else @c false
 362          *      @param[in]      ch      The Unicode character
 363          */
 364         static bool IsDigit(wchar_t ch);
 365
 366         /**
 367          *      Checks whether the input character is a Unicode letter.
 368          *      @if OSPCOMPAT
 369          *      @brief <i> [Compatibility] </i>
 370          *      @endif
 371          *      @since 2.0
 372          *      @if OSPCOMPAT
 373          *      @compatibility  This method has compatibility issues with OSP compatible applications. @n
 374          *                                      For more information, see @ref CompCharacterIsLetterPage "here".
 375          *      @endif
 376          *      @return         @c true if the input character is a letter, @n
 377          *                              else @c false
 378          *      @param[in]      ch      The Unicode character
 379          */
 380         static bool IsLetter(wchar_t ch);
 381
 382         /**
 383          *      @if OSPCOMPAT
 384          *      @page                   CompCharacterIsLetterPage Compatibility for IsLetter()
 385          *      @section                CompCharacterIsLetterPageIssueSection Issues
 386          *                                      Implementing this method in OSP compatible applications has the following issues: @n
 387          *
 388          *      -# The method returns @c true only if the character is an alphabet character; it cannot check other Unicode characters in the letter category.
 389          *
 390          *      @section                CompCharacterIsLetterPageSolutionSection Resolutions
 391          *
 392          *      The issue mentioned above is resolved in %Tizen.
 393          *      @endif
 394          */
 395
 396         /**
 397          *      @if OSPDEPREC
 398          *      Checks whether the input character is a lowercase letter of the alphabet.
 399          *
 400          *      @brief          <i> [Deprecated] </i>
 401          *      @deprecated This method is deprecated as a new method has been introduced.
 402          *                              Instead of using this method, use the IsLowerCase(wchar_t ch) method, which also supports Unicode characters other than the letters of the English alphabet.
 403          *      @since 2.0
 404          *
 405          *      @return         @c true if the input character is a lowercase letter, @n
 406          *                              else @c false
 407          *      @param[in]      ch      The Unicode character
 408          *      @endif
 409          */
 410         static bool IsLower(wchar_t ch);
 411
 412         /**
 413          *      Checks whether the input character is a lowercase letter. @n
 414          *      Unicode characters other than the letters of the English alphabet are also supported.
 415          *
 416          *      @since 2.0
 417          *
 418          *      @return         @c true if the input character is a lowercase letter, @n
 419          *                              else @c false
 420          *      @param[in]      ch      The Unicode character
 421          */
 422         static bool IsLowerCase(wchar_t ch);
 423
 424         /**
 425          *      @if OSPDEPREC
 426          *      Checks whether the input character is an uppercase letter of the alphabet.
 427          *
 428          *      @brief          <i> [Deprecated] </i>
 429          *      @deprecated This method is deprecated as a new method has been introduced.
 430          *                              Instead of using this method, use the IsUpperCase(wchar_t ch) method, which also supports Unicode characters other than the letters of the English alphabet.
 431          *      @since 2.0
 432          *
 433          *      @return         @c true if the input character is an uppercase letter, @n
 434          *                              else @c false
 435          *      @param[in]      ch      The Unicode character
 436          *      @endif
 437          */
 438         static bool IsUpper(wchar_t ch);
 439
 440         /**
 441          *      Checks whether the input character is an uppercase letter. @n
 442          *      Unicode characters other than the letters of the English alphabet are also supported.
 443          *
 444          *      @since 2.0
 445          *
 446          *      @return         @c true if the input character is an uppercase letter, @n
 447          *                              else @c false
 448          *      @param[in]      ch      The Unicode character
 449          */
 450         static bool IsUpperCase(wchar_t ch);
 451
 452         /**
 453          *      Returns the value of the input character in the supplied @c radix. @n
 454          *      The value of @c radix must be between ::RADIX_MIN and ::RADIX_MAX.
 455          *
 456          *      @since 2.0
 457          *
 458          *      @return         The integer value of the input character in the supplied @c radix
 459          *      @param[in]      ch      The character that determines the value
 460          *      @param[in]      radix   The radix
 461          */
 462         static int ToDigit(wchar_t ch, int radix);
 463
 464         /**
 465          *      Returns the character that represents the input digit in the specified @c radix. @n
 466          *      The value of @c radix must be between ::RADIX_MIN and ::RADIX_MAX.
 467          *
 468          *      @since 2.0
 469          *
 470          *      @return         The Unicode character of the input digit in the specified @c radix, @n
 471          *                              else a @c null character (U+0000) (presumably when @c digit or @c radix is out of range; TODO confirm against the implementation)
 472          *      @param[in]      digit   The digit that determines the value
 473          *      @param[in]      radix   The radix
 474          */
 475         static wchar_t ForDigit(int digit, int radix);
 476
 477         /**
 478          *      Gets the numeric value of the input Unicode character. @n
 479          *      This is used when some numeric values are fractions, negative, or too large for the @c int value.
 480          *
 481          *      @since 2.0
 482          *
 483          *      @return         The @c double value @n
 484          *                              NO_NUMERIC_VALUE is returned for characters that have no numeric value defined in Unicode.
 485          *      @param[in]      ch      The Unicode character
 486          */
 487         static double GetNumericValue(wchar_t ch);
 488
 489         /**
 490          *      Checks whether the input character is defined, which usually means that it is an assigned character in Unicode.
 491          *
 492          *      @since 2.0
 493          *
 494          *      @return         @c true if the Unicode character is an assigned character, @n
 495          *                              else @c false
 496          *      @param[in]      ch      The Unicode character
 497          */
 498         static bool IsDefined(wchar_t ch);
 499
 500         /**
 501          *      Checks whether the input character is a whitespace character. @n
 502          *      A character is considered to be a whitespace character if and only if it satisfies one of the criteria listed at the end of this description.
 503          *
 504          *      @since 2.0
 505          *
 506          *      @return         @c true if the Unicode character is a whitespace character, @n
 507          *                              else @c false
 508          *      @param[in]      ch      The Unicode character
 509          *
 510          *      @code
 511          *      It is a Unicode Separator character, but is also not a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
 512          *      It is U+0009 HORIZONTAL TABULATION.
 513          *      It is U+000A LINE FEED.
 514          *      It is U+000B VERTICAL TABULATION.
 515          *      It is U+000C FORM FEED.
 516          *      It is U+000D CARRIAGE RETURN.
 517          *      It is U+001C FILE SEPARATOR.
 518          *      It is U+001D GROUP SEPARATOR.
 519          *      It is U+001E RECORD SEPARATOR.
 520          *      It is U+001F UNIT SEPARATOR.
 521          *      @endcode
 522          */
 523         static bool IsWhitespace(wchar_t ch);
 524
 525
 526         /**
 527          *      Checks whether the input character is a title case character.
 528          *
 529          *      @since 2.0
 530          *
 531          *      @return         @c true if the Unicode character is a title case character, @n
 532          *                              else @c false
 533          *      @param[in]      ch      The Unicode character
 534          */
 535         static bool IsTitleCase(wchar_t ch);
 536
 537         /**
 538          *      Returns the title case equivalent for the input character if it exists.
 539          *
 540          *      @since 2.0
 541          *
 542          *      @return         The title case character equivalent for the input character @n
 543          *                              The character itself is returned if no equivalent is defined.
 544          *      @param[in]      ch      The Unicode character
 545          */
 546         static wchar_t ToTitleCase(wchar_t ch);
 547
 548         /**
 549          *      Checks whether the input character is an ISO control code.
 550          *
 551          *      @since 2.0
 552          *
 553          *      @return         @c true if the Unicode character is an ISO control character, @n
 554          *                              else @c false
 555          *      @param[in]      ch      The Unicode character
 556          */
 557         static bool IsISOControl(wchar_t ch);
 558
 559
 560         /**
 561          * A constant holding the maximum character value, 0x10FFFF (the highest Unicode code point).
 562          * @remarks NOTE(review): 0x10FFFF does not fit in a 16-bit @c wchar_t; this presumably assumes a 32-bit @c wchar_t. Confirm for the target platform.
 563          * @since 2.0
 564          */
 565         static const wchar_t VALUE_MAX = 0x10FFFF;
 566
 567         /**
 568          * A constant holding the minimum character value, 0x0000.
 569          *
 570          * @since 2.0
 571          */
 572         static const wchar_t VALUE_MIN = 0x0000;
 573
 574         /**
 575          * The minimum radix available for converting to and from strings. @n
 576          * Same value as ::RADIX_BINARY.
 577          *
 578          * @since 2.0
 579          */
 580         static const int RADIX_MIN = 2;
 581
 582         /**
 583          * The radix for a binary number.
 584          *
 585          * @since 2.0
 586          */
 587         static const int RADIX_BINARY = 2;
 588
 589         /**
 590          * The radix for a decimal number.
 591          *
 592          * @since 2.0
 593          */
 594         static const int RADIX_DECIMAL = 10;
 595
 596         /**
 597          * The radix for an octal number.
 598          *
 599          * @since 2.0
 600          */
 601         static const int RADIX_OCTAL = 8;
 602
 603         /**
 604          * The radix for a hexadecimal number.
 605          *
 606          * @since 2.0
 607          */
 608         static const int RADIX_HEXADECIMAL = 16;
 609
 610         /**
 611          * The maximum radix available for converting to and from strings, 36. This is larger than ::RADIX_HEXADECIMAL (16).
 612          *
 613          * @since 2.0
 614          */
 615         static const int RADIX_MAX = 36;
 616
 617         /**
 618          * The special value that is returned by the GetNumericValue(wchar_t ch) method when no numeric value is defined for the Unicode character.
 619          * @remarks NOTE(review): an in-class initializer on a non-integral @c static @c const member is not standard C++ without @c constexpr (some compilers accept it as an extension). Confirm the supported toolchains.
 620          * @since 2.0
 621          */
 622         static const double NO_NUMERIC_VALUE = -123456789.0;
 623
 624 private:
 625         friend class Int8;
 626         friend class Short;
 627         friend class Long;
 628         friend class Integer;
 629
 630         wchar_t __val; // presumably the wrapped character value returned by ToMchar(); the implementation is not visible here
 631
 632         friend class _CharacterImpl;
 633         class _CharacterImpl* __pCharacterImpl; // pointer to the implementation class, which is only forward-declared in this header
 634
 635 }; // Character
 636
 637 }} // Tizen::Base
 638
 639 #endif // _FBASE_CHARACTER_H_