inc/FTextUtf8Encoder.h

   1 //
   2 // Open Service Platform
   3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
   4 //
   5 // Licensed under the Apache License, Version 2.0 (the License);
   6 // you may not use this file except in compliance with the License.
   7 // You may obtain a copy of the License at
   8 //
   9 //     http://www.apache.org/licenses/LICENSE-2.0
  10 //
  11 // Unless required by applicable law or agreed to in writing, software
  12 // distributed under the License is distributed on an "AS IS" BASIS,
  13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 // See the License for the specific language governing permissions and
  15 // limitations under the License.
  16 //
  17
  18 /**
  19  * @file                FTextUtf8Encoder.h
  20  * @brief               This is the header file for the %Utf8Encoder class.
  21  *
  22  * This header file contains the declarations of the %Utf8Encoder class.
  23  *
  24  */
  25 #ifndef _FTEXT_UTF8_ENCODER_H_
  26 #define _FTEXT_UTF8_ENCODER_H_
  27
  28 #include <FTextEncoder.h>
  29 #include <FBaseByteBuffer.h>
  30
  31
  32 namespace Tizen { namespace Text
  33 {
  34
  35 /**
  36  * @class       Utf8Encoder
  37  * @brief       This class is an implementation of the UTF-8 encoder.
  38  *
  39  * @since       2.0
  40  *
  41  * @final       This class is not intended for extension.
  42  *
  43  * The %Utf8Encoder class converts the blocks of characters into encoded blocks of bytes.
  44  * Unicode Transformation Format-8 (UTF-8) is a variable-width character encoding that is widely used on the internet. It converts
  45  * Unicode code points into sequences of 8-bit bytes. @n UTF-8 encoding supports all Unicode character values and surrogates.
  46  * Note that %Utf8Encoder is not used in reality, because keeping the internal state of encoding from unicode
  47  * to UTF-8 can be avoided if the translation unit is even.
  48  * This means that the real implementation of %Utf8Encoder is the same as that of Utf8Encoding in this state.
  49  *
  50  * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/text/converting_text_data_separate_blocks.htm">Converting Text Data in Separate Blocks</a>.
  51  *
  52  * The following example demonstrates how to use the %Utf8Encoder class.
  53  *
  54  * @code
  55  *      #include <FBase.h>
  56  *      #include <FText.h>
  57  *
  58  *      using namespace Tizen::Base;
  59  *      using namespace Tizen::Text;
  60  *
  61  *      void
  62  *      MyClass::Utf8EncoderSample(void)
  63  *      {
  64  *              Utf8Encoder utf8En;
  65  *
  66  *              // Fills a WcharBuffer with the unicode strings to encode it into UTF8 format bytes.
  67  *              WcharBuffer* pMB = PrepareUtf8EncoderSample();
  68  *
  69  *              int charBufSize = pMB->GetCapacity();
  70  *
  71  *              int byteCount = 0;
  72  *              utf8En.GetByteCount(*pMB, 0, charBufSize, byteCount);
  73  *
  74  *              ByteBuffer enBytes;
  75  *              enBytes.Construct(byteCount + 1); // one extra byte for the terminating '\0' appended after the loop
  76  *
  77  *              int charIndex = 0;
  78  *              int charCount = 100;
  79  *
  80  *              while (charIndex < charBufSize)
  81  *              {
  82  *                      ByteBuffer* pBB = null;
  83  *
  84  *                      if (charBufSize - charIndex < charCount)
  85  *                      {
  86  *                              charCount = charBufSize - charIndex;
  87  *                      }
  88  *
  89  *                      // Converts Unicode to Utf8 using Utf8Encoder.
  90  *                      pBB = utf8En.GetBytesN(*pMB, charIndex, charCount); // flush is false
  91  *
  92  *                      enBytes.CopyFrom(*pBB);
  93  *                      charIndex += charCount;
  94  *
  95  *                      delete pBB;
  96  *              }
  97  *
  98  *              enBytes.SetByte('\0');
  99  *              enBytes.Rewind();
 100  *
 101  *              delete pMB;
 102  *      }
 103  * @endcode
 104  */
 105
 106
 107 class _OSP_EXPORT_ Utf8Encoder
 108         : public Encoder
 109 {
 110 public:
 111         /**
 112          * This is the default constructor for this class.
 113          *
 114          * @since       2.0
 115          *
 116          */
 117         Utf8Encoder(void);
 118
 119         /**
 120          * This is the destructor for this class. @n
 121          * This destructor overrides Tizen::Text::Encoder::~Encoder().
 122          *
 123          * @since       2.0
 124          */
 125         virtual ~Utf8Encoder(void);
 126
 127         /**
 128          * Gets the total number of bytes that are required to encode a range of characters in the specified Tizen::Base::WcharBuffer instance.
 129          *
 130          * @since       2.0
 131          *
 132          * @return      An error code
 133          * @param[in]   chars                    An instance of Tizen::Base::WcharBuffer to encode
 134          * @param[in]   charIndex                The index from where encoding begins in the Tizen::Base::WcharBuffer instance
 135          * @param[in]   charCount                The total number of characters to encode
 136          * @param[out]  byteCount                The total number of bytes required to encode the specified range of characters
 137          * @param[in]   flush                    Set to @c true to allow this instance to flush its state at the end of the conversion, @n
 138          *                                       else @c false
 139          * @exception   E_SUCCESS                The method is successful.
 140          * @exception   E_INVALID_ARG            A specified input parameter is invalid, or
 141          *                                       the specified @c chars is empty.
 142          * @exception   E_OUT_OF_RANGE           The value of an argument is outside the valid range defined by the method, or
 143          *                                       the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
 144          * @exception   E_UNDERFLOW              This operation has caused the memory to underflow, or
 145          *                                       the sum of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
 146          * @exception   E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
 147          * @see         Utf8Decoder::GetCharCount()
 148          */
 149         virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars,
 150                 int charIndex, int charCount, int& byteCount, bool flush = false) const;
 151
 152         /**
 153          * Encodes an instance of Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer.
 154          *
 155          * @since       2.0
 156          *
 157          * @return      A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
 158          *              else @c null if an exception occurs @n
 159          *              The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
 160          * @param[in]   chars                    An instance of Tizen::Base::WcharBuffer to encode
 161          * @param[in]   flush                    Set to @c true to allow this instance to flush its state at the end of the conversion, @n
 162          *                                       else @c false
 163          * @exception   E_SUCCESS                The method is successful.
 164          * @exception   E_OUT_OF_MEMORY          The memory is insufficient.
 165          * @exception   E_INVALID_ARG            A specified input parameter is invalid, or
 166          *                                       the specified @c chars is empty.
 167          * @exception   E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
 168          * @remarks     The specific error code can be accessed using the GetLastResult() method.
 169          * @see         Utf8Decoder::GetCharsN()
 170          */
 171         virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::WcharBuffer& chars, bool flush = false) const;
 172
 173         /**
 174          * Encodes an instance of Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer as per the specified range.
 175          *
 176          * @since       2.0
 177          *
 178          * @return      A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
 179          *              else @c null if an exception occurs @n
 180          *              The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
 181          * @param[in]   chars                    An instance of Tizen::Base::WcharBuffer to encode
 182          * @param[in]   charIndex                The index from where encoding begins in the Tizen::Base::WcharBuffer instance
 183          * @param[in]   charCount                The total number of characters to encode
 184          * @param[in]   flush                    Set to @c true to allow this instance to flush its state at the end of the conversion, @n
 185          *                                       else @c false
 186          * @exception   E_SUCCESS                The method is successful.
 187          * @exception   E_OUT_OF_MEMORY          The memory is insufficient.
 188          * @exception   E_INVALID_ARG            A specified input parameter is invalid, or
 189          *                                       the specified @c chars is empty.
 190          * @exception   E_OUT_OF_RANGE           The value of an argument is outside the valid range defined by the method, or
 191          *                                       the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
 192          * @exception   E_UNDERFLOW              This operation has caused the memory to underflow, or
 193          *                                       the sum of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
 194          * @exception   E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
 195          * @remarks     The specific error code can be accessed using the GetLastResult() method.
 196          * @remarks     The pointer to the Tizen::Base::ByteBuffer instance is not terminated by a @c null character.
 197          * @see         Utf8Decoder::GetCharsN()
 198          */
 199         virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount,
 200                 bool flush = false) const;
 201
 202 private:
 203         /**
 204          * The implementation of this copy constructor is intentionally blank and declared as private to
 205          * prohibit copying of objects.
 206          */
 207         Utf8Encoder(const Utf8Encoder& utf8Encoder);
 208
 209         /**
 210          * The implementation of this copy assignment operator is intentionally blank and declared as private
 211          * to prohibit copying of objects.
 212          */
 213         Utf8Encoder& operator =(const Utf8Encoder& utf8Encoder);
 214
 215         friend class _Utf8EncoderImpl;
 216         class _Utf8EncoderImpl* __pUtf8EncoderImpl;
 217 };
 218
 219 } } // Tizen::Text
 220 #endif //_FTEXT_UTF8_ENCODER_H_