inc/FTextUtf8Decoder.h

   1 //
   2 // Open Service Platform
   3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
   4 //
   5 // Licensed under the Apache License, Version 2.0 (the License);
   6 // you may not use this file except in compliance with the License.
   7 // You may obtain a copy of the License at
   8 //
   9 //     http://www.apache.org/licenses/LICENSE-2.0
  10 //
  11 // Unless required by applicable law or agreed to in writing, software
  12 // distributed under the License is distributed on an "AS IS" BASIS,
  13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 // See the License for the specific language governing permissions and
  15 // limitations under the License.
  16 //
  17
  18 /**
  19  * @file                FTextUtf8Decoder.h
  20  * @brief               This is the header file for the %Utf8Decoder class.
  21  *
  22  * This header file contains the declarations of the %Utf8Decoder class.
  23  *
  24  */
  25 #ifndef _FTEXT_UTF8_DECODER_H_
  26 #define _FTEXT_UTF8_DECODER_H_
  27
  28 #include <FTextDecoder.h>
  29
  30
  31 namespace Tizen { namespace Text
  32 {
  33 /**
  34  * @class       Utf8Decoder
  35  * @brief       This class is an implementation of the UTF-8 decoder.
  36  *
  37  * @since       2.0
  38  *
  39  * @final       This class is not intended for extension.
  40  *
  41  * The %Utf8Decoder class converts blocks of bytes into decoded blocks of characters.
  42  * Unicode Transformation Format-8 (UTF-8) is a variable-width character encoding that is widely used on the internet.
  43  * It encodes Unicode code points as sequences of one or more 8-bit bytes.
  44  * UTF-8 encoding supports all Unicode character values and surrogates.
  45  *
  46  * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/text/converting_text_data_separate_blocks.htm">Converting Text Data in Separate Blocks</a>.
  47  *
  48  * The following example demonstrates how to use the %Utf8Decoder class.
  49  *
  50  *      @code
  51  *      #include <FBase.h>
  52  *      #include <FText.h>
  53  *
  54  *      using namespace Tizen::Base;
  55  *      using namespace Tizen::Text;
  56  *
  57  *      void
  58  *      MyClass::Utf8DecoderSample(void)
  59  *      {
  60  *              Utf8Decoder utf8De;
  61  *
  62  *              // Prepares a ByteBuffer filled with long strings that are decoded into Unicode strings below
  63  *              ByteBuffer* pBB = PrepareUtf8DecoderSample();
  64  *
  65  *              int byteBufSize = pBB->GetCapacity();
  66  *
  67  *              int charCount;
  68  *              utf8De.GetCharCount(*pBB, 0, byteBufSize, charCount);
  69  *
  70  *              WcharBuffer deChars;
  71  *              deChars.Construct(charCount + 1);               // for L'\0'
  72  *
  73  *              int byteIndex = 0;
  74  *              int byteCount = 100;
  75  *
  76  *              while (byteIndex < byteBufSize)
  77  *              {
  78  *                      WcharBuffer* pMB = null;
  79  *
  80  *                      if (byteBufSize - byteIndex < byteCount)
  81  *                      {
  82  *                              byteCount = byteBufSize - byteIndex;
  83  *                      }
  84  *
  85  *                      // Converts Utf8 to Unicode using Utf8Decoder
  86  *                      pMB = utf8De.GetCharsN(*pBB, byteIndex, byteCount);
  87  *
  88  *                      deChars.CopyFrom(*pMB);
  89  *                      byteIndex += byteCount;
  90  *
  91  *                      delete pMB;
  92  *              }
  93  *
  94  *              deChars.Set(L'\0');
  95  *              deChars.Rewind();
  96  *
  97  *              delete pBB;
  98  *      }
  99  *      @endcode
 100  */
 101
 102
 103 class _OSP_EXPORT_ Utf8Decoder
 104         : public Decoder
 105 {
 106 public:
 107         /**
 108          * This is the default constructor for this class.
 109          *
 110          * @since       2.0
 111          */
 112         Utf8Decoder(void);
 113         /**
 114          * This is the destructor for this class. @n
 115          * This destructor overrides Tizen::Text::Decoder::~Decoder().
 116          *
 117          * @since       2.0
 118          */
 119         virtual ~Utf8Decoder(void);
 120
 121         /**
 122          * Gets the total number of characters that are generated by decoding the specified range of an instance of Tizen::Base::ByteBuffer.
 123          *
 124          * @since       2.0
 125          *
 126          * @return      An error code
 127          * @param[in]   bytes                    An instance of Tizen::Base::ByteBuffer to decode
 128          * @param[in]   byteIndex                The index from where decoding begins
 129          * @param[in]   byteCount                The total number of bytes to decode
 130          * @param[out]  charCount                The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
 131          * @param[in]   flush                    Set to @c true to allow this instance to flush its state at the end of the conversion, @n
 132          *                                       else @c false (the default)
 133          * @exception   E_SUCCESS                The method is successful.
 134          * @exception   E_INVALID_ARG            A specified input parameter is invalid, or
 135          *                                       the specified @c bytes is empty.
 136          * @exception   E_OUT_OF_RANGE           The value of an argument is outside the valid range defined by the method, or
 137          *                                       the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
 138          * @exception   E_UNDERFLOW              This operation has caused the memory to underflow, or
 139          *                                       the sum of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
 140          * @exception   E_INVALID_ENCODING_RANGE The conversion has failed due to an invalid encoding range.
 141          */
 142         virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount, int& charCount, bool flush = false) const;
 143
 144         /**
 145          * Decodes an entire instance of Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer.
 146          *
 147          * @since       2.0
 148          *
 149          * @return      A pointer to the Tizen::Base::WcharBuffer instance where the resultant decoded data is stored, @n
 150          *              else @c null if an exception occurs @n
 151          *              The buffer limit is the position of the last decoded byte plus one and the starting position is zero.
 152          * @param[in]   bytes                    An instance of Tizen::Base::ByteBuffer to decode
 153          * @param[in]   flush                    Set to @c true to allow this instance to flush its state at the end of the conversion, @n
 154          *                                       else @c false (the default)
 155          * @exception   E_SUCCESS                The method is successful.
 156          * @exception   E_OUT_OF_MEMORY          The memory is insufficient.
 157          * @exception   E_INVALID_ARG            A specified input parameter is invalid, or
 158          *                                       the specified @c bytes is empty.
 159          * @exception   E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
 160          * @remarks     The specific error code can be accessed using the GetLastResult() method.
 161          */
 162         virtual Tizen::Base::WcharBuffer* GetCharsN(const Tizen::Base::ByteBuffer& bytes, bool flush = false) const;
 163
 164         /**
 165          * Decodes the specified range of an instance of Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer.
 166          *
 167          * @since       2.0
 168          * @return      A pointer to the Tizen::Base::WcharBuffer instance where the resultant decoded data is stored, @n
 169          *              else @c null if an exception occurs @n
 170          *              The buffer limit is the position of the last decoded byte and the starting position is zero. (NOTE(review): the overload without a range documents "plus one" here - confirm which wording is correct.)
 171          * @param[in]   bytes                    An instance of Tizen::Base::ByteBuffer to decode
 172          * @param[in]   byteIndex                The index from where decoding begins
 173          * @param[in]   byteCount                The total number of bytes to decode
 174          * @param[in]   flush                    Set to @c true to allow this instance to flush its state at the end of the conversion, @n
 175          *                                       else @c false (the default)
 176          * @exception   E_SUCCESS                The method is successful.
 177          * @exception   E_OUT_OF_MEMORY          The memory is insufficient.
 178          * @exception   E_INVALID_ARG            A specified input parameter is invalid, or
 179          *                                       the specified @c bytes is empty.
 180          * @exception   E_OUT_OF_RANGE           The value of an argument is outside the valid range defined by the method, or
 181          *                                       the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
 182          * @exception   E_UNDERFLOW              This operation has caused the memory to underflow, or
 183          *                                       the sum of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
 184          * @exception   E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
 185          * @remarks     The GetCharsN() method maintains state consistency between conversions.
 186          * @remarks     The specific error code can be accessed using the GetLastResult() method.
 187          * @remarks     The pointer to the Tizen::Base::ByteBuffer instance is not terminated by a @c null character. (NOTE(review): presumably this refers to the returned Tizen::Base::WcharBuffer - confirm.)
 188          *
 189          */
 190         virtual Tizen::Base::WcharBuffer* GetCharsN(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount,
 191                 bool flush = false) const;
 192
 193 private:
 194         /**
 195          * The implementation of this copy constructor is intentionally blank and declared as private to
 196          * prohibit copying of objects.
 197          */
 198         Utf8Decoder(const Utf8Decoder& utf8Decoder);
 199
 200         /**
 201          * The implementation of this copy assignment operator is intentionally blank and declared as private
 202          * to prohibit copying of objects.
 203          */
 204         Utf8Decoder& operator =(const Utf8Decoder& utf8Decoder);
 205
 206         friend class _Utf8DecoderImpl; // Grants _Utf8DecoderImpl access to the private members of this class.
 207         class _Utf8DecoderImpl* __pUtf8DecoderImpl; // Pointer to the _Utf8DecoderImpl instance; its ownership and lifetime are not visible in this header.
 208 };
 209
 210
 211 } } // Tizen::Text
 212 #endif //_FTEXT_UTF8_DECODER_H_