2 // Open Service Platform
3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 // Licensed under the Apache License, Version 2.0 (the License);
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
19 * @file FTextUtf8Encoding.h
20 * @brief This is the header file for the %Utf8Encoding class.
22 * This header file contains the declarations of the %Utf8Encoding class.
23 * This class is derived from the Encoding class.
25 #ifndef _FTEXT_UTF8_ENCODING_H_
26 #define _FTEXT_UTF8_ENCODING_H_
28 #include <FTextEncoding.h>
29 #include <FTextUtf8Encoder.h>
30 #include <FTextUtf8Decoder.h>
33 namespace Tizen { namespace Text
38 * @brief This class is an implementation of the UTF-8 encoding.
42 * @final This class is not intended for extension.
44 * The %Utf8Encoding class is an implementation of the UTF-8 encoding.
45 * Unicode Transformation Format-8 (UTF-8) is a variable-length character encoding widely used on the internet. It converts each Unicode code point into a sequence of one to four 8-bit bytes. UTF-8 encoding supports all the Unicode character values and surrogates.
47 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/text/converting_all_text_data.htm">Converting All Text Data at Once</a>.
49 * The following example demonstrates how to use the %Utf8Encoding class.
55 * using namespace Tizen::Base;
56 * using namespace Tizen::Text;
59 * MyClass::Utf8EncodingSample(void)
63 * String str(L"(\u03a0) and (\u03a3)");
66 * utf8.GetByteCount(str, byteCount);
69 * ByteBuffer* pBuffer = utf8.GetBytesN(str);
72 * utf8.GetCharCount(*pBuffer, charCount);
76 * utf8.GetString(*pBuffer, decodedStr);
78 * if (str.Equals(decodedStr))
90 class _OSP_EXPORT_ Utf8Encoding
95 * This is the default constructor for this class.
102 * This is the destructor for this class. @n
103 * This destructor overrides Tizen::Text::Encoding::~Encoding().
107 virtual ~Utf8Encoding(void);
110 * Gets the total number of bytes that are generated by encoding an instance of the specified string. @n
111 * The %GetByteCount() method determines the total number of bytes that are generated when the specified string is encoded.
115 * @return An error code
116 * @param[in] str The string to encode
117 * @param[out] byteCount The total number of bytes required to encode the string
118 * @exception E_SUCCESS The method is successful.
119 * @exception E_INVALID_ARG A specified input parameter is invalid, or
120 * the specified @c str is an empty string.
121 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
122 * @see GetMaxByteCount()
124 virtual result GetByteCount(const Tizen::Base::String& str, int& byteCount) const;
127 * Gets the total number of bytes that are generated by encoding an instance of the specified Tizen::Base::WcharBuffer. @n
128 * The %GetByteCount() method determines the exact number of bytes
129 * produced if the given array of characters is encoded.
133 * @return An error code
134 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
135 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
136 * @exception E_SUCCESS The method is successful.
137 * @exception E_INVALID_ARG A specified input parameter is invalid, or
138 * the specified @c chars is empty.
139 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
140 * @see GetMaxByteCount()
142 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int& byteCount) const;
145 * Gets the total number of bytes that are required to encode a range of characters in the specified Tizen::Base::WcharBuffer instance. @n
146 * The %GetByteCount() method determines the total number of bytes that are generated when the specified array of characters is encoded.
150 * @return An error code
151 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
152 * @param[in] charIndex The index from where encoding begins in the Tizen::Base::WcharBuffer instance
153 * @param[in] charCount The total number of characters to encode
154 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
155 * @exception E_SUCCESS The method is successful.
156 * @exception E_INVALID_ARG A specified input parameter is invalid, or
157 * the specified @c chars is empty.
158 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
159 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
160 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
161 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
162 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
163 * @see GetMaxByteCount()
165 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount, int& byteCount) const;
168 * Encodes an instance of the specified Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer.
172 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
173 * else @c null if an exception occurs @n
174 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
175 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
176 * @exception E_SUCCESS The method is successful.
177 * @exception E_OUT_OF_MEMORY The memory is insufficient.
178 * @exception E_INVALID_ARG The specified @c chars is empty or invalid.
179 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
180 * @remarks The specific error code can be accessed using the GetLastResult() method.
183 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::WcharBuffer& chars) const;
186 * Encodes an instance of the specified Tizen::Base::String into an instance of Tizen::Base::ByteBuffer.
190 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
191 * else @c null if an exception occurs @n
192 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
193 * @param[in] str A string to encode
194 * @exception E_SUCCESS The method is successful.
195 * @exception E_OUT_OF_MEMORY The memory is insufficient.
196 * @exception E_INVALID_ARG A specified input parameter is invalid, or
197 * the specified @c str is an empty string.
198 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
199 * @remarks The specific error code can be accessed using the GetLastResult() method.
202 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::String& str) const;
205 * Encodes an instance of Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
206 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
210 * @return An error code
211 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
212 * @param[in] charIndex The index from where encoding begins in the Tizen::Base::WcharBuffer instance
213 * @param[in] charCount The total number of characters to encode
214 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
215 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
216 * @exception E_SUCCESS The method is successful.
217 * @exception E_OUT_OF_MEMORY The memory is insufficient.
218 * @exception E_INVALID_ARG A specified input parameter is invalid, or
219 * the specified @c chars or @c bytes is empty.
220 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
221 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
222 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
223 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
224 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
225 * the specified @c bytes does not contain sufficient space to store the encoded characters.
226 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
229 virtual result GetBytes(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount,
230 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
233 * Encodes an instance of Tizen::Base::String into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
234 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
238 * @return An error code
239 * @param[in] str A string to encode
240 * @param[in] charIndex The index from where encoding begins in the Tizen::Base::String instance
241 * @param[in] charCount The total number of characters to encode
242 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
243 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
244 * @exception E_SUCCESS The method is successful.
245 * @exception E_OUT_OF_MEMORY The memory is insufficient.
246 * @exception E_INVALID_ARG A specified input parameter is invalid, or
247 * the specified @c str or @c bytes is empty.
248 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
249 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c str.
250 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
251 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c str.
252 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
253 * the specified @c bytes does not contain sufficient space to store the encoded characters.
254 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
257 virtual result GetBytes(const Tizen::Base::String& str, int charIndex, int charCount,
258 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
261 * Gets the total number of characters that are generated by decoding an instance of Tizen::Base::ByteBuffer. @n
262 * The %GetCharCount() method determines the exact number of characters
263 * produced if the given range of bytes is converted.
267 * @return An error code
268 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
269 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
270 * @exception E_SUCCESS The method is successful.
271 * @exception E_INVALID_ARG A specified input parameter is invalid, or
272 * the specified @c bytes is empty.
273 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
274 * @see GetMaxCharCount()
276 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int& charCount) const;
279 * Gets the total number of characters that are generated by decoding a range of elements specified in the Tizen::Base::ByteBuffer instance. @n
280 * The %GetCharCount() method determines the exact number of characters
281 * produced if the given range of bytes is converted.
285 * @return An error code
286 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
287 * @param[in] byteIndex The index from where decoding begins
288 * @param[in] byteCount The total number of bytes to decode
289 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
290 * @exception E_SUCCESS The method is successful.
291 * @exception E_INVALID_ARG A specified input parameter is invalid, or
292 * the specified @c bytes is empty.
293 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
294 * the length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
295 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
296 * the sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
297 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
298 * @see GetMaxCharCount()
300 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount, int& charCount) const;
303 * Decodes an instance of the specified Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer.
307 * @return A pointer to the Tizen::Base::WcharBuffer instance where the resultant decoded data is stored, @n
308 * else @c null if an exception occurs @n
309 * The buffer limit is the position of the last decoded character plus one and the starting position is zero.
310 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
311 * @exception E_SUCCESS The method is successful.
312 * @exception E_OUT_OF_MEMORY The memory is insufficient.
313 * @exception E_INVALID_ARG The specified @c bytes is empty or invalid.
314 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
315 * @remarks The specific error code can be accessed using the GetLastResult() method.
318 virtual Tizen::Base::WcharBuffer* GetCharsN(const Tizen::Base::ByteBuffer& bytes) const;
321 * Decodes an instance of Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer as per the specified range. @n
322 * The position and limit of the %Tizen::Base::WcharBuffer instance are not changed.
326 * @return An error code
327 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
328 * @param[in] byteIndex The index from where decoding begins
329 * @param[in] byteCount The total number of bytes to decode
330 * @param[out] chars The Tizen::Base::WcharBuffer instance where the resultant decoded data is stored
331 * @param[in] charIndex The starting index of the resultant decoded data in the Tizen::Base::WcharBuffer instance
332 * @exception E_SUCCESS The method is successful.
333 * @exception E_OUT_OF_MEMORY The memory is insufficient.
334 * @exception E_INVALID_ARG A specified input parameter is invalid, or
335 * the specified @c bytes or @c chars is empty.
336 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
337 * the length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
338 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
339 * the sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
340 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
341 * the specified @c chars does not contain sufficient space to store the decoded bytes.
342 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
345 virtual result GetChars(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount,
346 Tizen::Base::WcharBuffer& chars, int charIndex = 0) const;
349 * Gets a string containing the decoded representation of the specified Tizen::Base::ByteBuffer instance.
353 * @return An error code
354 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
355 * @param[out] str A Tizen::Base::String instance @n
356 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
357 * @exception E_SUCCESS The method is successful.
358 * @exception E_OUT_OF_MEMORY The memory is insufficient.
359 * @exception E_INVALID_ARG A specified input parameter is invalid, or
360 * the specified @c bytes is empty.
361 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
364 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, Tizen::Base::String& str) const;
367 * Gets a string containing the decoded representation of the specified Tizen::Base::ByteBuffer instance.
371 * @return An error code
372 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
373 * @param[in] index The index from where decoding begins
374 * @param[in] count The total number of bytes to decode
375 * @param[out] str A Tizen::Base::String instance @n
376 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
377 * @exception E_SUCCESS The method is successful.
378 * @exception E_OUT_OF_MEMORY The memory is insufficient.
379 * @exception E_INVALID_ARG A specified input parameter is invalid, or
380 * the specified @c bytes is empty.
381 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
382 * the length of the specified @c index or @c count is greater than the length of the specified @c bytes.
383 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
384 * the sum of the length of the specified @c index and @c count is greater than the length of the specified @c bytes.
385 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
388 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, int index, int count, Tizen::Base::String& str) const;
392 * Gets the maximum number of bytes required for encoding a given number of characters.
396 * @return The maximum number of bytes required for encoding a given number of characters
397 * @param[in] charCount The total number of characters to encode
398 * @remarks This method determines an appropriate buffer size for the byte arrays passed to GetBytes() for encoding.
399 * @see GetByteCount()
402 virtual int GetMaxByteCount(int charCount) const;
406 * Gets the maximum number of characters that are generated by decoding the specified number of bytes.
410 * @return The maximum number of characters generated by decoding the specified number of bytes
411 * @param[in] byteCount The total number of bytes to decode
412 * @remarks This method determines an appropriate buffer size for the character arrays passed to
413 * GetChars() or a decoder for decoding.
414 * @see GetCharCount()
417 virtual int GetMaxCharCount(int byteCount) const;
420 * Gets the encoder for the current encoding.
424 * @return A pointer to the Encoder instance for the current encoding
425 * @remarks Contrary to GetBytes(), an encoder can convert partial sequences of characters into
426 * partial sequences of bytes by maintaining the appropriate states between the conversions.
429 virtual Encoder* GetEncoderN(void) const;
432 * Gets the decoder for the current encoding.
436 * @return A pointer to the Decoder instance for the current encoding
437 * @remarks Contrary to GetChars(), a decoder can convert partial sequences of bytes
438 * into partial sequences of characters by maintaining the appropriate states between the conversions.
441 virtual Decoder* GetDecoderN(void) const;
444 * Gets the encoding type of the current instance.
448 * @return An encoding type
450 virtual Tizen::Base::String GetEncodingType(void) const;
454 * The implementation of this copy constructor is intentionally blank and declared as private to
455 * prohibit copying of objects.
457 Utf8Encoding(const Utf8Encoding& utf8Encoding);
460 * The implementation of this copy assignment operator is intentionally blank and declared as private
461 * to prohibit copying of objects.
463 Utf8Encoding& operator =(const Utf8Encoding& utf8Encoding);
465 friend class Utf8Decoder; // Grants Utf8Decoder access to the non-public members of this class.
466 friend class _Utf8EncodingImpl; // Grants the implementation class access to the non-public members of this class.
467 class _Utf8EncodingImpl* __pUtf8EncodingImpl; // Pointer to the implementation object. NOTE(review): presumably created and owned by this instance (pimpl-style) - confirm against the .cpp.
471 #endif //_FTEXT_UTF8_ENCODING_H_