2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
4 // Licensed under the Apache License, Version 2.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 * @file FTextUtf8Encoding.h
19 * @brief This is the header file for the %Utf8Encoding class.
21 * This header file contains the declarations of the %Utf8Encoding class.
22 * This class is derived from the Encoding class.
24 #ifndef _FTEXT_UTF8_ENCODING_H_
25 #define _FTEXT_UTF8_ENCODING_H_
27 #include <FTextEncoding.h>
28 #include <FTextUtf8Encoder.h>
29 #include <FTextUtf8Decoder.h>
32 namespace Tizen { namespace Text
37 * @brief This class is an implementation of the UTF-8 encoding.
41 * @final This class is not intended for extension.
43 * The %Utf8Encoding class is an implementation of the UTF-8 encoding.
44 * Unicode Transformation Format-8 (UTF-8) is a variable-width character encoding widely used on the internet. It encodes each Unicode code point as a sequence of one to four 8-bit bytes. UTF-8 encoding supports all the Unicode character values and surrogates.
46 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/text/converting_all_text_data.htm">Converting All Text Data at Once</a>.
48 * The following example demonstrates how to use the %Utf8Encoding class.
54 * using namespace Tizen::Base;
55 * using namespace Tizen::Text;
58 * MyClass::Utf8EncodingSample(void)
62 * String str(L"(\u03a0) and (\u03a3)");
65 * utf8.GetByteCount(str, byteCount);
68 * ByteBuffer* pBuffer = utf8.GetBytesN(str);
71 * utf8.GetCharCount(*pBuffer, charCount);
75 * utf8.GetString(*pBuffer, decodedStr);
77 * if (str.Equals(decodedStr))
89 class _OSP_EXPORT_ Utf8Encoding
94 * This is the default constructor for this class.
101 * This is the destructor for this class. @n
102 * This destructor overrides Tizen::Text::Encoding::~Encoding().
106 virtual ~Utf8Encoding(void);
109 * Gets the total number of bytes that are generated by encoding an instance of the specified string. @n
110 * The %GetByteCount() method determines the total number of bytes that are generated when the specified string is encoded.
114 * @return An error code
115 * @param[in] str The string to encode
116 * @param[out] byteCount The total number of bytes required to encode the string
117 * @exception E_SUCCESS The method is successful.
118 * @exception E_INVALID_ARG Either of the following conditions has occurred:
119 * - A specified input parameter is invalid.
120 * - The specified @c str is an empty string.
121 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
122 * @see GetMaxByteCount()
124 virtual result GetByteCount(const Tizen::Base::String& str, int& byteCount) const;
127 * Gets the total number of bytes that are generated by encoding an instance of the specified Tizen::Base::WcharBuffer. @n
128 * The %GetByteCount() method determines the exact number of bytes
129 * produced if the given array of characters is encoded.
133 * @return An error code
134 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
135 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
136 * @exception E_SUCCESS The method is successful.
137 * @exception E_INVALID_ARG Either of the following conditions has occurred:
138 * - A specified input parameter is invalid.
139 * - The specified @c chars is empty.
140 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
141 * @see GetMaxByteCount()
143 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int& byteCount) const;
146 * Gets the total number of bytes that are required to encode a range of characters in the specified Tizen::Base::WcharBuffer instance. @n
147 * The %GetByteCount() method determines the total number of bytes that are generated when the specified array of characters is encoded.
151 * @return An error code
152 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
153 * @param[in] charIndex The index from where the encoding begins in the Tizen::Base::WcharBuffer instance
154 * @param[in] charCount The total number of characters to encode
155 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
156 * @exception E_SUCCESS The method is successful.
157 * @exception E_INVALID_ARG Either of the following conditions has occurred:
158 * - A specified input parameter is invalid.
159 * - The specified @c chars is empty.
160 * @exception E_OUT_OF_RANGE Either of the following conditions has occurred:
161 * - A specified input parameter is outside the valid range defined by the method.
162 * - The length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
163 * @exception E_UNDERFLOW Either of the following conditions has occurred:
164 * - This operation has caused the memory to underflow.
165 * - The sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
166 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
167 * @see GetMaxByteCount()
169 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount, int& byteCount) const;
172 * Encodes an instance of the specified Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer.
176 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
177 * else @c null if an exception occurs @n
178 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
179 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
180 * @exception E_SUCCESS The method is successful.
181 * @exception E_OUT_OF_MEMORY The memory is insufficient.
182 * @exception E_INVALID_ARG The specified @c chars is empty or invalid.
183 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
184 * @remarks The specific error code can be accessed using the GetLastResult() method.
187 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::WcharBuffer& chars) const;
190 * Encodes an instance of the specified Tizen::Base::String into an instance of Tizen::Base::ByteBuffer.
194 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
195 * else @c null if an exception occurs @n
196 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
197 * @param[in] str The string to encode
198 * @exception E_SUCCESS The method is successful.
199 * @exception E_OUT_OF_MEMORY The memory is insufficient.
200 * @exception E_INVALID_ARG Either of the following conditions has occurred:
201 * - A specified input parameter is invalid.
202 * - The specified @c str is an empty string.
203 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
204 * @remarks The specific error code can be accessed using the GetLastResult() method.
207 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::String& str) const;
210 * Encodes an instance of Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
211 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
215 * @return An error code
216 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
217 * @param[in] charIndex The index from where the encoding begins in the Tizen::Base::WcharBuffer instance
218 * @param[in] charCount The total number of characters to encode
219 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
220 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
221 * @exception E_SUCCESS The method is successful.
222 * @exception E_OUT_OF_MEMORY The memory is insufficient.
223 * @exception E_INVALID_ARG Either of the following conditions has occurred:
224 * - A specified input parameter is invalid.
225 * - The specified @c chars or @c bytes is empty.
226 * @exception E_OUT_OF_RANGE Either of the following conditions has occurred:
227 * - A specified input parameter is outside the valid range defined by the method.
228 * - The length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
229 * @exception E_UNDERFLOW Either of the following conditions has occurred:
230 * - This operation has caused the memory to underflow.
231 * - The sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
232 * @exception E_OVERFLOW Either of the following conditions has occurred:
233 * - This operation has caused the memory to overflow.
234 * - The specified @c bytes does not contain sufficient space to store the encoded characters.
235 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
238 virtual result GetBytes(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount,
239 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
242 * Encodes an instance of Tizen::Base::String into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
243 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
247 * @return An error code
248 * @param[in] str The string to encode
249 * @param[in] charIndex The index from where the encoding begins in the Tizen::Base::String instance
250 * @param[in] charCount The total number of characters to encode
251 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
252 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
253 * @exception E_SUCCESS The method is successful.
254 * @exception E_OUT_OF_MEMORY The memory is insufficient.
255 * @exception E_INVALID_ARG Either of the following conditions has occurred:
256 * - A specified input parameter is invalid.
257 * - The specified @c str or @c bytes is empty.
258 * @exception E_OUT_OF_RANGE Either of the following conditions has occurred:
259 * - A specified input parameter is outside the valid range defined by the method.
260 * - The length of the specified @c charIndex or @c charCount is greater than the length of the specified @c str.
261 * @exception E_UNDERFLOW Either of the following conditions has occurred:
262 * - This operation has caused the memory to underflow.
263 * - The sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c str.
264 * @exception E_OVERFLOW Either of the following conditions has occurred:
265 * - This operation has caused the memory to overflow.
266 * - The specified @c bytes does not contain sufficient space to store the encoded characters.
267 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
270 virtual result GetBytes(const Tizen::Base::String& str, int charIndex, int charCount,
271 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
274 * Gets the total number of characters that are generated by decoding an instance of Tizen::Base::ByteBuffer. @n
275 * The %GetCharCount() method determines the exact number of characters
276 * produced if the given range of bytes is converted.
280 * @return An error code
281 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
282 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
283 * @exception E_SUCCESS The method is successful.
284 * @exception E_INVALID_ARG Either of the following conditions has occurred:
285 * - A specified input parameter is invalid.
286 * - The specified @c bytes is empty.
287 * @exception E_INVALID_ENCODING_RANGE The specified @c bytes contains code points that are outside the bounds of the character encoding scheme.
288 * @see GetMaxCharCount()
290 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int& charCount) const;
293 * Gets the total number of characters that are generated by decoding a range of elements specified in the Tizen::Base::ByteBuffer instance. @n
294 * The %GetCharCount() method determines the exact number of characters
295 * produced if the given range of bytes is converted.
299 * @return An error code
300 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
301 * @param[in] byteIndex The index from where the decoding begins
302 * @param[in] byteCount The total number of bytes to decode
303 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
304 * @exception E_SUCCESS The method is successful.
305 * @exception E_INVALID_ARG Either of the following conditions has occurred:
306 * - A specified input parameter is invalid.
307 * - The specified @c bytes is empty.
308 * @exception E_OUT_OF_RANGE Either of the following conditions has occurred:
309 * - A specified input parameter is outside the valid range defined by the method.
310 * - The length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
311 * @exception E_UNDERFLOW Either of the following conditions has occurred:
312 * - This operation has caused the memory to underflow.
313 * - The sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
314 * @exception E_INVALID_ENCODING_RANGE The specified @c bytes contains code points that are outside the bounds of the character encoding scheme.
315 * @see GetMaxCharCount()
317 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount, int& charCount) const;
320 * Decodes an instance of the specified Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer.
324 * @return A pointer to the Tizen::Base::WcharBuffer instance where the resultant decoded data is stored, @n
325 * else @c null if an exception occurs @n
326 * The buffer limit is the position of the last decoded character plus one and the starting position is zero.
327 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
328 * @exception E_SUCCESS The method is successful.
329 * @exception E_OUT_OF_MEMORY The memory is insufficient.
330 * @exception E_INVALID_ARG The specified @c bytes is empty or invalid.
331 * @exception E_INVALID_ENCODING_RANGE The specified @c bytes contains code points that are outside the bounds of the character encoding scheme.
332 * @remarks The specific error code can be accessed using the GetLastResult() method.
335 virtual Tizen::Base::WcharBuffer* GetCharsN(const Tizen::Base::ByteBuffer& bytes) const;
338 * Decodes an instance of Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer as per the specified range. @n
339 * The position and limit of the %Tizen::Base::WcharBuffer instance are not changed.
343 * @return An error code
344 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
345 * @param[in] byteIndex The index from where the decoding begins
346 * @param[in] byteCount The total number of bytes to decode
347 * @param[out] chars The Tizen::Base::WcharBuffer instance where the resultant decoded data is stored
348 * @param[in] charIndex The starting index of the resultant decoding in the Tizen::Base::WcharBuffer instance
349 * @exception E_SUCCESS The method is successful.
350 * @exception E_OUT_OF_MEMORY The memory is insufficient.
351 * @exception E_INVALID_ARG Either of the following conditions has occurred:
352 * - A specified input parameter is invalid.
353 * - The specified @c bytes or @c chars is empty.
354 * @exception E_OUT_OF_RANGE Either of the following conditions has occurred:
355 * - A specified input parameter is outside the valid range defined by the method.
356 * - The length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
357 * @exception E_UNDERFLOW Either of the following conditions has occurred:
358 * - This operation has caused the memory to underflow.
359 * - The sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
360 * @exception E_OVERFLOW Either of the following conditions has occurred:
361 * - This operation has caused the memory to overflow.
362 * - The specified @c chars does not contain sufficient space to store the decoded bytes.
363 * @exception E_INVALID_ENCODING_RANGE The specified @c bytes contains code points that are outside the bounds of the character encoding scheme.
366 virtual result GetChars(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount,
367 Tizen::Base::WcharBuffer& chars, int charIndex = 0) const;
370 * Gets a string that contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
374 * @return An error code
375 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
376 * @param[out] str A Tizen::Base::String instance @n
377 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
378 * @exception E_SUCCESS The method is successful.
379 * @exception E_OUT_OF_MEMORY The memory is insufficient.
380 * @exception E_INVALID_ARG Either of the following conditions has occurred:
381 * - A specified input parameter is invalid.
382 * - The specified @c bytes is empty.
383 * @exception E_INVALID_ENCODING_RANGE The specified @c bytes contains code points that are outside the bounds of the character encoding scheme.
386 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, Tizen::Base::String& str) const;
389 * Gets a string that contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
393 * @return An error code
394 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
395 * @param[in] index The index from where the decoding begins
396 * @param[in] count The total number of bytes to decode
397 * @param[out] str A Tizen::Base::String instance @n
398 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
399 * @exception E_SUCCESS The method is successful.
400 * @exception E_OUT_OF_MEMORY The memory is insufficient.
401 * @exception E_INVALID_ARG Either of the following conditions has occurred:
402 * - A specified input parameter is invalid.
403 * - The specified @c bytes is empty.
404 * @exception E_OUT_OF_RANGE Either of the following conditions has occurred:
405 * - A specified input parameter is outside the valid range defined by the method.
406 * - The sum of the length of the specified @c index and @c count is greater than the length of the specified @c bytes.
407 * @exception E_UNDERFLOW Either of the following conditions has occurred:
408 * - This operation has caused the memory to underflow.
409 * - The sum of the length of the specified @c index and @c count is greater than the length of the specified @c bytes.
410 * @exception E_INVALID_ENCODING_RANGE The specified @c bytes contains code points that are outside the bounds of the character encoding scheme.
413 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, int index, int count, Tizen::Base::String& str) const;
417 * Gets the maximum number of bytes required for encoding the given number of characters.
421 * @return The maximum number of bytes required for encoding the given number of characters
422 * @param[in] charCount The total number of characters to encode
423 * @remarks This method determines an appropriate buffer size for the byte arrays passed to GetBytes() for encoding.
424 * @see GetByteCount()
426 virtual int GetMaxByteCount(int charCount) const;
430 * Gets the maximum number of characters that are generated by decoding the specified number of bytes.
434 * @return The maximum number of characters generated by decoding the specified number of bytes
435 * @param[in] byteCount The total number of bytes to decode
436 * @remarks This method determines an appropriate buffer size for the character arrays passed to
437 * GetChars() or a decoder for decoding.
438 * @see GetCharCount()
440 virtual int GetMaxCharCount(int byteCount) const;
443 * Gets the encoder for the current encoding.
447 * @return A pointer to the Encoder instance for the current encoding
448 * @remarks Contrary to GetBytes(), an encoder can convert partial sequences of characters into
449 * partial sequences of bytes by maintaining the appropriate states between the conversions.
451 virtual Encoder* GetEncoderN(void) const;
454 * Gets the decoder for the current encoding.
458 * @return A pointer to the Decoder instance for the current encoding
459 * @remarks Contrary to GetChars(), a decoder can convert partial sequences of bytes
460 * into partial sequences of characters by maintaining the appropriate states between the conversions.
462 virtual Decoder* GetDecoderN(void) const;
465 * Gets the encoding type of the current instance.
469 * @return The encoding type
471 virtual Tizen::Base::String GetEncodingType(void) const;
475 * The implementation of this copy constructor is intentionally blank and declared as private to
476 * prohibit copying of objects.
478 Utf8Encoding(const Utf8Encoding& utf8Encoding);
481 * The implementation of this copy assignment operator is intentionally blank and declared as private
482 * to prohibit copying of objects.
484 Utf8Encoding& operator =(const Utf8Encoding& utf8Encoding);
486 friend class Utf8Decoder;
487 friend class _Utf8EncodingImpl;
488 class _Utf8EncodingImpl* __pUtf8EncodingImpl;
492 #endif //_FTEXT_UTF8_ENCODING_H_