2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
4 // Licensed under the Apache License, Version 2.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 * @file FTextUtf8Encoding.h
19 * @brief This is the header file for the %Utf8Encoding class.
21 * This header file contains the declarations of the %Utf8Encoding class.
22 * This class is derived from the Encoding class.
24 #ifndef _FTEXT_UTF8_ENCODING_H_
25 #define _FTEXT_UTF8_ENCODING_H_
27 #include <FTextEncoding.h>
28 #include <FTextUtf8Encoder.h>
29 #include <FTextUtf8Decoder.h>
32 namespace Tizen { namespace Text
37 * @brief This class is an implementation of the UTF-8 encoding.
41 * @final This class is not intended for extension.
43 * The %Utf8Encoding class is an implementation of the UTF-8 encoding.
44 * Unicode Transformation Format-8 (UTF-8) is a variable-length character encoding that is widely used on the internet. It converts Unicode code points into sequences of 8-bit bytes. UTF-8 encoding supports all the Unicode character values and surrogates.
46 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/text/converting_all_text_data.htm">Converting All Text Data at Once</a>.
48 * The following example demonstrates how to use the %Utf8Encoding class.
54 * using namespace Tizen::Base;
55 * using namespace Tizen::Text;
58 * MyClass::Utf8EncodingSample(void)
62 * String str(L"(\u03a0) and (\u03a3)");
65 * utf8.GetByteCount(str, byteCount);
68 * ByteBuffer* pBuffer = utf8.GetBytesN(str);
71 * utf8.GetCharCount(*pBuffer, charCount);
75 * utf8.GetString(*pBuffer, decodedStr);
77 * if (str.Equals(decodedStr))
89 class _OSP_EXPORT_ Utf8Encoding
94 * This is the default constructor for this class.
101 * This is the destructor for this class. @n
102 * This destructor overrides Tizen::Text::Encoding::~Encoding().
// Virtual destructor; per the accompanying note it overrides Tizen::Text::Encoding::~Encoding().
106 virtual ~Utf8Encoding(void);
109 * Gets the total number of bytes that are generated by encoding the specified string. @n
110 * The %GetByteCount() method determines the total number of bytes that are generated when the specified string is encoded.
114 * @return An error code
115 * @param[in] str The string to encode
116 * @param[out] byteCount The total number of bytes required to encode the string
117 * @exception E_SUCCESS The method is successful.
118 * @exception E_INVALID_ARG A specified input parameter is invalid, or
119 * the specified @c str is an empty string.
120 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
121 * @see GetMaxByteCount()
// Reports through byteCount how many bytes encoding the whole of str would produce.
// The returned result code tells the caller whether the count is valid.
123 virtual result GetByteCount(const Tizen::Base::String& str, int& byteCount) const;
126 * Gets the total number of bytes that are generated by encoding the specified Tizen::Base::WcharBuffer instance. @n
127 * The %GetByteCount() method determines the exact number of bytes
128 * produced if the given array of characters is encoded.
132 * @return An error code
133 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
134 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
135 * @exception E_SUCCESS The method is successful.
136 * @exception E_INVALID_ARG A specified input parameter is invalid, or
137 * the specified @c chars is empty.
138 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
139 * @see GetMaxByteCount()
// Reports through byteCount how many bytes encoding the whole of chars would produce.
// The returned result code tells the caller whether the count is valid.
141 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int& byteCount) const;
144 * Gets the total number of bytes that are required to encode a range of characters in the specified Tizen::Base::WcharBuffer instance. @n
145 * The %GetByteCount() method determines the total number of bytes that are generated when the specified array of characters is encoded.
149 * @return An error code
150 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
151 * @param[in] charIndex The index from where encoding begins in the Tizen::Base::WcharBuffer instance
152 * @param[in] charCount The total number of characters to encode
153 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
154 * @exception E_SUCCESS The method is successful.
155 * @exception E_INVALID_ARG A specified input parameter is invalid, or
156 * the specified @c chars is empty.
157 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
158 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
159 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
160 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
161 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
162 * @see GetMaxByteCount()
// Range overload: reports through byteCount how many bytes are needed to encode
// charCount characters of chars, starting at charIndex.
164 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount, int& byteCount) const;
167 * Encodes an instance of the specified Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer.
171 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
172 * else @c null if an exception occurs @n
173 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
174 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
175 * @exception E_SUCCESS The method is successful.
176 * @exception E_OUT_OF_MEMORY The memory is insufficient.
177 * @exception E_INVALID_ARG The specified @c chars is empty or invalid.
178 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
179 * @remarks The specific error code can be accessed using the GetLastResult() method.
// Encodes chars and returns a pointer to the ByteBuffer holding the result; documented to
// return null on failure, with the specific error available through GetLastResult().
// NOTE(review): the N suffix presumably means the caller owns the returned buffer - confirm.
182 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::WcharBuffer& chars) const;
185 * Encodes an instance of the specified Tizen::Base::String into an instance of Tizen::Base::ByteBuffer.
189 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
190 * else @c null if an exception occurs @n
191 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
192 * @param[in] str A string to encode
193 * @exception E_SUCCESS The method is successful.
194 * @exception E_OUT_OF_MEMORY The memory is insufficient.
195 * @exception E_INVALID_ARG A specified input parameter is invalid, or
196 * the specified @c str is an empty string.
197 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
198 * @remarks The specific error code can be accessed using the GetLastResult() method.
// Encodes str and returns a pointer to the ByteBuffer holding the result; documented to
// return null on failure, with the specific error available through GetLastResult().
// NOTE(review): the N suffix presumably means the caller owns the returned buffer - confirm.
201 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::String& str) const;
204 * Encodes an instance of Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
205 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
209 * @return An error code
210 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
211 * @param[in] charIndex The index from where encoding begins in the Tizen::Base::WcharBuffer instance
212 * @param[in] charCount The total number of characters to encode
213 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
214 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
215 * @exception E_SUCCESS The method is successful.
216 * @exception E_OUT_OF_MEMORY The memory is insufficient.
217 * @exception E_INVALID_ARG A specified input parameter is invalid, or
218 * the specified @c chars or @c bytes is empty.
219 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
220 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
221 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
222 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
223 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
224 * the specified @c bytes does not contain sufficient space to store the encoded characters.
225 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
// Encodes charCount characters of chars, starting at charIndex, into the caller-supplied
// bytes buffer beginning at byteIndex. byteIndex defaults to 0 when omitted.
228 virtual result GetBytes(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount,
229 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
232 * Encodes an instance of Tizen::Base::String into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
233 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
237 * @return An error code
238 * @param[in] str A string to encode
239 * @param[in] charIndex The index from where encoding begins in the specified @c str
240 * @param[in] charCount The total number of characters to encode
241 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
242 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
243 * @exception E_SUCCESS The method is successful.
244 * @exception E_OUT_OF_MEMORY The memory is insufficient.
245 * @exception E_INVALID_ARG A specified input parameter is invalid, or
246 * the specified @c str or @c bytes is empty.
247 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
248 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c str.
249 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
250 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c str.
251 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
252 * the specified @c bytes does not contain sufficient space to store the encoded characters.
253 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
// Encodes charCount characters of str, starting at charIndex, into the caller-supplied
// bytes buffer beginning at byteIndex. byteIndex defaults to 0 when omitted.
256 virtual result GetBytes(const Tizen::Base::String& str, int charIndex, int charCount,
257 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
260 * Gets the total number of characters that are generated by decoding an instance of Tizen::Base::ByteBuffer. @n
261 * The %GetCharCount() method determines the exact number of characters
262 * produced if the given range of bytes is converted.
266 * @return An error code
267 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
268 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
269 * @exception E_SUCCESS The method is successful.
270 * @exception E_INVALID_ARG A specified input parameter is invalid, or
271 * the specified @c bytes is empty.
272 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
273 * @see GetMaxCharCount()
// Reports through charCount how many characters decoding the whole of bytes would produce.
// The returned result code tells the caller whether the count is valid.
275 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int& charCount) const;
278 * Gets the total number of characters that are generated by decoding a range of elements specified in the Tizen::Base::ByteBuffer instance. @n
279 * The %GetCharCount() method determines the exact number of characters
280 * produced if the given range of bytes is converted.
284 * @return An error code
285 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
286 * @param[in] byteIndex The index from where decoding begins
287 * @param[in] byteCount The total number of bytes to decode
288 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
289 * @exception E_SUCCESS The method is successful.
290 * @exception E_INVALID_ARG A specified input parameter is invalid, or
291 * the specified @c bytes is empty.
292 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
293 * the length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
294 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
295 * the sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
296 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
297 * @see GetMaxCharCount()
// Range overload: reports through charCount how many characters are produced by decoding
// byteCount bytes of bytes, starting at byteIndex.
299 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount, int& charCount) const;
302 * Decodes an instance of the specified Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer.
306 * @return A pointer to the Tizen::Base::WcharBuffer instance where the resultant decoded data is stored, @n
307 * else @c null if an exception occurs @n
308 * The buffer limit is the position of the last decoded character plus one and the starting position is zero.
309 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
310 * @exception E_SUCCESS The method is successful.
311 * @exception E_OUT_OF_MEMORY The memory is insufficient.
312 * @exception E_INVALID_ARG The specified @c bytes is empty or invalid.
313 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
314 * @remarks The specific error code can be accessed using the GetLastResult() method.
// Decodes bytes and returns a pointer to the WcharBuffer holding the result; documented to
// return null on failure, with the specific error available through GetLastResult().
// NOTE(review): the N suffix presumably means the caller owns the returned buffer - confirm.
317 virtual Tizen::Base::WcharBuffer* GetCharsN(const Tizen::Base::ByteBuffer& bytes) const;
320 * Decodes an instance of Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer as per the specified range. @n
321 * The position and limit of the %Tizen::Base::WcharBuffer instance are not changed.
325 * @return An error code
326 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
327 * @param[in] byteIndex The index from where decoding begins
328 * @param[in] byteCount The total number of bytes to decode
329 * @param[out] chars The Tizen::Base::WcharBuffer instance where the resultant decoded data is stored
330 * @param[in] charIndex The starting index of the resultant decoding in the Tizen::Base::WcharBuffer instance
331 * @exception E_SUCCESS The method is successful.
332 * @exception E_OUT_OF_MEMORY The memory is insufficient.
333 * @exception E_INVALID_ARG A specified input parameter is invalid, or
334 * the specified @c bytes or @c chars is empty.
335 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
336 * the length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
337 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
338 * the sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
339 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
340 * the specified @c chars does not contain sufficient space to store the decoded bytes.
341 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
// Decodes byteCount bytes of bytes, starting at byteIndex, into the caller-supplied
// chars buffer beginning at charIndex. charIndex defaults to 0 when omitted.
344 virtual result GetChars(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount,
345 Tizen::Base::WcharBuffer& chars, int charIndex = 0) const;
348 * Gets a string containing the decoded representation of the specified Tizen::Base::ByteBuffer instance.
352 * @return An error code
353 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
354 * @param[out] str A Tizen::Base::String instance @n
355 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
356 * @exception E_SUCCESS The method is successful.
357 * @exception E_OUT_OF_MEMORY The memory is insufficient.
358 * @exception E_INVALID_ARG A specified input parameter is invalid, or
359 * the specified @c bytes is empty.
360 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
// Decodes the whole of bytes and stores the decoded text in str.
// The returned result code tells the caller whether str is valid.
363 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, Tizen::Base::String& str) const;
366 * Gets a string containing the decoded representation of the specified Tizen::Base::ByteBuffer instance.
370 * @return An error code
371 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
372 * @param[in] index The index from where decoding begins
373 * @param[in] count The total number of bytes to decode
374 * @param[out] str A Tizen::Base::String instance @n
375 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
376 * @exception E_SUCCESS The method is successful.
377 * @exception E_OUT_OF_MEMORY The memory is insufficient.
378 * @exception E_INVALID_ARG A specified input parameter is invalid, or
379 * the specified @c bytes is empty.
380 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
381 * the length of the specified @c index or @c count is greater than the length of the specified @c bytes.
382 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
383 * the sum of the length of the specified @c index and @c count is greater than the length of the specified @c bytes.
384 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
// Range overload: decodes count bytes of bytes, starting at index, and stores the
// decoded text in str.
387 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, int index, int count, Tizen::Base::String& str) const;
391 * Gets the maximum number of bytes required for encoding a given number of characters.
395 * @return The maximum number of bytes required for encoding a given number of characters
396 * @param[in] charCount The total number of characters to encode
397 * @remarks This method determines an appropriate buffer size for the byte arrays passed to GetBytes() for encoding.
398 * @see GetByteCount()
// Returns the maximum number of bytes that encoding charCount characters can require;
// documented use is sizing the byte buffer passed to GetBytes().
401 virtual int GetMaxByteCount(int charCount) const;
405 * Gets the maximum number of characters that are generated by decoding the specified number of bytes.
409 * @return The maximum number of characters generated by decoding the specified number of bytes
410 * @param[in] byteCount The total number of bytes to decode
411 * @remarks This method determines an appropriate buffer size for the character arrays passed to
412 * GetChars() or a decoder for decoding.
413 * @see GetCharCount()
// Returns the maximum number of characters that decoding byteCount bytes can produce;
// documented use is sizing the character buffer passed to GetChars() or to a decoder.
416 virtual int GetMaxCharCount(int byteCount) const;
419 * Gets the encoder for the current encoding.
423 * @return A pointer to the Encoder instance for the current encoding
424 * @remarks Contrary to GetBytes(), an encoder can convert partial sequences of characters into
425 * partial sequences of bytes by maintaining the appropriate states between the conversions.
// Returns a pointer to an Encoder for this encoding; per the accompanying remarks, the
// encoder keeps state between calls so partial character sequences can be converted.
// NOTE(review): the N suffix presumably means the caller owns the returned object - confirm.
428 virtual Encoder* GetEncoderN(void) const;
431 * Gets the decoder for the current encoding.
435 * @return A pointer to the Decoder instance for the current encoding
436 * @remarks Contrary to GetChars(), a decoder can convert partial sequences of bytes
437 * into partial sequences of characters by maintaining the appropriate states between the conversions.
// Returns a pointer to a Decoder for this encoding; per the accompanying remarks, the
// decoder keeps state between calls so partial byte sequences can be converted.
// NOTE(review): the N suffix presumably means the caller owns the returned object - confirm.
440 virtual Decoder* GetDecoderN(void) const;
443 * Gets the encoding type of the current instance.
447 * @return An encoding type
// Returns the encoding type of this instance as a Tizen::Base::String, by value.
449 virtual Tizen::Base::String GetEncodingType(void) const;
453 * The implementation of this copy constructor is intentionally blank and declared as private to
454 * prohibit copying of objects.
// Copy constructor. Per the accompanying note it is declared private and intentionally
// left without an implementation so that instances cannot be copy-constructed.
456 Utf8Encoding(const Utf8Encoding& utf8Encoding);
459 * The implementation of this copy assignment operator is intentionally blank and declared as private
460 * to prohibit copying of objects.
// Copy assignment operator. Per the accompanying note it is declared private and
// intentionally left without an implementation so that instances cannot be assigned.
462 Utf8Encoding& operator =(const Utf8Encoding& utf8Encoding);
// Utf8Decoder and _Utf8EncodingImpl are friends and may access this class's non-public members.
464 friend class Utf8Decoder;
465 friend class _Utf8EncodingImpl;
// Pointer to the implementation object; _Utf8EncodingImpl is only forward-declared here.
// NOTE(review): presumably created and destroyed by this class (pimpl) - confirm in the .cpp.
466 class _Utf8EncodingImpl* __pUtf8EncodingImpl;
470 #endif //_FTEXT_UTF8_ENCODING_H_