2 // Open Service Platform
3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 // Licensed under the Apache License, Version 2.0 (the License);
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
19 * @file FTextUtf8Encoding.h
20 * @brief This is the header file for the %Utf8Encoding class.
22 * This header file contains the declarations of the %Utf8Encoding class.
23 * The %Utf8Encoding class is derived from the Encoding class.
25 #ifndef _FTEXT_UTF8_ENCODING_H_
26 #define _FTEXT_UTF8_ENCODING_H_
28 #include <FTextEncoding.h>
29 #include <FTextUtf8Encoder.h>
30 #include <FTextUtf8Decoder.h>
33 namespace Tizen { namespace Text
38 * @brief This class is an implementation of the UTF-8 encoding.
42 * @final This class is not intended for extension.
44 * The %Utf8Encoding class is an implementation of the UTF-8 encoding.
45 * Unicode Transformation Format-8 (UTF-8) is a variable-width character encoding that is widely used on the Internet. It converts each Unicode code point into a sequence of one to four 8-bit bytes. UTF-8 encoding supports all the Unicode character values and surrogates.
47 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/text/converting_all_text_data.htm">Converting All Text Data at Once</a>.
49 * The following example demonstrates how to use the %Utf8Encoding class.
56 * using namespace Tizen::Base;
57 * using namespace Tizen::Text;
60 * MyClass::Utf8EncodingSample(void)
64 * String str(L"(\u03a0) and (\u03a3)");
67 * utf8.GetByteCount(str, byteCount);
70 * ByteBuffer* pBuffer = utf8.GetBytesN(str);
73 * utf8.GetCharCount(*pBuffer, charCount);
77 * utf8.GetString(*pBuffer, decodedStr);
79 * if (str.Equals(decodedStr))
91 class _OSP_EXPORT_ Utf8Encoding
96 * This is the default constructor for this class.
103 * This is the destructor for this class. @n
104 * This destructor overrides Tizen::Text::Encoding::~Encoding().
108 virtual ~Utf8Encoding(void);
111 * Gets the total number of bytes that are generated by encoding the specified string. @n
112 * The GetByteCount() method determines the total number of bytes that are generated when the specified string is encoded.
116 * @return An error code
117 * @param[in] str The string to encode
118 * @param[out] byteCount The total number of bytes required to encode the string
119 * @exception E_SUCCESS The method is successful.
120 * @exception E_INVALID_ARG A specified input parameter is invalid, or
121 * the specified @c str is an empty string.
122 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
123 * @see GetMaxByteCount()
125 virtual result GetByteCount(const Tizen::Base::String& str, int& byteCount) const;
128 * Gets the total number of bytes that are generated by encoding the specified Tizen::Base::WcharBuffer instance. @n
129 * The GetByteCount() method determines the exact number of bytes
130 * produced if the given array of characters is encoded.
134 * @return An error code
135 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
136 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
137 * @exception E_SUCCESS The method is successful.
138 * @exception E_INVALID_ARG A specified input parameter is invalid, or
139 * the specified @c chars is empty.
140 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
141 * @see GetMaxByteCount()
143 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int& byteCount) const;
146 * Gets the total number of bytes that are required to encode a range of characters in the specified Tizen::Base::WcharBuffer instance. @n
147 * The GetByteCount() method determines the total number of bytes that are generated when the specified array of characters is encoded.
151 * @return An error code
152 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
153 * @param[in] charIndex The index from where encoding begins in the Tizen::Base::WcharBuffer instance
154 * @param[in] charCount The total number of characters to encode
155 * @param[out] byteCount The total number of bytes required to encode the specified range of characters
156 * @exception E_SUCCESS The method is successful.
157 * @exception E_INVALID_ARG A specified input parameter is invalid, or
158 * the specified @c chars is empty.
159 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
160 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
161 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
162 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
163 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
164 * @see GetMaxByteCount()
166 virtual result GetByteCount(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount, int& byteCount) const;
169 * Encodes the specified Tizen::Base::WcharBuffer instance into an instance of Tizen::Base::ByteBuffer.
173 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
174 * else @c null if an exception occurs @n
175 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
176 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
177 * @exception E_SUCCESS The method is successful.
178 * @exception E_OUT_OF_MEMORY The memory is insufficient.
179 * @exception E_INVALID_ARG The specified @c chars is empty or invalid.
180 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
181 * @remarks The specific error code can be accessed using the GetLastResult() method.
184 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::WcharBuffer& chars) const;
187 * Encodes the specified Tizen::Base::String instance into an instance of Tizen::Base::ByteBuffer.
191 * @return A pointer to the Tizen::Base::ByteBuffer instance where the resultant encoded string is stored, @n
192 * else @c null if an exception occurs @n
193 * The buffer limit is the position of the last encoded byte plus one and the starting position is zero.
194 * @param[in] str A string to encode
195 * @exception E_SUCCESS The method is successful.
196 * @exception E_OUT_OF_MEMORY The memory is insufficient.
197 * @exception E_INVALID_ARG A specified input parameter is invalid, or
198 * the specified @c str is an empty string.
199 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
200 * @remarks The specific error code can be accessed using the GetLastResult() method.
203 virtual Tizen::Base::ByteBuffer* GetBytesN(const Tizen::Base::String& str) const;
206 * Encodes an instance of Tizen::Base::WcharBuffer into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
207 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
211 * @return An error code
212 * @param[in] chars An instance of Tizen::Base::WcharBuffer to encode
213 * @param[in] charIndex The index from where encoding begins in the Tizen::Base::WcharBuffer instance
214 * @param[in] charCount The total number of characters to encode
215 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
216 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
217 * @exception E_SUCCESS The method is successful.
218 * @exception E_OUT_OF_MEMORY The memory is insufficient.
219 * @exception E_INVALID_ARG A specified input parameter is invalid, or
220 * the specified @c chars or @c bytes is empty.
221 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
222 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c chars.
223 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
224 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c chars.
225 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
226 * the specified @c bytes does not contain sufficient space to store the encoded characters.
227 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
230 virtual result GetBytes(const Tizen::Base::WcharBuffer& chars, int charIndex, int charCount,
231 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
234 * Encodes an instance of Tizen::Base::String into an instance of Tizen::Base::ByteBuffer as per the specified range. @n
235 * The position and limit of the %Tizen::Base::ByteBuffer instance are not changed.
239 * @return An error code
240 * @param[in] str A string to encode
241 * @param[in] charIndex The index from where encoding begins in the specified string
242 * @param[in] charCount The total number of characters to encode
243 * @param[out] bytes The Tizen::Base::ByteBuffer instance where the resultant encoded string is stored
244 * @param[in] byteIndex The starting index of the resultant encoding in the Tizen::Base::ByteBuffer instance
245 * @exception E_SUCCESS The method is successful.
246 * @exception E_OUT_OF_MEMORY The memory is insufficient.
247 * @exception E_INVALID_ARG A specified input parameter is invalid, or
248 * the specified @c str or @c bytes is empty.
249 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
250 * the length of the specified @c charIndex or @c charCount is greater than the length of the specified @c str.
251 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
252 * the sum of the length of the specified @c charIndex and @c charCount is greater than the length of the specified @c str.
253 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
254 * the specified @c bytes does not contain sufficient space to store the encoded characters.
255 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
258 virtual result GetBytes(const Tizen::Base::String& str, int charIndex, int charCount,
259 Tizen::Base::ByteBuffer& bytes, int byteIndex = 0) const;
262 * Gets the total number of characters that are generated by decoding an instance of Tizen::Base::ByteBuffer. @n
263 * The GetCharCount() method determines the exact number of characters
264 * produced if the given range of bytes is converted.
268 * @return An error code
269 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
270 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
271 * @exception E_SUCCESS The method is successful.
272 * @exception E_INVALID_ARG A specified input parameter is invalid, or
273 * the specified @c bytes is empty.
274 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
275 * @see GetMaxCharCount()
277 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int& charCount) const;
280 * Gets the total number of characters that are generated by decoding a range of elements specified in the Tizen::Base::ByteBuffer instance. @n
281 * The GetCharCount() method determines the exact number of characters
282 * produced if the given range of bytes is converted.
286 * @return An error code
287 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
288 * @param[in] byteIndex The index from where decoding begins
289 * @param[in] byteCount The total number of bytes to decode
290 * @param[out] charCount The total number of characters that are generated by decoding the specified Tizen::Base::ByteBuffer instance
291 * @exception E_SUCCESS The method is successful.
292 * @exception E_INVALID_ARG A specified input parameter is invalid, or
293 * the specified @c bytes is empty.
294 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
295 * the length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
296 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
297 * the sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
298 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
299 * @see GetMaxCharCount()
301 virtual result GetCharCount(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount, int& charCount) const;
304 * Decodes the specified Tizen::Base::ByteBuffer instance into an instance of Tizen::Base::WcharBuffer.
308 * @return A pointer to the Tizen::Base::WcharBuffer instance where the resultant decoded data is stored, @n
309 * else @c null if an exception occurs @n
310 * The buffer limit is the position of the last decoded character plus one and the starting position is zero.
311 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
312 * @exception E_SUCCESS The method is successful.
313 * @exception E_OUT_OF_MEMORY The memory is insufficient.
314 * @exception E_INVALID_ARG The specified @c bytes is empty or invalid.
315 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
316 * @remarks The specific error code can be accessed using the GetLastResult() method.
319 virtual Tizen::Base::WcharBuffer* GetCharsN(const Tizen::Base::ByteBuffer& bytes) const;
322 * Decodes an instance of Tizen::Base::ByteBuffer into an instance of Tizen::Base::WcharBuffer as per the specified range. @n
323 * The position and limit of the %Tizen::Base::WcharBuffer instance are not changed.
327 * @return An error code
328 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
329 * @param[in] byteIndex The index from where decoding begins
330 * @param[in] byteCount The total number of bytes to decode
331 * @param[out] chars The Tizen::Base::WcharBuffer instance where the resultant decoded data is stored
332 * @param[in] charIndex The starting index of the resultant decoding in the Tizen::Base::WcharBuffer instance
333 * @exception E_SUCCESS The method is successful.
334 * @exception E_OUT_OF_MEMORY The memory is insufficient.
335 * @exception E_INVALID_ARG A specified input parameter is invalid, or
336 * the specified @c bytes or @c chars is empty.
337 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
338 * the length of the specified @c byteIndex or @c byteCount is greater than the length of the specified @c bytes.
339 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
340 * the sum of the length of the specified @c byteIndex and @c byteCount is greater than the length of the specified @c bytes.
341 * @exception E_OVERFLOW This operation has caused the memory to overflow, or
342 * the specified @c chars does not contain sufficient space to store the decoded bytes.
343 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
346 virtual result GetChars(const Tizen::Base::ByteBuffer& bytes, int byteIndex, int byteCount,
347 Tizen::Base::WcharBuffer& chars, int charIndex = 0) const;
350 * Gets a string containing the decoded representation of the specified Tizen::Base::ByteBuffer instance.
354 * @return An error code
355 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
356 * @param[out] str A Tizen::Base::String instance @n
357 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
358 * @exception E_SUCCESS The method is successful.
359 * @exception E_OUT_OF_MEMORY The memory is insufficient.
360 * @exception E_INVALID_ARG A specified input parameter is invalid, or
361 * the specified @c bytes is empty.
362 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
365 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, Tizen::Base::String& str) const;
368 * Gets a string containing the decoded representation of the specified Tizen::Base::ByteBuffer instance.
372 * @return An error code
373 * @param[in] bytes An instance of Tizen::Base::ByteBuffer to decode
374 * @param[in] index The index from where decoding begins
375 * @param[in] count The total number of bytes to decode
376 * @param[out] str A Tizen::Base::String instance @n
377 * It contains the decoded representation of the specified Tizen::Base::ByteBuffer instance.
378 * @exception E_SUCCESS The method is successful.
379 * @exception E_OUT_OF_MEMORY The memory is insufficient.
380 * @exception E_INVALID_ARG A specified input parameter is invalid, or
381 * the specified @c bytes is empty.
382 * @exception E_OUT_OF_RANGE The value of an argument is outside the valid range defined by the method, or
383 * the specified @c index or @c count is greater than the length of the specified @c bytes.
384 * @exception E_UNDERFLOW This operation has caused the memory to underflow, or
385 * the sum of the length of the specified @c index and @c count is greater than the length of the specified @c bytes.
386 * @exception E_INVALID_ENCODING_RANGE The specified string contains code points that are outside the bounds of the character encoding scheme.
389 virtual result GetString(const Tizen::Base::ByteBuffer& bytes, int index, int count, Tizen::Base::String& str) const;
393 * Gets the maximum number of bytes required for encoding a given number of characters.
397 * @return The maximum number of bytes required for encoding a given number of characters
398 * @param[in] charCount The total number of characters to encode
399 * @remarks GetMaxByteCount() determines an appropriate buffer size for the byte arrays passed to GetBytes() for encoding.
400 * @see GetByteCount()
403 virtual int GetMaxByteCount(int charCount) const;
407 * Gets the maximum number of characters that are generated by decoding the specified number of bytes.
411 * @return The maximum number of characters generated by decoding the specified number of bytes
412 * @param[in] byteCount The total number of bytes to decode
413 * @remarks GetMaxCharCount() determines an appropriate buffer size for the character arrays passed to
414 * GetChars() or a decoder for decoding.
415 * @see GetCharCount()
418 virtual int GetMaxCharCount(int byteCount) const;
421 * Gets the encoder for the current encoding.
425 * @return A pointer to the Encoder instance for the current encoding
426 * @remarks Contrary to GetBytes(), an encoder can convert partial sequences of characters into
427 * partial sequences of bytes by maintaining the appropriate states between the conversions.
430 virtual Encoder* GetEncoderN(void) const;
433 * Gets the decoder for the current encoding.
437 * @return A pointer to the Decoder instance for the current encoding
438 * @remarks Contrary to GetChars(), a decoder can convert partial sequences of bytes
439 * into partial sequences of characters by maintaining the appropriate states between the conversions.
442 virtual Decoder* GetDecoderN(void) const;
445 * Gets the encoding type of the current instance.
449 * @return An encoding type
451 virtual Tizen::Base::String GetEncodingType(void) const;
455 * The implementation of this copy constructor is intentionally blank and declared as private to
456 * prohibit copying of objects.
458 Utf8Encoding(const Utf8Encoding& utf8Encoding);
461 * The implementation of this copy assignment operator is intentionally blank and declared as private
462 * to prohibit copying of objects.
464 Utf8Encoding& operator =(const Utf8Encoding& utf8Encoding);
466 friend class Utf8Decoder;
467 friend class _Utf8EncodingImpl;
468 class _Utf8EncodingImpl* __pUtf8EncodingImpl;
472 #endif //_FTEXT_UTF8_ENCODING_H_