2 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <dali-toolkit/internal/text/character-set-conversion.h>
22 #include <dali-toolkit-test-suite-utils.h>
23 #include <dali-toolkit/dali-toolkit.h>
27 using namespace Toolkit;
30 // Tests the following functions for scripts that need different numbers of bytes per character when encoded in utf8.
31 // Latin uses 1 byte per character, Arabic 2 bytes per character, Devanagari 3 bytes per character and emojis 4 bytes per character.
33 // uint8_t GetUtf8Length( uint8_t utf8LeadByte );
34 // uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length );
35 // uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCharacters );
36 // uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf32 );
37 // uint32_t Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 );
38 // void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, std::string& utf8 );
41 //////////////////////////////////////////////////////////
// Test vector consumed by GetNumberOfUtf8CharactersTest(): an input text plus the
// character count that GetNumberOfUtf8Characters() is expected to return for it.
46 struct GetNumberOfUtf8CharactersData
48 std::string description; ///< Description of the test.
49 std::string text; ///< Input text; its bytes are handed to the function under test as uint8_t.
50 unsigned int numberOfCharacters; ///< The expected number of characters.
// Runs GetNumberOfUtf8Characters() over the data.text.size() bytes of data.text.
// Returns true when the reported count equals data.numberOfCharacters.
53 bool GetNumberOfUtf8CharactersTest( const GetNumberOfUtf8CharactersData& data )
55 return GetNumberOfUtf8Characters( reinterpret_cast<const uint8_t*>( data.text.c_str() ), data.text.size() ) == data.numberOfCharacters;
58 //////////////////////////////////////////////////////////
// Test vector consumed by GetNumberOfUtf8BytesTest(): a utf32 input (pointer + length)
// plus the byte count that GetNumberOfUtf8Bytes() is expected to return for it.
60 struct GetNumberOfUtf8BytesData
62 std::string description; ///< Description of the test.
63 unsigned int* utf32; ///< Input text in utf32; not owned by this struct (plain pointer to an array defined by the test).
64 unsigned int numberOfCharacters; ///< The number of characters in the utf32 array.
65 unsigned int numberOfBytes; ///< The expected number of bytes in utf8.
// Runs GetNumberOfUtf8Bytes() over the first data.numberOfCharacters entries of data.utf32.
// Returns true when the reported size equals data.numberOfBytes.
68 bool GetNumberOfUtf8BytesTest( const GetNumberOfUtf8BytesData& data )
70 return GetNumberOfUtf8Bytes( data.utf32, data.numberOfCharacters ) == data.numberOfBytes;
73 //////////////////////////////////////////////////////////
// Test vector consumed by Utf8ToUtf32Test(): a utf8 input text plus the utf32 values
// that Utf8ToUtf32() is expected to produce for it.
75 struct Utf8ToUtf32Data
77 std::string description; ///< Description of the test.
78 std::string text; ///< Input text (utf8).
79 unsigned int* utf32; ///< The expected text: one unsigned int per expected utf32 character, compared element by element.
// Converts data.text with Utf8ToUtf32() and compares the output with data.utf32.
// Only the first numberOfCharacters elements (the value returned by Utf8ToUtf32()) are
// compared, so data.utf32 must hold at least that many entries.
// NOTE(review): the remaining arguments of the Utf8ToUtf32() call and the return statements
// are not visible in this listing; presumably a mismatch returns false and a full match
// returns true - confirm against the complete file.
83 bool Utf8ToUtf32Test( const Utf8ToUtf32Data& data )
85 Vector<uint32_t> utf32;
// One element per input byte: the buffer is sized from the byte length of the utf8 text.
86 utf32.Resize( data.text.size() );
88 const uint32_t numberOfCharacters = Utf8ToUtf32( reinterpret_cast<const uint8_t* const>( data.text.c_str() ),
92 for( unsigned int index = 0u; index < numberOfCharacters; ++index )
94 if( data.utf32[index] != utf32[index] )
105 //////////////////////////////////////////////////////////
// Test vector consumed by Utf32ToUtf8Test(): a utf32 input (pointer + length) plus the
// text that Utf32ToUtf8() is expected to produce for it.
107 struct Utf32ToUtf8Data
109 std::string description; ///< Description of the test.
110 unsigned int* utf32; ///< The input text: one unsigned int per utf32 character; not owned by this struct.
111 unsigned int numberOfCharacters; ///< The number of characters in the utf32 array.
112 std::string text; ///< The expected text.
// Converts the first data.numberOfCharacters entries of data.utf32 with Utf32ToUtf8()
// into the local 'text' and returns true when it equals data.text.
// NOTE(review): the declaration of 'text' is not visible in this listing; presumably a
// std::string, matching the std::string& overload listed in the file header - confirm.
115 bool Utf32ToUtf8Test( const Utf32ToUtf8Data& data )
119 Utf32ToUtf8( data.utf32, data.numberOfCharacters, text );
121 return text == data.text;
124 //////////////////////////////////////////////////////////
// Test case for GetUtf8Length(): calls it with every value in [0, 255] and compares each
// result with the matching entry of a local reference table. A mismatch is reported with
// tet_result(TET_FAIL); tet_result(TET_PASS) is reported after the loop.
126 int UtcDaliTextCharacterSetConversionGetUtf8Length(void)
128 ToolkitTestApplication application;
129 tet_infoline(" UtcDaliTextCharacterSetConversionGetUtf8Length");
131 // Copy of the table used to get the size in bytes of a character encoded with utf8.
132 // If the table used by the GetUtf8Length() function is updated, this one needs to be updated as well.
// U1..U4 are the sequence lengths in bytes; U0 marks lead bytes flagged as non valid in the table below.
133 const static uint8_t U1 = 1u;
134 const static uint8_t U2 = 2u;
135 const static uint8_t U3 = 3u;
136 const static uint8_t U4 = 4u;
137 const static uint8_t U0 = 0u;
// Indexed by the value of the lead byte (256 entries).
138 const static uint8_t UTF8_LENGTH[256] = {
139 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
140 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
141 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
142 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
143 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
144 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
145 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
146 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
147 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // lead byte = 0xxx xxxx (U+0000 - U+007F + some extended ascii characters)
148 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
149 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
150 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
151 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
152 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
153 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
154 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
155 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
156 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
157 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
160 U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, //
161 U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, // lead byte = 110x xxxx (U+0080 - U+07FF)
162 U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, //
165 U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, // lead byte = 1110 xxxx (U+0800 - U+FFFF)
166 U3, U3, U3, U3, U3, U3, //
168 U4, U4, U4, U4, U4, U4, U4, U4, // lead byte = 1111 0xxx (U+10000 - U+1FFFFF)
170 U0, U0, U0, U0, // Non valid.
171 U0, U0, U0, U0, // Non valid.
// Exhaustive check over every possible byte value; index stays below 256, so the
// cast to uint8_t used for the table lookup never truncates.
174 for( unsigned int index = 0; index < 256u; ++index )
176 if( GetUtf8Length( index ) != UTF8_LENGTH[static_cast<uint8_t>(index)] )
178 tet_result(TET_FAIL);
182 tet_result(TET_PASS);
// Test case for GetNumberOfUtf8Characters(): runs GetNumberOfUtf8CharactersTest() on the
// first numberOfTests entries of data[]. A failing entry is reported with
// tet_result(TET_FAIL); tet_result(TET_PASS) is reported after the loop.
187 int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Characters(void)
189 ToolkitTestApplication application;
190 tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Characters");
192 const GetNumberOfUtf8CharactersData data[] =
// Four 4-byte utf8 sequences (U+1F601 .. U+1F604) separated by single spaces.
211 "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
// Hard-coded count: must be kept in step with the number of entries in data[].
215 const unsigned int numberOfTests = 4u;
217 for( unsigned int index = 0u; index < numberOfTests; ++index )
219 if( !GetNumberOfUtf8CharactersTest( data[index] ) )
221 tet_result(TET_FAIL);
225 tet_result(TET_PASS);
// Test case for GetNumberOfUtf8Bytes(): runs GetNumberOfUtf8BytesTest() on the first
// numberOfTests entries of data[]. A failing entry is reported with
// tet_result(TET_FAIL); tet_result(TET_PASS) is reported after the loop.
229 int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes(void)
231 ToolkitTestApplication application;
232 tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes");
// utf32 inputs, one array per script exercised by the test.
234 unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
235 unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
236 unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
237 unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
239 const GetNumberOfUtf8BytesData data[] =
// Hard-coded count: must be kept in step with the number of entries in data[].
266 const unsigned int numberOfTests = 4u;
268 for( unsigned int index = 0u; index < numberOfTests; ++index )
270 if( !GetNumberOfUtf8BytesTest( data[index] ) )
272 tet_result(TET_FAIL);
276 tet_result(TET_PASS);
// Test case for Utf8ToUtf32(): runs Utf8ToUtf32Test() on the first numberOfTests entries
// of data[]. A failing entry is reported with tet_result(TET_FAIL); tet_result(TET_PASS)
// is reported after the loop.
280 int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void)
282 ToolkitTestApplication application;
// The banner must name this test case so that the log output is attributed correctly.
283 tet_infoline(" UtcDaliTextCharacterSetConversionUtf8ToUtf32");
// Bytes 0xF8..0xFF. Hex character literals are used instead of negative integers so the
// initializer is not a narrowing conversion on platforms where plain char is unsigned.
285 char utf8_06[] = { '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF' }; // Invalid string
// Expected utf32 output, one array per test entry.
287 unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
// NOTE(review): presumably paired with the "\xd Hello\xd\xa World" input below; the expected
// values hold a single 0xA where that input has 'CR' and 'CR'+'LF'.
288 unsigned int utf32_02[] = { 0xA, 0x20, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0xA, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World + CR and CR+LF
289 unsigned int utf32_03[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
290 unsigned int utf32_04[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
291 unsigned int utf32_05[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
// Eight 0x20 values: one per byte of the eight-byte invalid input above.
292 unsigned int utf32_06[] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; // Invalid string
294 const Utf8ToUtf32Data data[] =
302 "Latin script with 'CR' and 'CR'+'LF'",
303 "\xd Hello\xd\xa World",
318 "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
// Hard-coded count: must be kept in step with the number of entries in data[].
327 const unsigned int numberOfTests = 6u;
329 for( unsigned int index = 0u; index < numberOfTests; ++index )
331 if( !Utf8ToUtf32Test( data[index] ) )
333 tet_result(TET_FAIL);
337 tet_result(TET_PASS);
// Test case for Utf32ToUtf8(): runs Utf32ToUtf8Test() on the first numberOfTests entries
// of data[]. A failing entry is reported with tet_result(TET_FAIL); tet_result(TET_PASS)
// is reported after the loop.
341 int UtcDaliTextCharacterSetConversionUtf32ToUtf8(void)
343 ToolkitTestApplication application;
344 tet_infoline(" UtcDaliTextCharacterSetConversionUtf32ToUtf8");
// utf32 inputs, one array per script exercised by the test.
346 unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
347 unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
348 unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
349 unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
351 struct Utf32ToUtf8Data data[] =
// Expected utf8 for the emoji input: four 4-byte sequences separated by single spaces.
375 "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
// Hard-coded count: must be kept in step with the number of entries in data[].
379 const unsigned int numberOfTests = 4u;
381 for( unsigned int index = 0u; index < numberOfTests; ++index )
383 if( !Utf32ToUtf8Test( data[index] ) )
385 tet_result(TET_FAIL);
389 tet_result(TET_PASS);