/*
 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
21 #include <dali-toolkit/internal/text/character-set-conversion.h>
22 #include <dali-toolkit-test-suite-utils.h>
23 #include <dali-toolkit/dali-toolkit.h>
27 using namespace Toolkit;
// Tests the following functions for scripts with a different number of bytes per character:
// Latin (1 byte per character), Arabic (2 bytes per character), Devanagari (3 bytes per character)
// and emojis (4 bytes per character).
//
// uint8_t GetUtf8Length( uint8_t utf8LeadByte );
// uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length );
// uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCharacters );
// uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf32 );
// uint32_t Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 );
// void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, std::string& utf8 );

//////////////////////////////////////////////////////////
/**
 * @brief One test case for GetNumberOfUtf8Characters().
 */
struct GetNumberOfUtf8CharactersData
{
  std::string  description;        ///< Description of the test.
  std::string  text;               ///< Input text, encoded in utf8.
  unsigned int numberOfCharacters; ///< The expected number of characters.
};
53 bool GetNumberOfUtf8CharactersTest( const GetNumberOfUtf8CharactersData& data )
55 return GetNumberOfUtf8Characters( reinterpret_cast<const uint8_t*>( data.text.c_str() ), data.text.size() ) == data.numberOfCharacters;
//////////////////////////////////////////////////////////
/**
 * @brief One test case for GetNumberOfUtf8Bytes().
 */
struct GetNumberOfUtf8BytesData
{
  std::string   description;        ///< Description of the test.
  unsigned int* utf32;              ///< Input text in utf32.
  unsigned int  numberOfCharacters; ///< The number of characters in @e utf32.
  unsigned int  numberOfBytes;      ///< The expected number of bytes in utf8.
};
68 bool GetNumberOfUtf8BytesTest( const GetNumberOfUtf8BytesData& data )
70 return GetNumberOfUtf8Bytes( data.utf32, data.numberOfCharacters ) == data.numberOfBytes;
//////////////////////////////////////////////////////////
/**
 * @brief One test case for Utf8ToUtf32().
 */
struct Utf8ToUtf32Data
{
  std::string   description; ///< Description of the test.
  std::string   text;        ///< Input text, encoded in utf8.
  unsigned int* utf32;       ///< The expected text (array of characters encoded in utf32).
};
83 bool Utf8ToUtf32Test( const Utf8ToUtf32Data& data )
85 Vector<uint32_t> utf32;
86 utf32.Resize( data.text.size() );
88 const uint32_t numberOfCharacters = Utf8ToUtf32( reinterpret_cast<const uint8_t* const>( data.text.c_str() ),
92 for( unsigned int index = 0u; index < numberOfCharacters; ++index )
94 if( data.utf32[index] != utf32[index] )
//////////////////////////////////////////////////////////
/**
 * @brief One test case for Utf32ToUtf8().
 */
struct Utf32ToUtf8Data
{
  std::string   description;        ///< Description of the test.
  unsigned int* utf32;              ///< The input text (array of characters encoded in utf32).
  unsigned int  numberOfCharacters; ///< The number of characters in @e utf32.
  std::string   text;               ///< The expected text, encoded in utf8.
};
115 bool Utf32ToUtf8Test( const Utf32ToUtf8Data& data )
119 Utf32ToUtf8( data.utf32, data.numberOfCharacters, text );
121 return text == data.text;
//////////////////////////////////////////////////////////
126 int UtcDaliTextCharacterSetConversionGetUtf8Length(void)
128 ToolkitTestApplication application;
129 tet_infoline(" UtcDaliTextCharacterSetConversionGetUtf8Length");
131 // Copy of the table used to get the size in bytes of a character encoded with utf8.
132 // If the table used by the GetUtf8Length() function is updated, this one needs to be updated as well.
133 const static uint8_t U1 = 1u;
134 const static uint8_t U2 = 2u;
135 const static uint8_t U3 = 3u;
136 const static uint8_t U4 = 4u;
137 const static uint8_t U5 = 5u;
138 const static uint8_t U6 = 6u;
139 const static uint8_t U0 = 0u;
140 const static uint8_t UTF8_LENGTH[256] = {
141 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
142 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
143 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
144 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
145 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
146 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
147 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
148 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
149 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // lead byte = 0xxx xxxx (U+0000 - U+007F + some extended ascii characters)
150 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
151 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
152 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
153 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
154 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
155 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
156 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
157 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
158 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
159 U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
162 U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, //
163 U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, // lead byte = 110x xxxx (U+0080 - U+07FF)
164 U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, //
167 U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, // lead byte = 1110 xxxx (U+0800 - U+FFFF)
168 U3, U3, U3, U3, U3, U3, //
170 U4, U4, U4, U4, U4, U4, U4, U4, // lead byte = 1111 0xxx (U+10000 - U+1FFFFF)
172 U5, U5, U5, U5, // lead byte = 1111 10xx (U+200000 - U+3FFFFFF)
174 U6, U6, // lead byte = 1111 110x (U+4000000 - U+7FFFFFFF)
176 U0, U0, // Non valid.
179 for( unsigned int index = 0; index < 256u; ++index )
181 if( GetUtf8Length( index ) != UTF8_LENGTH[static_cast<uint8_t>(index)] )
183 tet_result(TET_FAIL);
187 tet_result(TET_PASS);
192 int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Characters(void)
194 ToolkitTestApplication application;
195 tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Characters");
197 const GetNumberOfUtf8CharactersData data[] =
216 "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
221 "\xF8\xA0\x80\x80\x80",
226 "\xFC\x84\x80\x80\x80\x80",
230 const unsigned int numberOfTests = 6u;
232 for( unsigned int index = 0u; index < numberOfTests; ++index )
234 if( !GetNumberOfUtf8CharactersTest( data[index] ) )
236 tet_result(TET_FAIL);
240 tet_result(TET_PASS);
244 int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes(void)
246 ToolkitTestApplication application;
247 tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes");
249 unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
250 unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
251 unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
252 unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
253 unsigned int utf32_05[] = { 0x800000 };
254 unsigned int utf32_06[] = { 0x4000000 };
256 const GetNumberOfUtf8BytesData data[] =
295 const unsigned int numberOfTests = 6u;
297 for( unsigned int index = 0u; index < numberOfTests; ++index )
299 if( !GetNumberOfUtf8BytesTest( data[index] ) )
301 tet_result(TET_FAIL);
305 tet_result(TET_PASS);
309 int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void)
311 ToolkitTestApplication application;
312 tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes");
314 char utf8_06[] = { -2, -1 }; // Invalid string
316 unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
317 unsigned int utf32_02[] = { 0xA, 0x20, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0xA, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World + CR and CR+LF
318 unsigned int utf32_03[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
319 unsigned int utf32_04[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
320 unsigned int utf32_05[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
321 unsigned int utf32_06[] = { 0x800000 };
322 unsigned int utf32_07[] = { 0x4000000 };
323 unsigned int utf32_08[] = { 0x20, 0x20 }; // Invalid string
325 const Utf8ToUtf32Data data[] =
333 "Latin script with 'CR' and 'CR'+'LF'",
334 "\xd Hello\xd\xa World",
349 "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
354 "\xF8\xA0\x80\x80\x80",
359 "\xFC\x84\x80\x80\x80\x80",
368 const unsigned int numberOfTests = 8u;
370 for( unsigned int index = 0u; index < numberOfTests; ++index )
372 if( !Utf8ToUtf32Test( data[index] ) )
374 tet_result(TET_FAIL);
378 tet_result(TET_PASS);
382 int UtcDaliTextCharacterSetConversionUtf32ToUtf8(void)
384 ToolkitTestApplication application;
385 tet_infoline(" UtcDaliTextCharacterSetConversionUtf32ToUtf8");
387 unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
388 unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
389 unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
390 unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
391 unsigned int utf32_05[] = { 0x800000 };
392 unsigned int utf32_06[] = { 0x4000000 };
394 struct Utf32ToUtf8Data data[] =
418 "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
424 "\xF8\xA0\x80\x80\x80",
430 "\xFC\x84\x80\x80\x80\x80",
434 const unsigned int numberOfTests = 6u;
436 for( unsigned int index = 0u; index < numberOfTests; ++index )
438 if( !Utf32ToUtf8Test( data[index] ) )
440 tet_result(TET_FAIL);
444 tet_result(TET_PASS);